Rewrite the force calculations to fix memory issues

Move pair potentials and change to traditional Functor
Fix missing summation for energy
2025-09-10 22:47:54 -04:00 · 2025-09-10 22:03:11 -04:00 · 2025-09-10 06:10:36 -04:00
8 changed files with 300 additions and 335 deletions
--- a/kernels/CMakeLists.txt
+++ b/kernels/CMakeLists.txt
@ -1,15 +1,14 @@
 project(${NAME}_cuda_lib CUDA CXX)
 set(HEADER_FILES
-    pair_potentials.cuh
+    potentials/pair_potentials.cuh
    forces.cuh
 )
 set(SOURCE_FILES
    forces.cu
 )
 # The library contains header and source files.
-add_library(${NAME}_cuda_lib STATIC
+add_library(${NAME}_cuda_lib INTERFACE
    ${SOURCE_FILES}
    ${HEADER_FILES}
 )
--- a/kernels/forces.cu
+++ b/kernels/forces.cu
@ -1,36 +0,0 @@
 #include "forces.cuh"
 __global__ void CAC::calc_forces_and_energies(real *xs, real *forces,
                                              real *energies, int n_particles,
                                              real *box_len,
                                              PairPotential &potential) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n_particles) {
    real xi = xs[3 * i];
    real yi = xs[3 * i + 1];
    real zi = xs[3 * i + 2];
    for (int j = 0; j < n_particles; j++) {
      if (i != j) {
        real xj = xs[3 * j];
        real yj = xs[3 * j + 1];
        real zj = xs[3 * j + 2];
        real dx = xi - xj;
        real dy = yi - yj;
        real dz = zi - zj;
        // Apply periodic boundary conditions
        dx -= box_len[0] * round(dx / box_len[0]);
        dy -= box_len[1] * round(dy / box_len[1]);
        dz -= box_len[2] * round(dz / box_len[2]);
        ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
        forces[3 * i] += sol.force.x;
        forces[3 * i + 1] += sol.force.y;
        forces[3 * i + 2] += sol.force.z;
        energies[i] = sol.energy;
      }
    }
  }
 }
--- a/kernels/forces.cuh
+++ b/kernels/forces.cuh
@ -1,19 +1,80 @@
 #ifndef FORCES_CUH
 #define FORCES_CUH
-
+#include "potentials/pair_potentials.cuh"
 #include "pair_potentials.cuh"
 #include "precision.hpp"
 #include <cstdio>
 #include <type_traits>
 #include <variant>
 #include <vector>
 namespace CAC {
-/**
+
- * Calculate forces and energies using CUDA for acceleration
+inline void reset_forces_and_energies(int n_particles, real *forces,
- * This code currently only accepts a single PairPotential object and does an
+                                      real *energies) {
- * n^2 force calculation. Future improvements will:
+  cudaMemset(forces, 0, n_particles * sizeof(real) * 3);
- *   - Allow for neighbor listing
+  cudaMemset(energies, 0, n_particles * sizeof(real));
- *   - Allow for overlaid force calculations
+}
- */
+
 template <typename PotentialType>
 __global__ void calc_forces_and_energies(real *xs, real *forces, real *energies,
-                                         int n_particles, real *box_bd,
+                                         int n_particles, real *box_len,
-                                         PairPotential &potential);
+                                         PotentialType potential) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i == 0) {
    printf("n_particles: %d\n", n_particles);
    printf("box_len: %f %f %f\n", box_len[0], box_len[1], box_len[2]);
  }
  if (i < n_particles) {
    printf("Thread %d, Block %d\n", threadIdx.x, blockIdx.x);
    real xi = xs[3 * i];
    real yi = xs[3 * i + 1];
    real zi = xs[3 * i + 2];
    for (int j = 0; j < n_particles; j++) {
      if (i != j) {
        real xj = xs[3 * j];
        real yj = xs[3 * j + 1];
        real zj = xs[3 * j + 2];
        real dx = xi - xj;
        real dy = yi - yj;
        real dz = zi - zj;
        // Apply periodic boundary conditions
        dx -= box_len[0] * round(dx / box_len[0]);
        dy -= box_len[1] * round(dy / box_len[1]);
        dz -= box_len[2] * round(dz / box_len[2]);
        ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
        forces[3 * i] += sol.force.x;
        forces[3 * i + 1] += sol.force.y;
        forces[3 * i + 2] += sol.force.z;
        energies[i] += sol.energy;
      }
    }
  }
 }
 inline void launch_force_kernels(real *xs, real *forces, real *energies,
                                 int n_particles, real *box_len,
                                 std::vector<PairPotentials> potentials,
                                 int grid_size, int block_size) {
  reset_forces_and_energies(n_particles, forces, energies);
  for (const auto &potential : potentials) {
    std::visit(
        [&](const auto &potential) {
          using PotentialType = std::decay_t<decltype(potential)>;
          calc_forces_and_energies<PotentialType><<<grid_size, block_size>>>(
              xs, forces, energies, n_particles, box_len, potential);
        },
        potential);
    cudaDeviceSynchronize();
  }
 }
 } // namespace CAC
 #endif
--- a/kernels/pair_potentials.cuh
+++ b/kernels/pair_potentials.cuh
@ -1,91 +0,0 @@
 #ifndef POTENTIALS_CUH
 #define POTENTIALS_CUH
 #include "precision.hpp"
 #include "vec3.h"
 #ifdef __CUDACC__
 #define CUDA_CALLABLE __host__ __device__
 #else
 #define CUDA_CALLABLE
 #endif
 /**
 * Result struct for the Pair Potential
 */
 struct ForceAndEnergy {
  real energy;
  Vec3<real> force;
  CUDA_CALLABLE inline static ForceAndEnergy zero() {
    return {0.0, {0.0, 0.0, 0.0}};
  };
 };
 /**
 * Abstract implementation of a Pair Potential.
 * Pair potentials are potentials which depend solely on the distance
 * between two particles. These do not include multi-body potentials such as
 * EAM
 *
 */
 struct PairPotential {
  real m_rcutoffsq;
 CUDA_CALLABLE  PairPotential(real rcutoff) : m_rcutoffsq(rcutoff * rcutoff) {};
 #ifdef __CUDACC__
  CUDA_CALLABLE ~PairPotential();
 #else
  virtual ~PairPotential() = 0;
 #endif
  /**
   * Calculate the force and energy for a specific atom pair based on a
   * displacement vector r.
   */
  CUDA_CALLABLE virtual ForceAndEnergy calc_force_and_energy(Vec3<real> r) = 0;
 };
 /**
 * Calculate the Lennard-Jones energy and force for the current particle pair
 * described by displacement vector r
 */
 struct LennardJones : PairPotential {
  real m_epsilon;
  real m_sigma;
  CUDA_CALLABLE LennardJones(real sigma, real epsilon, real rcutoff)
      : PairPotential(rcutoff), m_epsilon(epsilon), m_sigma(sigma) {};
  CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3<real> r) {
    real rmagsq = r.squared_norm2();
    if (rmagsq < this->m_rcutoffsq && rmagsq > 0.0) {
      real inv_rmag = 1 / std::sqrt(rmagsq);
      // Pre-Compute the terms (doing this saves on multiple devisions/pow
      // function call)
      real sigma_r = m_sigma * inv_rmag;
      real sigma_r6 = sigma_r * sigma_r * sigma_r * sigma_r * sigma_r * sigma_r;
      real sigma_r12 = sigma_r6 * sigma_r6;
      // Get the energy
      real energy = 4.0 * m_epsilon * (sigma_r12 - sigma_r6);
      // Get the force vector
      real force_mag =
          4.0 * m_epsilon *
          (12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag);
      Vec3<real> force = r.scale(force_mag * inv_rmag);
      return {energy, force};
    } else {
      return ForceAndEnergy::zero();
    }
  };
  CUDA_CALLABLE inline ~LennardJones(){};
 };
 inline PairPotential::~PairPotential() {};
 #endif
--- a/kernels/potentials/pair_potentials.cuh
+++ b/kernels/potentials/pair_potentials.cuh
@ -0,0 +1,118 @@
 #ifndef POTENTIALS_CUH
 #define POTENTIALS_CUH
 #include "precision.hpp"
 #include "vec3.h"
 #include <cmath>
 #include <cstdio>
 #include <variant>
 #ifdef __CUDACC__
 #define CUDA_CALLABLE __host__ __device__
 #else
 #define CUDA_CALLABLE
 #endif
 /**
 * Result struct for the Pair Potential
 */
 struct ForceAndEnergy {
  real energy;
  Vec3<real> force;
  CUDA_CALLABLE inline static ForceAndEnergy zero() {
    return {0.0, {0.0, 0.0, 0.0}};
  };
 };
 /**
 * Calculate the Lennard-Jones energy and force for the current particle
 * pair described by displacement vector r
 */
 struct LennardJones {
  real m_sigma;
  real m_epsilon;
  real m_rcutoffsq;
  CUDA_CALLABLE LennardJones(real sigma, real epsilon, real rcutoff) {
    m_sigma = sigma;
    m_epsilon = epsilon;
    m_rcutoffsq = rcutoff * rcutoff;
  };
  CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3<real> r) {
    real rmagsq = r.squared_norm2();
    if (rmagsq < m_rcutoffsq && rmagsq > 0.0) {
      real inv_rmag = 1 / sqrt(rmagsq);
      // Pre-Compute the terms (doing this saves on multiple devisions/pow
      // function call)
      real sigma_r = m_sigma * inv_rmag;
      real sigma_r6 = sigma_r * sigma_r * sigma_r * sigma_r * sigma_r * sigma_r;
      real sigma_r12 = sigma_r6 * sigma_r6;
      // Get the energy
      real energy = 4.0 * m_epsilon * (sigma_r12 - sigma_r6);
      // Get the force vector
      real force_mag =
          4.0 * m_epsilon *
          (12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag);
      Vec3<real> force = r.scale(force_mag * inv_rmag);
      return {energy, force};
    } else {
      return ForceAndEnergy::zero();
    }
  };
 };
 /**
 * Calculate the Morse potential energy and force for the current particle pair
 * described by displacement vector r
 */
 struct Morse {
  real m_D;         // Depth of the potential well
  real m_a;         // Width of the potential
  real m_r0;        // Equilibrium bond distance
  real m_rcutoffsq; // Cutoff distance squared
  CUDA_CALLABLE Morse(real D, real a, real r0, real rcutoff) {
    m_D = D;
    m_a = a;
    m_r0 = r0;
    m_rcutoffsq = rcutoff * rcutoff;
  };
  CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3<real> r) {
    real rmagsq = r.squared_norm2();
    if (rmagsq < m_rcutoffsq && rmagsq > 0.0) {
      real rmag = sqrt(rmagsq);
      real dr = rmag - m_r0;
      // Compute exponentials
      real exp_a_dr = exp(-m_a * dr);
      real exp_2a_dr = exp_a_dr * exp_a_dr;
      // Energy: V(r) = D * (exp(-2a(r - r0)) - 2*exp(-a(r - r0)))
      real energy = m_D * (exp_2a_dr - 2.0 * exp_a_dr);
      // Force magnitude: F(r) = 2aD * (exp(-2a(r - r0)) - exp(-a(r - r0)))
      real force_mag = 2.0 * m_a * m_D * (exp_2a_dr - exp_a_dr);
      // Direction: normalized vector
      Vec3<real> force = r.scale(force_mag / rmag);
      return {energy, force};
    } else {
      return ForceAndEnergy::zero();
    }
  };
 };
 // Variant type for storing pair potential types
 using PairPotentials = std::variant<LennardJones, Morse>;
 #endif
--- a/tests/cuda_unit_tests/test_forces.cu
+++ b/tests/cuda_unit_tests/test_forces.cu
@ -5,11 +5,14 @@
 // Include your header files
 #include "forces.cuh"
-#include "pair_potentials.cuh"
+#include "potentials/pair_potentials.cuh"
 #include "precision.hpp"
-class CudaKernelTest : public ::testing::Test {
+class CudaForceKernelTest : public ::testing::Test {
 protected:
  const int GRID_SIZE = 1;
  const int BLOCK_SIZE = 4;
  void SetUp() override {
    // Set up CUDA device
    cudaError_t err = cudaSetDevice(0);
@ -50,53 +53,52 @@ protected:
    checkCudaError(cudaFree(device_ptr), "cudaFree");
    return host_data;
  }
  // Helper function to run the force calculation kernel
  std::pair<std::vector<real>, std::vector<real>>
  run_force_calculation(int n_particles, const std::vector<real> &positions,
                        const std::vector<real> &box_dimensions) {
    std::vector<real> forces(3 * n_particles, 0.0);
    std::vector<real> energies(n_particles, 0.0);
    real *d_positions = allocateAndCopyToGPU(positions);
    real *d_forces = allocateAndCopyToGPU(forces);
    real *d_energies = allocateAndCopyToGPU(energies);
    real *d_box_len = allocateAndCopyToGPU(box_dimensions);
    std::vector<PairPotentials> potentials = {LennardJones(1.0, 1.0, 3.0)};
    CAC::launch_force_kernels(d_positions, d_forces, d_energies, n_particles,
                              d_box_len, potentials, GRID_SIZE, BLOCK_SIZE);
    checkCudaError(cudaGetLastError(), "kernel launch");
    checkCudaError(cudaDeviceSynchronize(), "kernel execution");
    std::vector<real> result_forces =
        copyFromGPUAndFree(d_forces, 3 * n_particles);
    std::vector<real> result_energies =
        copyFromGPUAndFree(d_energies, n_particles);
    checkCudaError(cudaFree(d_positions), "cudaFree positions");
    checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
    return {result_forces, result_energies};
  }
 };
-TEST_F(CudaKernelTest, BasicFunctionalityTest) {
+TEST_F(CudaForceKernelTest, BasicFunctionalityTest) {
-  const int n_particles = 4;
+  const int n_particles = 2;
  const real tolerance = 1e-5;
  // Set up test data - simple 2x2 grid of particles
  std::vector<real> positions = {
      0.0, 0.0, 0.0, // particle 0
      1.0, 0.0, 0.0, // particle 1
      0.0, 1.0, 0.0, // particle 2
      1.0, 1.0, 0.0  // particle 3
  };
-  std::vector<real> forces(3 * n_particles, 0.0);
+  std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
  std::vector<real> energies(n_particles, 0.0);
  std::vector<real> box_dimensions = {10.0, 10.0,
                                      10.0}; // Large box to avoid PBC effects
-  // Allocate GPU memory and copy data
+  auto [result_forces, result_energies] =
-  real *d_positions = allocateAndCopyToGPU(positions);
+      run_force_calculation(n_particles, positions, box_dimensions);
  real *d_forces = allocateAndCopyToGPU(forces);
  real *d_energies = allocateAndCopyToGPU(energies);
  real *d_box_len = allocateAndCopyToGPU(box_dimensions);
  // Create Lennard-Jones potential (sigma=1.0, epsilon=1.0, rcutoff=3.0)
  LennardJones potential(1.0, 1.0, 3.0);
  // Launch kernel
  dim3 blockSize(256);
  dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
  CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
      d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
  checkCudaError(cudaGetLastError(), "kernel launch");
  checkCudaError(cudaDeviceSynchronize(), "kernel execution");
  // Copy results back to host
  std::vector<real> result_forces =
      copyFromGPUAndFree(d_forces, 3 * n_particles);
  std::vector<real> result_energies =
      copyFromGPUAndFree(d_energies, n_particles);
  // Clean up remaining GPU memory
  checkCudaError(cudaFree(d_positions), "cudaFree positions");
  checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
  // Verify results - forces should be non-zero and energies should be
  // calculated
@ -117,161 +119,72 @@ TEST_F(CudaKernelTest, BasicFunctionalityTest) {
    }
  }
-  EXPECT_FALSE(has_nonzero_force)
+  EXPECT_TRUE(has_nonzero_force)
      << "Expected non-zero forces between particles";
-  EXPECT_TRUE(has_nonzero_energy) << "Expected non-zero energies for particles";
+  EXPECT_TRUE(has_nonzero_energy)
      << "Expected non-zero energies for particles ";
 }
 //
 // TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) {
 //   const int n_particles = 2;
 //   const real tolerance = 1e-5;
 //
 //   // Place particles near opposite edges of a small box
 //   std::vector<real> positions = {
 //       0.1, 0.0, 0.0, // particle 0 near left edge
 //       4.9, 0.0, 0.0  // particle 1 near right edge
 //   };
 //   std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test
 //   PBC
 //
 //   auto [result_forces, result_energies] =
 //       run_force_calculation(n_particles, &positions, &box_dimensions);
 //
 //   // With PBC, particles should interact as if they're close (distance ~0.2)
 //   // rather than far apart (distance ~4.8)
 //   EXPECT_GT(std::abs(result_forces[0]), tolerance)
 //       << "Expected significant force due to PBC";
 //   EXPECT_GT(std::abs(result_energies[0]), tolerance)
 //       << "Expected significant energy due to PBC";
 // }
-TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) {
+// TEST_F(CudaForceKernelTest, SingleParticleTest) {
-  const int n_particles = 2;
+//   const int n_particles = 1;
-  const real tolerance = 1e-5;
+//
 //   std::vector<real> positions = {0.0, 0.0, 0.0};
 //   std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
 //
 //   auto [result_forces, result_energies] =
 //       run_force_calculation(n_particles, positions, box_dimensions);
 //   // Single particle should have zero force and energy
 //   EXPECT_NEAR(result_forces[0], 0.0, 1e-10);
 //   EXPECT_NEAR(result_forces[1], 0.0, 1e-10);
 //   EXPECT_NEAR(result_forces[2], 0.0, 1e-10);
 //   EXPECT_NEAR(result_energies[0], 0.0, 1e-10);
 // }
-  // Place particles near opposite edges of a small box
+// TEST_F(CudaKernelTest, ForceSymmetryTest) {
-  std::vector<real> positions = {
+//   const int n_particles = 2;
-      0.1, 0.0, 0.0, // particle 0 near left edge
+//   const real tolerance = 1e-5;
-      4.9, 0.0, 0.0  // particle 1 near right edge
+//
-  };
+//   std::vector<real> positions = {
-
+//       0.0, 0.0, 0.0, // particle 0
-  std::vector<real> forces(3 * n_particles, 0.0);
+//       1.5, 0.0, 0.0  // particle 1
-  std::vector<real> energies(n_particles, 0.0);
+//   };
-  std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC
+//   std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
-
+//
-  // Allocate GPU memory and copy data
+//   auto [result_forces, result_energies] =
-  real *d_positions = allocateAndCopyToGPU(positions);
+//       run_force_calculation(n_particles, &positions, &box_dimensions);
-  real *d_forces = allocateAndCopyToGPU(forces);
+//
-  real *d_energies = allocateAndCopyToGPU(energies);
+//   // Newton's third law: forces should be equal and opposite
-  real *d_box_len = allocateAndCopyToGPU(box_dimensions);
+//   EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance)
-
+//       << "Force x-components should be opposite";
-  // Create Lennard-Jones potential with large cutoff to ensure interaction
+//   EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance)
-  LennardJones potential(1.0, 1.0, 3.0);
+//       << "Force y-components should be opposite";
-
+//   EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance)
-  // Launch kernel
+//       << "Force z-components should be opposite";
-  dim3 blockSize(256);
+//
-  dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
+//   // Energies should be equal for symmetric particles
-
+//   EXPECT_NEAR(result_energies[0], result_energies[1], tolerance)
-  CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
+//       << "Energies should be equal";
-      d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
+// }
  checkCudaError(cudaGetLastError(), "kernel launch");
  checkCudaError(cudaDeviceSynchronize(), "kernel execution");
  // Copy results back to host
  std::vector<real> result_forces =
      copyFromGPUAndFree(d_forces, 3 * n_particles);
  std::vector<real> result_energies =
      copyFromGPUAndFree(d_energies, n_particles);
  checkCudaError(cudaFree(d_positions), "cudaFree positions");
  checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
  // With PBC, particles should interact as if they're close (distance ~0.2)
  // rather than far apart (distance ~4.8)
  EXPECT_GT(std::abs(result_forces[0]), tolerance)
      << "Expected significant force due to PBC";
  EXPECT_GT(std::abs(result_energies[0]), tolerance)
      << "Expected significant energy due to PBC";
 }
 TEST_F(CudaKernelTest, SingleParticleTest) {
  const int n_particles = 1;
  std::vector<real> positions = {0.0, 0.0, 0.0};
  std::vector<real> forces(3 * n_particles, 0.0);
  std::vector<real> energies(n_particles, 0.0);
  std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
  real *d_positions = allocateAndCopyToGPU(positions);
  real *d_forces = allocateAndCopyToGPU(forces);
  real *d_energies = allocateAndCopyToGPU(energies);
  real *d_box_len = allocateAndCopyToGPU(box_dimensions);
  LennardJones potential(1.0, 1.0, 3.0);
  dim3 blockSize(256);
  dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
  CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
      d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
  checkCudaError(cudaGetLastError(), "kernel launch");
  checkCudaError(cudaDeviceSynchronize(), "kernel execution");
  std::vector<real> result_forces =
      copyFromGPUAndFree(d_forces, 3 * n_particles);
  std::vector<real> result_energies =
      copyFromGPUAndFree(d_energies, n_particles);
  checkCudaError(cudaFree(d_positions), "cudaFree positions");
  checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
  // Single particle should have zero force and energy
  EXPECT_NEAR(result_forces[0], 0.0, 1e-10);
  EXPECT_NEAR(result_forces[1], 0.0, 1e-10);
  EXPECT_NEAR(result_forces[2], 0.0, 1e-10);
  EXPECT_NEAR(result_energies[0], 0.0, 1e-10);
 }
 TEST_F(CudaKernelTest, ForceSymmetryTest) {
  const int n_particles = 2;
  const real tolerance = 1e-5;
  std::vector<real> positions = {
      0.0, 0.0, 0.0, // particle 0
      1.5, 0.0, 0.0  // particle 1
  };
  std::vector<real> forces(3 * n_particles, 0.0);
  std::vector<real> energies(n_particles, 0.0);
  std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
  real *d_positions = allocateAndCopyToGPU(positions);
  real *d_forces = allocateAndCopyToGPU(forces);
  real *d_energies = allocateAndCopyToGPU(energies);
  real *d_box_len = allocateAndCopyToGPU(box_dimensions);
  LennardJones potential(1.0, 1.0, 3.0);
  dim3 blockSize(256);
  dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
  CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
      d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
  checkCudaError(cudaGetLastError(), "kernel launch");
  checkCudaError(cudaDeviceSynchronize(), "kernel execution");
  std::vector<real> result_forces =
      copyFromGPUAndFree(d_forces, 3 * n_particles);
  std::vector<real> result_energies =
      copyFromGPUAndFree(d_energies, n_particles);
  checkCudaError(cudaFree(d_positions), "cudaFree positions");
  checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
  // Newton's third law: forces should be equal and opposite
  EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance)
      << "Force x-components should be opposite";
  EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance)
      << "Force y-components should be opposite";
  EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance)
      << "Force z-components should be opposite";
  // Energies should be equal for symmetric particles
  EXPECT_NEAR(result_energies[0], result_energies[1], tolerance)
      << "Energies should be equal";
 }
 // Main function to run tests
 int main(int argc, char **argv) {
  ::testing::InitGoogleTest(&argc, argv);
  // Check if CUDA is available
  int deviceCount;
  cudaError_t err = cudaGetDeviceCount(&deviceCount);
  if (err != cudaSuccess || deviceCount == 0) {
    std::cout << "No CUDA devices available. Skipping CUDA tests." << std::endl;
    return 0;
  }
  return RUN_ALL_TESTS();
 }
--- a/tests/cuda_unit_tests/test_potential.cu
+++ b/tests/cuda_unit_tests/test_potential.cu
@ -1,4 +1,4 @@
-#include "pair_potentials.cuh"
+#include "potentials/pair_potentials.cuh"
 #include "precision.hpp"
 #include "gtest/gtest.h"
 #include <cmath>
@ -69,6 +69,7 @@ __global__ void lennard_jones_test_kernel(TestResults *results) {
    auto result = lj.calc_force_and_energy(r);
    results->energy_values[2] = result.energy;
    results->force_values[2] = result.force;
    results->at_minimum_pass =
        (fabs(result.energy + epsilon) < tolerance) &&
        vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance);
--- a/tests/unit_tests/test_potential.cpp
+++ b/tests/unit_tests/test_potential.cpp
@ -1,4 +1,4 @@
-#include "pair_potentials.cuh"
+#include "potentials/pair_potentials.cuh"
 #include "precision.hpp"
 #include "gtest/gtest.h"
 #include <cmath>
Author	SHA1	Message	Date
Alex Selimov	ac44ceaab1	Rewrite the force calculations to fix memory issues	2025-09-10 22:47:54 -04:00
Alex Selimov	2d948a7e76	Move pair potentials and change to traditional Functor	2025-09-10 22:03:11 -04:00
Alex Selimov	a638c4f388	Fix missing summation for energy	2025-09-10 06:10:36 -04:00