Rewrite the force calculations to fix memory issues

2025-09-10 22:47:54 -04:00 · 2025-09-10 22:47:54 -04:00 · ac44ceaab1
commit ac44ceaab1
parent 2d948a7e76
4 changed files with 79 additions and 64 deletions
--- a/kernels/CMakeLists.txt
+++ b/kernels/CMakeLists.txt
@ -5,11 +5,10 @@ set(HEADER_FILES
    forces.cuh
 )
 set(SOURCE_FILES
    forces.cu
 )
 # The library contains header and source files.
-add_library(${NAME}_cuda_lib STATIC
+add_library(${NAME}_cuda_lib INTERFACE
    ${SOURCE_FILES}
    ${HEADER_FILES}
 )
--- a/kernels/forces.cu
+++ b/kernels/forces.cu
@ -1,36 +0,0 @@
 #include "forces.cuh"
 __global__ void CAC::calc_forces_and_energies(real *xs, real *forces,
                                              real *energies, int n_particles,
                                              real *box_len,
                                              PairPotential &potential) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n_particles) {
    real xi = xs[3 * i];
    real yi = xs[3 * i + 1];
    real zi = xs[3 * i + 2];
    for (int j = 0; j < n_particles; j++) {
      if (i != j) {
        real xj = xs[3 * j];
        real yj = xs[3 * j + 1];
        real zj = xs[3 * j + 2];
        real dx = xi - xj;
        real dy = yi - yj;
        real dz = zi - zj;
        // Apply periodic boundary conditions
        dx -= box_len[0] * round(dx / box_len[0]);
        dy -= box_len[1] * round(dy / box_len[1]);
        dz -= box_len[2] * round(dz / box_len[2]);
        ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
        forces[3 * i] += sol.force.x;
        forces[3 * i + 1] += sol.force.y;
        forces[3 * i + 2] += sol.force.z;
        energies[i] += sol.energy;
      }
    }
  }
 }
--- a/kernels/forces.cuh
+++ b/kernels/forces.cuh
@ -1,19 +1,80 @@
 #ifndef FORCES_CUH
 #define FORCES_CUH
-
+#include "potentials/pair_potentials.cuh"
 #include "pair_potentials.cuh"
 #include "precision.hpp"
 #include <cstdio>
 #include <type_traits>
 #include <variant>
 #include <vector>
 namespace CAC {
-/**
+
- * Calculate forces and energies using CUDA for acceleration
+inline void reset_forces_and_energies(int n_particles, real *forces,
- * This code currently only accepts a single PairPotential object and does an
+                                      real *energies) {
- * n^2 force calculation. Future improvements will:
+  cudaMemset(forces, 0, n_particles * sizeof(real) * 3);
- *   - Allow for neighbor listing
+  cudaMemset(energies, 0, n_particles * sizeof(real));
- *   - Allow for overlaid force calculations
+}
- */
+
 template <typename PotentialType>
 __global__ void calc_forces_and_energies(real *xs, real *forces, real *energies,
-                                         int n_particles, real *box_bd,
+                                         int n_particles, real *box_len,
-                                         PairPotential &potential);
+                                         PotentialType potential) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i == 0) {
    printf("n_particles: %d\n", n_particles);
    printf("box_len: %f %f %f\n", box_len[0], box_len[1], box_len[2]);
  }
  if (i < n_particles) {
    printf("Thread %d, Block %d\n", threadIdx.x, blockIdx.x);
    real xi = xs[3 * i];
    real yi = xs[3 * i + 1];
    real zi = xs[3 * i + 2];
    for (int j = 0; j < n_particles; j++) {
      if (i != j) {
        real xj = xs[3 * j];
        real yj = xs[3 * j + 1];
        real zj = xs[3 * j + 2];
        real dx = xi - xj;
        real dy = yi - yj;
        real dz = zi - zj;
        // Apply periodic boundary conditions
        dx -= box_len[0] * round(dx / box_len[0]);
        dy -= box_len[1] * round(dy / box_len[1]);
        dz -= box_len[2] * round(dz / box_len[2]);
        ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
        forces[3 * i] += sol.force.x;
        forces[3 * i + 1] += sol.force.y;
        forces[3 * i + 2] += sol.force.z;
        energies[i] += sol.energy;
      }
    }
  }
 }
 inline void launch_force_kernels(real *xs, real *forces, real *energies,
                                 int n_particles, real *box_len,
                                 std::vector<PairPotentials> potentials,
                                 int grid_size, int block_size) {
  reset_forces_and_energies(n_particles, forces, energies);
  for (const auto &potential : potentials) {
    std::visit(
        [&](const auto &potential) {
          using PotentialType = std::decay_t<decltype(potential)>;
          calc_forces_and_energies<PotentialType><<<grid_size, block_size>>>(
              xs, forces, energies, n_particles, box_len, potential);
        },
        potential);
    cudaDeviceSynchronize();
  }
 }
 } // namespace CAC
 #endif
--- a/tests/cuda_unit_tests/test_forces.cu
+++ b/tests/cuda_unit_tests/test_forces.cu
@ -5,13 +5,13 @@
 // Include your header files
 #include "forces.cuh"
-#include "pair_potentials.cuh"
+#include "potentials/pair_potentials.cuh"
 #include "precision.hpp"
 class CudaForceKernelTest : public ::testing::Test {
 protected:
-  const int BLOCK_SIZE = 1;
+  const int GRID_SIZE = 1;
-  const int THREADS_PER_BLOCK = 4;
+  const int BLOCK_SIZE = 4;
  void SetUp() override {
    // Set up CUDA device
@ -66,17 +66,9 @@ protected:
    real *d_energies = allocateAndCopyToGPU(energies);
    real *d_box_len = allocateAndCopyToGPU(box_dimensions);
-    // Allocate potential on the GPU
+    std::vector<PairPotentials> potentials = {LennardJones(1.0, 1.0, 3.0)};
-    LennardJones h_potential(1.0, 1.0, 3.0);
+    CAC::launch_force_kernels(d_positions, d_forces, d_energies, n_particles,
-    LennardJones *d_potential;
+                              d_box_len, potentials, GRID_SIZE, BLOCK_SIZE);
    checkCudaError(cudaMalloc(&d_potential, sizeof(LennardJones)),
                   "cudaMalloc potential");
    checkCudaError(cudaMemcpy(d_potential, &h_potential, sizeof(LennardJones),
                              cudaMemcpyHostToDevice),
                   "cudaMemcpy H2D potential");
    CAC::calc_forces_and_energies<<<BLOCK_SIZE, THREADS_PER_BLOCK>>>(
        d_positions, d_forces, d_energies, n_particles, d_box_len, d_potential);
    checkCudaError(cudaGetLastError(), "kernel launch");
    checkCudaError(cudaDeviceSynchronize(), "kernel execution");
@ -88,7 +80,6 @@ protected:
    checkCudaError(cudaFree(d_positions), "cudaFree positions");
    checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
    checkCudaError(cudaFree(d_potential), "cudaFree potential");
    return {result_forces, result_energies};
  }