Rewrite the force calculations to fix memory issues

This commit is contained in:
Alex Selimov 2025-09-10 22:47:54 -04:00
parent 2d948a7e76
commit ac44ceaab1
Signed by: aselimov
GPG key ID: 3DDB9C3E023F1F31
4 changed files with 79 additions and 64 deletions

View file

@ -5,11 +5,10 @@ set(HEADER_FILES
forces.cuh forces.cuh
) )
set(SOURCE_FILES set(SOURCE_FILES
forces.cu
) )
# The library contains header and source files. # The library contains header and source files.
add_library(${NAME}_cuda_lib STATIC add_library(${NAME}_cuda_lib INTERFACE
${SOURCE_FILES} ${SOURCE_FILES}
${HEADER_FILES} ${HEADER_FILES}
) )

View file

@ -1,36 +0,0 @@
#include "forces.cuh"
__global__ void CAC::calc_forces_and_energies(real *xs, real *forces,
real *energies, int n_particles,
real *box_len,
PairPotential &potential) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < n_particles) {
real xi = xs[3 * i];
real yi = xs[3 * i + 1];
real zi = xs[3 * i + 2];
for (int j = 0; j < n_particles; j++) {
if (i != j) {
real xj = xs[3 * j];
real yj = xs[3 * j + 1];
real zj = xs[3 * j + 2];
real dx = xi - xj;
real dy = yi - yj;
real dz = zi - zj;
// Apply periodic boundary conditions
dx -= box_len[0] * round(dx / box_len[0]);
dy -= box_len[1] * round(dy / box_len[1]);
dz -= box_len[2] * round(dz / box_len[2]);
ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
forces[3 * i] += sol.force.x;
forces[3 * i + 1] += sol.force.y;
forces[3 * i + 2] += sol.force.z;
energies[i] += sol.energy;
}
}
}
}

View file

@ -1,19 +1,80 @@
#ifndef FORCES_CUH #ifndef FORCES_CUH
#define FORCES_CUH #define FORCES_CUH
#include "potentials/pair_potentials.cuh"
#include "pair_potentials.cuh"
#include "precision.hpp" #include "precision.hpp"
#include <cstdio>
#include <type_traits>
#include <variant>
#include <vector>
namespace CAC { namespace CAC {
/**
* Calculate forces and energies using CUDA for acceleration inline void reset_forces_and_energies(int n_particles, real *forces,
* This code currently only accepts a single PairPotential object and does an real *energies) {
* n^2 force calculation. Future improvements will: cudaMemset(forces, 0, n_particles * sizeof(real) * 3);
* - Allow for neighbor listing cudaMemset(energies, 0, n_particles * sizeof(real));
* - Allow for overlaid force calculations }
*/
template <typename PotentialType>
__global__ void calc_forces_and_energies(real *xs, real *forces, real *energies, __global__ void calc_forces_and_energies(real *xs, real *forces, real *energies,
int n_particles, real *box_bd, int n_particles, real *box_len,
PairPotential &potential); PotentialType potential) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i == 0) {
printf("n_particles: %d\n", n_particles);
printf("box_len: %f %f %f\n", box_len[0], box_len[1], box_len[2]);
}
if (i < n_particles) {
printf("Thread %d, Block %d\n", threadIdx.x, blockIdx.x);
real xi = xs[3 * i];
real yi = xs[3 * i + 1];
real zi = xs[3 * i + 2];
for (int j = 0; j < n_particles; j++) {
if (i != j) {
real xj = xs[3 * j];
real yj = xs[3 * j + 1];
real zj = xs[3 * j + 2];
real dx = xi - xj;
real dy = yi - yj;
real dz = zi - zj;
// Apply periodic boundary conditions
dx -= box_len[0] * round(dx / box_len[0]);
dy -= box_len[1] * round(dy / box_len[1]);
dz -= box_len[2] * round(dz / box_len[2]);
ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
forces[3 * i] += sol.force.x;
forces[3 * i + 1] += sol.force.y;
forces[3 * i + 2] += sol.force.z;
energies[i] += sol.energy;
}
}
}
}
inline void launch_force_kernels(real *xs, real *forces, real *energies,
int n_particles, real *box_len,
std::vector<PairPotentials> potentials,
int grid_size, int block_size) {
reset_forces_and_energies(n_particles, forces, energies);
for (const auto &potential : potentials) {
std::visit(
[&](const auto &potential) {
using PotentialType = std::decay_t<decltype(potential)>;
calc_forces_and_energies<PotentialType><<<grid_size, block_size>>>(
xs, forces, energies, n_particles, box_len, potential);
},
potential);
cudaDeviceSynchronize();
}
}
} // namespace CAC } // namespace CAC
#endif #endif

View file

@ -5,13 +5,13 @@
// Include your header files // Include your header files
#include "forces.cuh" #include "forces.cuh"
#include "pair_potentials.cuh" #include "potentials/pair_potentials.cuh"
#include "precision.hpp" #include "precision.hpp"
class CudaForceKernelTest : public ::testing::Test { class CudaForceKernelTest : public ::testing::Test {
protected: protected:
const int BLOCK_SIZE = 1; const int GRID_SIZE = 1;
const int THREADS_PER_BLOCK = 4; const int BLOCK_SIZE = 4;
void SetUp() override { void SetUp() override {
// Set up CUDA device // Set up CUDA device
@ -66,17 +66,9 @@ protected:
real *d_energies = allocateAndCopyToGPU(energies); real *d_energies = allocateAndCopyToGPU(energies);
real *d_box_len = allocateAndCopyToGPU(box_dimensions); real *d_box_len = allocateAndCopyToGPU(box_dimensions);
// Allocate potential on the GPU std::vector<PairPotentials> potentials = {LennardJones(1.0, 1.0, 3.0)};
LennardJones h_potential(1.0, 1.0, 3.0); CAC::launch_force_kernels(d_positions, d_forces, d_energies, n_particles,
LennardJones *d_potential; d_box_len, potentials, GRID_SIZE, BLOCK_SIZE);
checkCudaError(cudaMalloc(&d_potential, sizeof(LennardJones)),
"cudaMalloc potential");
checkCudaError(cudaMemcpy(d_potential, &h_potential, sizeof(LennardJones),
cudaMemcpyHostToDevice),
"cudaMemcpy H2D potential");
CAC::calc_forces_and_energies<<<BLOCK_SIZE, THREADS_PER_BLOCK>>>(
d_positions, d_forces, d_energies, n_particles, d_box_len, d_potential);
checkCudaError(cudaGetLastError(), "kernel launch"); checkCudaError(cudaGetLastError(), "kernel launch");
checkCudaError(cudaDeviceSynchronize(), "kernel execution"); checkCudaError(cudaDeviceSynchronize(), "kernel execution");
@ -88,7 +80,6 @@ protected:
checkCudaError(cudaFree(d_positions), "cudaFree positions"); checkCudaError(cudaFree(d_positions), "cudaFree positions");
checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
checkCudaError(cudaFree(d_potential), "cudaFree potential");
return {result_forces, result_energies}; return {result_forces, result_energies};
} }