Rewrite the force calculations to fix memory issues
This commit is contained in:
parent
2d948a7e76
commit
ac44ceaab1
4 changed files with 79 additions and 64 deletions
|
@ -5,11 +5,10 @@ set(HEADER_FILES
|
||||||
forces.cuh
|
forces.cuh
|
||||||
)
|
)
|
||||||
set(SOURCE_FILES
|
set(SOURCE_FILES
|
||||||
forces.cu
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# The library contains header and source files.
|
# The library contains header and source files.
|
||||||
add_library(${NAME}_cuda_lib STATIC
|
add_library(${NAME}_cuda_lib INTERFACE
|
||||||
${SOURCE_FILES}
|
${SOURCE_FILES}
|
||||||
${HEADER_FILES}
|
${HEADER_FILES}
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,36 +0,0 @@
|
||||||
#include "forces.cuh"
|
|
||||||
|
|
||||||
__global__ void CAC::calc_forces_and_energies(real *xs, real *forces,
|
|
||||||
real *energies, int n_particles,
|
|
||||||
real *box_len,
|
|
||||||
PairPotential &potential) {
|
|
||||||
int i = blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
if (i < n_particles) {
|
|
||||||
real xi = xs[3 * i];
|
|
||||||
real yi = xs[3 * i + 1];
|
|
||||||
real zi = xs[3 * i + 2];
|
|
||||||
|
|
||||||
for (int j = 0; j < n_particles; j++) {
|
|
||||||
if (i != j) {
|
|
||||||
real xj = xs[3 * j];
|
|
||||||
real yj = xs[3 * j + 1];
|
|
||||||
real zj = xs[3 * j + 2];
|
|
||||||
|
|
||||||
real dx = xi - xj;
|
|
||||||
real dy = yi - yj;
|
|
||||||
real dz = zi - zj;
|
|
||||||
|
|
||||||
// Apply periodic boundary conditions
|
|
||||||
dx -= box_len[0] * round(dx / box_len[0]);
|
|
||||||
dy -= box_len[1] * round(dy / box_len[1]);
|
|
||||||
dz -= box_len[2] * round(dz / box_len[2]);
|
|
||||||
|
|
||||||
ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
|
|
||||||
forces[3 * i] += sol.force.x;
|
|
||||||
forces[3 * i + 1] += sol.force.y;
|
|
||||||
forces[3 * i + 2] += sol.force.z;
|
|
||||||
energies[i] += sol.energy;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,19 +1,80 @@
|
||||||
#ifndef FORCES_CUH
|
#ifndef FORCES_CUH
|
||||||
#define FORCES_CUH
|
#define FORCES_CUH
|
||||||
|
#include "potentials/pair_potentials.cuh"
|
||||||
#include "pair_potentials.cuh"
|
|
||||||
#include "precision.hpp"
|
#include "precision.hpp"
|
||||||
|
#include <cstdio>
|
||||||
|
#include <type_traits>
|
||||||
|
#include <variant>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace CAC {
|
namespace CAC {
|
||||||
/**
|
|
||||||
* Calculate forces and energies using CUDA for acceleration
|
inline void reset_forces_and_energies(int n_particles, real *forces,
|
||||||
* This code currently only accepts a single PairPotential object and does an
|
real *energies) {
|
||||||
* n^2 force calculation. Future improvements will:
|
cudaMemset(forces, 0, n_particles * sizeof(real) * 3);
|
||||||
* - Allow for neighbor listing
|
cudaMemset(energies, 0, n_particles * sizeof(real));
|
||||||
* - Allow for overlaid force calculations
|
}
|
||||||
*/
|
|
||||||
|
template <typename PotentialType>
|
||||||
__global__ void calc_forces_and_energies(real *xs, real *forces, real *energies,
|
__global__ void calc_forces_and_energies(real *xs, real *forces, real *energies,
|
||||||
int n_particles, real *box_bd,
|
int n_particles, real *box_len,
|
||||||
PairPotential &potential);
|
PotentialType potential) {
|
||||||
|
int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
|
||||||
|
if (i == 0) {
|
||||||
|
printf("n_particles: %d\n", n_particles);
|
||||||
|
printf("box_len: %f %f %f\n", box_len[0], box_len[1], box_len[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i < n_particles) {
|
||||||
|
printf("Thread %d, Block %d\n", threadIdx.x, blockIdx.x);
|
||||||
|
real xi = xs[3 * i];
|
||||||
|
real yi = xs[3 * i + 1];
|
||||||
|
real zi = xs[3 * i + 2];
|
||||||
|
|
||||||
|
for (int j = 0; j < n_particles; j++) {
|
||||||
|
if (i != j) {
|
||||||
|
real xj = xs[3 * j];
|
||||||
|
real yj = xs[3 * j + 1];
|
||||||
|
real zj = xs[3 * j + 2];
|
||||||
|
|
||||||
|
real dx = xi - xj;
|
||||||
|
real dy = yi - yj;
|
||||||
|
real dz = zi - zj;
|
||||||
|
|
||||||
|
// Apply periodic boundary conditions
|
||||||
|
dx -= box_len[0] * round(dx / box_len[0]);
|
||||||
|
dy -= box_len[1] * round(dy / box_len[1]);
|
||||||
|
dz -= box_len[2] * round(dz / box_len[2]);
|
||||||
|
|
||||||
|
ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
|
||||||
|
forces[3 * i] += sol.force.x;
|
||||||
|
forces[3 * i + 1] += sol.force.y;
|
||||||
|
forces[3 * i + 2] += sol.force.z;
|
||||||
|
energies[i] += sol.energy;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void launch_force_kernels(real *xs, real *forces, real *energies,
|
||||||
|
int n_particles, real *box_len,
|
||||||
|
std::vector<PairPotentials> potentials,
|
||||||
|
int grid_size, int block_size) {
|
||||||
|
|
||||||
|
reset_forces_and_energies(n_particles, forces, energies);
|
||||||
|
|
||||||
|
for (const auto &potential : potentials) {
|
||||||
|
std::visit(
|
||||||
|
[&](const auto &potential) {
|
||||||
|
using PotentialType = std::decay_t<decltype(potential)>;
|
||||||
|
calc_forces_and_energies<PotentialType><<<grid_size, block_size>>>(
|
||||||
|
xs, forces, energies, n_particles, box_len, potential);
|
||||||
|
},
|
||||||
|
potential);
|
||||||
|
cudaDeviceSynchronize();
|
||||||
|
}
|
||||||
|
}
|
||||||
} // namespace CAC
|
} // namespace CAC
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -5,13 +5,13 @@
|
||||||
|
|
||||||
// Include your header files
|
// Include your header files
|
||||||
#include "forces.cuh"
|
#include "forces.cuh"
|
||||||
#include "pair_potentials.cuh"
|
#include "potentials/pair_potentials.cuh"
|
||||||
#include "precision.hpp"
|
#include "precision.hpp"
|
||||||
|
|
||||||
class CudaForceKernelTest : public ::testing::Test {
|
class CudaForceKernelTest : public ::testing::Test {
|
||||||
protected:
|
protected:
|
||||||
const int BLOCK_SIZE = 1;
|
const int GRID_SIZE = 1;
|
||||||
const int THREADS_PER_BLOCK = 4;
|
const int BLOCK_SIZE = 4;
|
||||||
|
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
// Set up CUDA device
|
// Set up CUDA device
|
||||||
|
@ -66,17 +66,9 @@ protected:
|
||||||
real *d_energies = allocateAndCopyToGPU(energies);
|
real *d_energies = allocateAndCopyToGPU(energies);
|
||||||
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
||||||
|
|
||||||
// Allocate potential on the GPU
|
std::vector<PairPotentials> potentials = {LennardJones(1.0, 1.0, 3.0)};
|
||||||
LennardJones h_potential(1.0, 1.0, 3.0);
|
CAC::launch_force_kernels(d_positions, d_forces, d_energies, n_particles,
|
||||||
LennardJones *d_potential;
|
d_box_len, potentials, GRID_SIZE, BLOCK_SIZE);
|
||||||
checkCudaError(cudaMalloc(&d_potential, sizeof(LennardJones)),
|
|
||||||
"cudaMalloc potential");
|
|
||||||
checkCudaError(cudaMemcpy(d_potential, &h_potential, sizeof(LennardJones),
|
|
||||||
cudaMemcpyHostToDevice),
|
|
||||||
"cudaMemcpy H2D potential");
|
|
||||||
|
|
||||||
CAC::calc_forces_and_energies<<<BLOCK_SIZE, THREADS_PER_BLOCK>>>(
|
|
||||||
d_positions, d_forces, d_energies, n_particles, d_box_len, d_potential);
|
|
||||||
|
|
||||||
checkCudaError(cudaGetLastError(), "kernel launch");
|
checkCudaError(cudaGetLastError(), "kernel launch");
|
||||||
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
||||||
|
@ -88,7 +80,6 @@ protected:
|
||||||
|
|
||||||
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
||||||
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
||||||
checkCudaError(cudaFree(d_potential), "cudaFree potential");
|
|
||||||
|
|
||||||
return {result_forces, result_energies};
|
return {result_forces, result_energies};
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue