diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index bef60a3..f07f1a4 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -5,11 +5,10 @@ set(HEADER_FILES forces.cuh ) set(SOURCE_FILES - forces.cu ) # The library contains header and source files. -add_library(${NAME}_cuda_lib STATIC +add_library(${NAME}_cuda_lib INTERFACE ${SOURCE_FILES} ${HEADER_FILES} ) diff --git a/kernels/forces.cu b/kernels/forces.cu deleted file mode 100644 index 83c1027..0000000 --- a/kernels/forces.cu +++ /dev/null @@ -1,36 +0,0 @@ -#include "forces.cuh" - -__global__ void CAC::calc_forces_and_energies(real *xs, real *forces, - real *energies, int n_particles, - real *box_len, - PairPotential &potential) { - int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i < n_particles) { - real xi = xs[3 * i]; - real yi = xs[3 * i + 1]; - real zi = xs[3 * i + 2]; - - for (int j = 0; j < n_particles; j++) { - if (i != j) { - real xj = xs[3 * j]; - real yj = xs[3 * j + 1]; - real zj = xs[3 * j + 2]; - - real dx = xi - xj; - real dy = yi - yj; - real dz = zi - zj; - - // Apply periodic boundary conditions - dx -= box_len[0] * round(dx / box_len[0]); - dy -= box_len[1] * round(dy / box_len[1]); - dz -= box_len[2] * round(dz / box_len[2]); - - ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz}); - forces[3 * i] += sol.force.x; - forces[3 * i + 1] += sol.force.y; - forces[3 * i + 2] += sol.force.z; - energies[i] += sol.energy; - } - } - } -} diff --git a/kernels/forces.cuh b/kernels/forces.cuh index 87a610f..b96f412 100644 --- a/kernels/forces.cuh +++ b/kernels/forces.cuh @@ -1,19 +1,80 @@ #ifndef FORCES_CUH #define FORCES_CUH - -#include "pair_potentials.cuh" +#include "potentials/pair_potentials.cuh" #include "precision.hpp" +#include +#include +#include +#include + namespace CAC { -/** - * Calculate forces and energies using CUDA for acceleration - * This code currently only accepts a single PairPotential object and does an - * n^2 force calculation. Future improvements will: - * - Allow for neighbor listing - * - Allow for overlaid force calculations - */ + +inline void reset_forces_and_energies(int n_particles, real *forces, + real *energies) { + cudaMemset(forces, 0, n_particles * sizeof(real) * 3); + cudaMemset(energies, 0, n_particles * sizeof(real)); +} + +template __global__ void calc_forces_and_energies(real *xs, real *forces, real *energies, - int n_particles, real *box_bd, - PairPotential &potential); + int n_particles, real *box_len, + PotentialType potential) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i == 0) { + printf("n_particles: %d\n", n_particles); + printf("box_len: %f %f %f\n", box_len[0], box_len[1], box_len[2]); + } + + if (i < n_particles) { + printf("Thread %d, Block %d\n", threadIdx.x, blockIdx.x); + real xi = xs[3 * i]; + real yi = xs[3 * i + 1]; + real zi = xs[3 * i + 2]; + + for (int j = 0; j < n_particles; j++) { + if (i != j) { + real xj = xs[3 * j]; + real yj = xs[3 * j + 1]; + real zj = xs[3 * j + 2]; + + real dx = xi - xj; + real dy = yi - yj; + real dz = zi - zj; + + // Apply periodic boundary conditions + dx -= box_len[0] * round(dx / box_len[0]); + dy -= box_len[1] * round(dy / box_len[1]); + dz -= box_len[2] * round(dz / box_len[2]); + + ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz}); + forces[3 * i] += sol.force.x; + forces[3 * i + 1] += sol.force.y; + forces[3 * i + 2] += sol.force.z; + energies[i] += sol.energy; + } + } + } +} + +inline void launch_force_kernels(real *xs, real *forces, real *energies, + int n_particles, real *box_len, + std::vector potentials, + int grid_size, int block_size) { + + reset_forces_and_energies(n_particles, forces, energies); + + for (const auto &potential : potentials) { + std::visit( + [&](const auto &potential) { + using PotentialType = std::decay_t; + calc_forces_and_energies<<>>( + xs, forces, energies, n_particles, box_len, potential); + }, + potential); + cudaDeviceSynchronize(); + } +} } // namespace CAC #endif diff --git a/tests/cuda_unit_tests/test_forces.cu b/tests/cuda_unit_tests/test_forces.cu index 6b27b7b..a576ec8 100644 --- a/tests/cuda_unit_tests/test_forces.cu +++ b/tests/cuda_unit_tests/test_forces.cu @@ -5,13 +5,13 @@ // Include your header files #include "forces.cuh" -#include "pair_potentials.cuh" +#include "potentials/pair_potentials.cuh" #include "precision.hpp" class CudaForceKernelTest : public ::testing::Test { protected: - const int BLOCK_SIZE = 1; - const int THREADS_PER_BLOCK = 4; + const int GRID_SIZE = 1; + const int BLOCK_SIZE = 4; void SetUp() override { // Set up CUDA device @@ -66,17 +66,9 @@ protected: real *d_energies = allocateAndCopyToGPU(energies); real *d_box_len = allocateAndCopyToGPU(box_dimensions); - // Allocate potential on the GPU - LennardJones h_potential(1.0, 1.0, 3.0); - LennardJones *d_potential; - checkCudaError(cudaMalloc(&d_potential, sizeof(LennardJones)), - "cudaMalloc potential"); - checkCudaError(cudaMemcpy(d_potential, &h_potential, sizeof(LennardJones), - cudaMemcpyHostToDevice), - "cudaMemcpy H2D potential"); - - CAC::calc_forces_and_energies<<>>( - d_positions, d_forces, d_energies, n_particles, d_box_len, d_potential); + std::vector potentials = {LennardJones(1.0, 1.0, 3.0)}; + CAC::launch_force_kernels(d_positions, d_forces, d_energies, n_particles, + d_box_len, potentials, GRID_SIZE, BLOCK_SIZE); checkCudaError(cudaGetLastError(), "kernel launch"); checkCudaError(cudaDeviceSynchronize(), "kernel execution"); @@ -88,7 +80,6 @@ protected: checkCudaError(cudaFree(d_positions), "cudaFree positions"); checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); - checkCudaError(cudaFree(d_potential), "cudaFree potential"); return {result_forces, result_energies}; }