From a638c4f388506e067ea2d1a002dd1ef1e9677f49 Mon Sep 17 00:00:00 2001 From: Alex Selimov Date: Wed, 10 Sep 2025 06:10:36 -0400 Subject: [PATCH 1/3] Fix missing summation for energy --- kernels/forces.cu | 2 +- tests/cuda_unit_tests/test_forces.cu | 304 ++++++++++----------------- 2 files changed, 114 insertions(+), 192 deletions(-) diff --git a/kernels/forces.cu b/kernels/forces.cu index 2251bd5..83c1027 100644 --- a/kernels/forces.cu +++ b/kernels/forces.cu @@ -29,7 +29,7 @@ __global__ void CAC::calc_forces_and_energies(real *xs, real *forces, forces[3 * i] += sol.force.x; forces[3 * i + 1] += sol.force.y; forces[3 * i + 2] += sol.force.z; - energies[i] = sol.energy; + energies[i] += sol.energy; } } } diff --git a/tests/cuda_unit_tests/test_forces.cu b/tests/cuda_unit_tests/test_forces.cu index ca84e55..6b27b7b 100644 --- a/tests/cuda_unit_tests/test_forces.cu +++ b/tests/cuda_unit_tests/test_forces.cu @@ -8,8 +8,11 @@ #include "pair_potentials.cuh" #include "precision.hpp" -class CudaKernelTest : public ::testing::Test { +class CudaForceKernelTest : public ::testing::Test { protected: + const int BLOCK_SIZE = 1; + const int THREADS_PER_BLOCK = 4; + void SetUp() override { // Set up CUDA device cudaError_t err = cudaSetDevice(0); @@ -50,53 +53,61 @@ protected: checkCudaError(cudaFree(device_ptr), "cudaFree"); return host_data; } + + // Helper function to run the force calculation kernel + std::pair, std::vector> + run_force_calculation(int n_particles, const std::vector &positions, + const std::vector &box_dimensions) { + std::vector forces(3 * n_particles, 0.0); + std::vector energies(n_particles, 0.0); + + real *d_positions = allocateAndCopyToGPU(positions); + real *d_forces = allocateAndCopyToGPU(forces); + real *d_energies = allocateAndCopyToGPU(energies); + real *d_box_len = allocateAndCopyToGPU(box_dimensions); + + // Allocate potential on the GPU + LennardJones h_potential(1.0, 1.0, 3.0); + LennardJones *d_potential; + checkCudaError(cudaMalloc(&d_potential, sizeof(LennardJones)), + "cudaMalloc potential"); + checkCudaError(cudaMemcpy(d_potential, &h_potential, sizeof(LennardJones), + cudaMemcpyHostToDevice), + "cudaMemcpy H2D potential"); + + CAC::calc_forces_and_energies<<>>( + d_positions, d_forces, d_energies, n_particles, d_box_len, d_potential); + + checkCudaError(cudaGetLastError(), "kernel launch"); + checkCudaError(cudaDeviceSynchronize(), "kernel execution"); + + std::vector result_forces = + copyFromGPUAndFree(d_forces, 3 * n_particles); + std::vector result_energies = + copyFromGPUAndFree(d_energies, n_particles); + + checkCudaError(cudaFree(d_positions), "cudaFree positions"); + checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); + checkCudaError(cudaFree(d_potential), "cudaFree potential"); + + return {result_forces, result_energies}; + } }; -TEST_F(CudaKernelTest, BasicFunctionalityTest) { - const int n_particles = 4; +TEST_F(CudaForceKernelTest, BasicFunctionalityTest) { + const int n_particles = 2; const real tolerance = 1e-5; // Set up test data - simple 2x2 grid of particles std::vector positions = { 0.0, 0.0, 0.0, // particle 0 1.0, 0.0, 0.0, // particle 1 - 0.0, 1.0, 0.0, // particle 2 - 1.0, 1.0, 0.0 // particle 3 }; - std::vector forces(3 * n_particles, 0.0); - std::vector energies(n_particles, 0.0); - std::vector box_dimensions = {10.0, 10.0, - 10.0}; // Large box to avoid PBC effects + std::vector box_dimensions = {10.0, 10.0, 10.0}; - // Allocate GPU memory and copy data - real *d_positions = allocateAndCopyToGPU(positions); - real *d_forces = allocateAndCopyToGPU(forces); - real *d_energies = allocateAndCopyToGPU(energies); - real *d_box_len = allocateAndCopyToGPU(box_dimensions); - - // Create Lennard-Jones potential (sigma=1.0, epsilon=1.0, rcutoff=3.0) - LennardJones potential(1.0, 1.0, 3.0); - - // Launch kernel - dim3 blockSize(256); - dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x); - - CAC::calc_forces_and_energies<<>>( - d_positions, d_forces, d_energies, n_particles, d_box_len, potential); - - checkCudaError(cudaGetLastError(), "kernel launch"); - checkCudaError(cudaDeviceSynchronize(), "kernel execution"); - - // Copy results back to host - std::vector result_forces = - copyFromGPUAndFree(d_forces, 3 * n_particles); - std::vector result_energies = - copyFromGPUAndFree(d_energies, n_particles); - - // Clean up remaining GPU memory - checkCudaError(cudaFree(d_positions), "cudaFree positions"); - checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); + auto [result_forces, result_energies] = + run_force_calculation(n_particles, positions, box_dimensions); // Verify results - forces should be non-zero and energies should be // calculated @@ -117,161 +128,72 @@ TEST_F(CudaKernelTest, BasicFunctionalityTest) { } } - EXPECT_FALSE(has_nonzero_force) + EXPECT_TRUE(has_nonzero_force) << "Expected non-zero forces between particles"; - EXPECT_TRUE(has_nonzero_energy) << "Expected non-zero energies for particles"; + EXPECT_TRUE(has_nonzero_energy) + << "Expected non-zero energies for particles "; } +// +// TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) { +// const int n_particles = 2; +// const real tolerance = 1e-5; +// +// // Place particles near opposite edges of a small box +// std::vector positions = { +// 0.1, 0.0, 0.0, // particle 0 near left edge +// 4.9, 0.0, 0.0 // particle 1 near right edge +// }; +// std::vector box_dimensions = {5.0, 5.0, 5.0}; // Small box to test +// PBC +// +// auto [result_forces, result_energies] = +// run_force_calculation(n_particles, &positions, &box_dimensions); +// +// // With PBC, particles should interact as if they're close (distance ~0.2) +// // rather than far apart (distance ~4.8) +// EXPECT_GT(std::abs(result_forces[0]), tolerance) +// << "Expected significant force due to PBC"; +// EXPECT_GT(std::abs(result_energies[0]), tolerance) +// << "Expected significant energy due to PBC"; +// } -TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) { - const int n_particles = 2; - const real tolerance = 1e-5; +// TEST_F(CudaForceKernelTest, SingleParticleTest) { +// const int n_particles = 1; +// +// std::vector positions = {0.0, 0.0, 0.0}; +// std::vector box_dimensions = {10.0, 10.0, 10.0}; +// +// auto [result_forces, result_energies] = +// run_force_calculation(n_particles, positions, box_dimensions); +// // Single particle should have zero force and energy +// EXPECT_NEAR(result_forces[0], 0.0, 1e-10); +// EXPECT_NEAR(result_forces[1], 0.0, 1e-10); +// EXPECT_NEAR(result_forces[2], 0.0, 1e-10); +// EXPECT_NEAR(result_energies[0], 0.0, 1e-10); +// } - // Place particles near opposite edges of a small box - std::vector positions = { - 0.1, 0.0, 0.0, // particle 0 near left edge - 4.9, 0.0, 0.0 // particle 1 near right edge - }; - - std::vector forces(3 * n_particles, 0.0); - std::vector energies(n_particles, 0.0); - std::vector box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC - - // Allocate GPU memory and copy data - real *d_positions = allocateAndCopyToGPU(positions); - real *d_forces = allocateAndCopyToGPU(forces); - real *d_energies = allocateAndCopyToGPU(energies); - real *d_box_len = allocateAndCopyToGPU(box_dimensions); - - // Create Lennard-Jones potential with large cutoff to ensure interaction - LennardJones potential(1.0, 1.0, 3.0); - - // Launch kernel - dim3 blockSize(256); - dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x); - - CAC::calc_forces_and_energies<<>>( - d_positions, d_forces, d_energies, n_particles, d_box_len, potential); - - checkCudaError(cudaGetLastError(), "kernel launch"); - checkCudaError(cudaDeviceSynchronize(), "kernel execution"); - - // Copy results back to host - std::vector result_forces = - copyFromGPUAndFree(d_forces, 3 * n_particles); - std::vector result_energies = - copyFromGPUAndFree(d_energies, n_particles); - - checkCudaError(cudaFree(d_positions), "cudaFree positions"); - checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); - - // With PBC, particles should interact as if they're close (distance ~0.2) - // rather than far apart (distance ~4.8) - EXPECT_GT(std::abs(result_forces[0]), tolerance) - << "Expected significant force due to PBC"; - EXPECT_GT(std::abs(result_energies[0]), tolerance) - << "Expected significant energy due to PBC"; -} - -TEST_F(CudaKernelTest, SingleParticleTest) { - const int n_particles = 1; - - std::vector positions = {0.0, 0.0, 0.0}; - std::vector forces(3 * n_particles, 0.0); - std::vector energies(n_particles, 0.0); - std::vector box_dimensions = {10.0, 10.0, 10.0}; - - real *d_positions = allocateAndCopyToGPU(positions); - real *d_forces = allocateAndCopyToGPU(forces); - real *d_energies = allocateAndCopyToGPU(energies); - real *d_box_len = allocateAndCopyToGPU(box_dimensions); - - LennardJones potential(1.0, 1.0, 3.0); - - dim3 blockSize(256); - dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x); - - CAC::calc_forces_and_energies<<>>( - d_positions, d_forces, d_energies, n_particles, d_box_len, potential); - - checkCudaError(cudaGetLastError(), "kernel launch"); - checkCudaError(cudaDeviceSynchronize(), "kernel execution"); - - std::vector result_forces = - copyFromGPUAndFree(d_forces, 3 * n_particles); - std::vector result_energies = - copyFromGPUAndFree(d_energies, n_particles); - - checkCudaError(cudaFree(d_positions), "cudaFree positions"); - checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); - - // Single particle should have zero force and energy - EXPECT_NEAR(result_forces[0], 0.0, 1e-10); - EXPECT_NEAR(result_forces[1], 0.0, 1e-10); - EXPECT_NEAR(result_forces[2], 0.0, 1e-10); - EXPECT_NEAR(result_energies[0], 0.0, 1e-10); -} - -TEST_F(CudaKernelTest, ForceSymmetryTest) { - const int n_particles = 2; - const real tolerance = 1e-5; - - std::vector positions = { - 0.0, 0.0, 0.0, // particle 0 - 1.5, 0.0, 0.0 // particle 1 - }; - - std::vector forces(3 * n_particles, 0.0); - std::vector energies(n_particles, 0.0); - std::vector box_dimensions = {10.0, 10.0, 10.0}; - - real *d_positions = allocateAndCopyToGPU(positions); - real *d_forces = allocateAndCopyToGPU(forces); - real *d_energies = allocateAndCopyToGPU(energies); - real *d_box_len = allocateAndCopyToGPU(box_dimensions); - - LennardJones potential(1.0, 1.0, 3.0); - - dim3 blockSize(256); - dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x); - - CAC::calc_forces_and_energies<<>>( - d_positions, d_forces, d_energies, n_particles, d_box_len, potential); - - checkCudaError(cudaGetLastError(), "kernel launch"); - checkCudaError(cudaDeviceSynchronize(), "kernel execution"); - - std::vector result_forces = - copyFromGPUAndFree(d_forces, 3 * n_particles); - std::vector result_energies = - copyFromGPUAndFree(d_energies, n_particles); - - checkCudaError(cudaFree(d_positions), "cudaFree positions"); - checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); - - // Newton's third law: forces should be equal and opposite - EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance) - << "Force x-components should be opposite"; - EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance) - << "Force y-components should be opposite"; - EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance) - << "Force z-components should be opposite"; - - // Energies should be equal for symmetric particles - EXPECT_NEAR(result_energies[0], result_energies[1], tolerance) - << "Energies should be equal"; -} - -// Main function to run tests -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - - // Check if CUDA is available - int deviceCount; - cudaError_t err = cudaGetDeviceCount(&deviceCount); - if (err != cudaSuccess || deviceCount == 0) { - std::cout << "No CUDA devices available. Skipping CUDA tests." << std::endl; - return 0; - } - - return RUN_ALL_TESTS(); -} +// TEST_F(CudaKernelTest, ForceSymmetryTest) { +// const int n_particles = 2; +// const real tolerance = 1e-5; +// +// std::vector positions = { +// 0.0, 0.0, 0.0, // particle 0 +// 1.5, 0.0, 0.0 // particle 1 +// }; +// std::vector box_dimensions = {10.0, 10.0, 10.0}; +// +// auto [result_forces, result_energies] = +// run_force_calculation(n_particles, &positions, &box_dimensions); +// +// // Newton's third law: forces should be equal and opposite +// EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance) +// << "Force x-components should be opposite"; +// EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance) +// << "Force y-components should be opposite"; +// EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance) +// << "Force z-components should be opposite"; +// +// // Energies should be equal for symmetric particles +// EXPECT_NEAR(result_energies[0], result_energies[1], tolerance) +// << "Energies should be equal"; +// } From 2d948a7e76cc434ad7bfecd9d802aebec04df2a4 Mon Sep 17 00:00:00 2001 From: Alex Selimov Date: Wed, 10 Sep 2025 22:03:11 -0400 Subject: [PATCH 2/3] Move pair potentials and change to traditional Functor --- kernels/CMakeLists.txt | 2 +- kernels/pair_potentials.cuh | 91 ------------------ kernels/potentials/pair_potentials.cuh | 118 ++++++++++++++++++++++++ tests/cuda_unit_tests/test_potential.cu | 3 +- tests/unit_tests/test_potential.cpp | 2 +- 5 files changed, 122 insertions(+), 94 deletions(-) delete mode 100644 kernels/pair_potentials.cuh create mode 100644 kernels/potentials/pair_potentials.cuh diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index fac4474..bef60a3 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -1,7 +1,7 @@ project(${NAME}_cuda_lib CUDA CXX) set(HEADER_FILES - pair_potentials.cuh + potentials/pair_potentials.cuh forces.cuh ) set(SOURCE_FILES diff --git a/kernels/pair_potentials.cuh b/kernels/pair_potentials.cuh deleted file mode 100644 index d6c97ac..0000000 --- a/kernels/pair_potentials.cuh +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef POTENTIALS_CUH -#define POTENTIALS_CUH - -#include "precision.hpp" -#include "vec3.h" - -#ifdef __CUDACC__ -#define CUDA_CALLABLE __host__ __device__ -#else -#define CUDA_CALLABLE -#endif - -/** - * Result struct for the Pair Potential - */ -struct ForceAndEnergy { - real energy; - Vec3 force; - - CUDA_CALLABLE inline static ForceAndEnergy zero() { - return {0.0, {0.0, 0.0, 0.0}}; - }; -}; - -/** - * Abstract implementation of a Pair Potential. - * Pair potentials are potentials which depend solely on the distance - * between two particles. These do not include multi-body potentials such as - * EAM - * - */ -struct PairPotential { - real m_rcutoffsq; - -CUDA_CALLABLE PairPotential(real rcutoff) : m_rcutoffsq(rcutoff * rcutoff) {}; -#ifdef __CUDACC__ - CUDA_CALLABLE ~PairPotential(); -#else - virtual ~PairPotential() = 0; -#endif - - /** - * Calculate the force and energy for a specific atom pair based on a - * displacement vector r. - */ - CUDA_CALLABLE virtual ForceAndEnergy calc_force_and_energy(Vec3 r) = 0; -}; - -/** - * Calculate the Lennard-Jones energy and force for the current particle pair - * described by displacement vector r - */ -struct LennardJones : PairPotential { - real m_epsilon; - real m_sigma; - - CUDA_CALLABLE LennardJones(real sigma, real epsilon, real rcutoff) - : PairPotential(rcutoff), m_epsilon(epsilon), m_sigma(sigma) {}; - - CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3 r) { - real rmagsq = r.squared_norm2(); - if (rmagsq < this->m_rcutoffsq && rmagsq > 0.0) { - real inv_rmag = 1 / std::sqrt(rmagsq); - - // Pre-Compute the terms (doing this saves on multiple devisions/pow - // function call) - real sigma_r = m_sigma * inv_rmag; - real sigma_r6 = sigma_r * sigma_r * sigma_r * sigma_r * sigma_r * sigma_r; - real sigma_r12 = sigma_r6 * sigma_r6; - - // Get the energy - real energy = 4.0 * m_epsilon * (sigma_r12 - sigma_r6); - - // Get the force vector - real force_mag = - 4.0 * m_epsilon * - (12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag); - Vec3 force = r.scale(force_mag * inv_rmag); - - return {energy, force}; - - } else { - return ForceAndEnergy::zero(); - } - }; - - CUDA_CALLABLE inline ~LennardJones(){}; -}; - -inline PairPotential::~PairPotential() {}; -#endif diff --git a/kernels/potentials/pair_potentials.cuh b/kernels/potentials/pair_potentials.cuh new file mode 100644 index 0000000..537b03c --- /dev/null +++ b/kernels/potentials/pair_potentials.cuh @@ -0,0 +1,118 @@ +#ifndef POTENTIALS_CUH +#define POTENTIALS_CUH + +#include "precision.hpp" +#include "vec3.h" +#include +#include +#include + +#ifdef __CUDACC__ +#define CUDA_CALLABLE __host__ __device__ +#else +#define CUDA_CALLABLE +#endif + +/** + * Result struct for the Pair Potential + */ +struct ForceAndEnergy { + real energy; + Vec3 force; + + CUDA_CALLABLE inline static ForceAndEnergy zero() { + return {0.0, {0.0, 0.0, 0.0}}; + }; +}; + +/** + * Calculate the Lennard-Jones energy and force for the current particle + * pair described by displacement vector r + */ +struct LennardJones { + real m_sigma; + real m_epsilon; + real m_rcutoffsq; + + CUDA_CALLABLE LennardJones(real sigma, real epsilon, real rcutoff) { + m_sigma = sigma; + m_epsilon = epsilon; + m_rcutoffsq = rcutoff * rcutoff; + }; + + CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3 r) { + real rmagsq = r.squared_norm2(); + if (rmagsq < m_rcutoffsq && rmagsq > 0.0) { + real inv_rmag = 1 / sqrt(rmagsq); + + // Pre-Compute the terms (doing this saves on multiple devisions/pow + // function call) + real sigma_r = m_sigma * inv_rmag; + real sigma_r6 = sigma_r * sigma_r * sigma_r * sigma_r * sigma_r * sigma_r; + real sigma_r12 = sigma_r6 * sigma_r6; + + // Get the energy + real energy = 4.0 * m_epsilon * (sigma_r12 - sigma_r6); + + // Get the force vector + real force_mag = + 4.0 * m_epsilon * + (12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag); + Vec3 force = r.scale(force_mag * inv_rmag); + + return {energy, force}; + + } else { + return ForceAndEnergy::zero(); + } + }; +}; + +/** + * Calculate the Morse potential energy and force for the current particle pair + * described by displacement vector r + */ +struct Morse { + real m_D; // Depth of the potential well + real m_a; // Width of the potential + real m_r0; // Equilibrium bond distance + real m_rcutoffsq; // Cutoff distance squared + + CUDA_CALLABLE Morse(real D, real a, real r0, real rcutoff) { + m_D = D; + m_a = a; + m_r0 = r0; + m_rcutoffsq = rcutoff * rcutoff; + }; + + CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3 r) { + real rmagsq = r.squared_norm2(); + if (rmagsq < m_rcutoffsq && rmagsq > 0.0) { + real rmag = sqrt(rmagsq); + real dr = rmag - m_r0; + + // Compute exponentials + real exp_a_dr = exp(-m_a * dr); + real exp_2a_dr = exp_a_dr * exp_a_dr; + + // Energy: V(r) = D * (exp(-2a(r - r0)) - 2*exp(-a(r - r0))) + real energy = m_D * (exp_2a_dr - 2.0 * exp_a_dr); + + // Force magnitude: F(r) = 2aD * (exp(-2a(r - r0)) - exp(-a(r - r0))) + real force_mag = 2.0 * m_a * m_D * (exp_2a_dr - exp_a_dr); + + // Direction: normalized vector + Vec3 force = r.scale(force_mag / rmag); + + return {energy, force}; + + } else { + return ForceAndEnergy::zero(); + } + }; +}; + +// Variant type for storing pair potential types +using PairPotentials = std::variant; + +#endif diff --git a/tests/cuda_unit_tests/test_potential.cu b/tests/cuda_unit_tests/test_potential.cu index cab3216..9511ea5 100644 --- a/tests/cuda_unit_tests/test_potential.cu +++ b/tests/cuda_unit_tests/test_potential.cu @@ -1,4 +1,4 @@ -#include "pair_potentials.cuh" +#include "potentials/pair_potentials.cuh" #include "precision.hpp" #include "gtest/gtest.h" #include @@ -69,6 +69,7 @@ __global__ void lennard_jones_test_kernel(TestResults *results) { auto result = lj.calc_force_and_energy(r); results->energy_values[2] = result.energy; results->force_values[2] = result.force; + results->at_minimum_pass = (fabs(result.energy + epsilon) < tolerance) && vec3_near(Vec3{0.0, 0.0, 0.0}, result.force, tolerance); diff --git a/tests/unit_tests/test_potential.cpp b/tests/unit_tests/test_potential.cpp index a94f022..d6bf23b 100644 --- a/tests/unit_tests/test_potential.cpp +++ b/tests/unit_tests/test_potential.cpp @@ -1,4 +1,4 @@ -#include "pair_potentials.cuh" +#include "potentials/pair_potentials.cuh" #include "precision.hpp" #include "gtest/gtest.h" #include From ac44ceaab106f7b8d8b0341e013e8a49f859c961 Mon Sep 17 00:00:00 2001 From: Alex Selimov Date: Wed, 10 Sep 2025 22:47:54 -0400 Subject: [PATCH 3/3] Rewrite the force calculations to fix memory issues --- kernels/CMakeLists.txt | 3 +- kernels/forces.cu | 36 ------------ kernels/forces.cuh | 83 ++++++++++++++++++++++++---- tests/cuda_unit_tests/test_forces.cu | 21 ++----- 4 files changed, 79 insertions(+), 64 deletions(-) delete mode 100644 kernels/forces.cu diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index bef60a3..f07f1a4 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -5,11 +5,10 @@ set(HEADER_FILES forces.cuh ) set(SOURCE_FILES - forces.cu ) # The library contains header and source files. -add_library(${NAME}_cuda_lib STATIC +add_library(${NAME}_cuda_lib INTERFACE ${SOURCE_FILES} ${HEADER_FILES} ) diff --git a/kernels/forces.cu b/kernels/forces.cu deleted file mode 100644 index 83c1027..0000000 --- a/kernels/forces.cu +++ /dev/null @@ -1,36 +0,0 @@ -#include "forces.cuh" - -__global__ void CAC::calc_forces_and_energies(real *xs, real *forces, - real *energies, int n_particles, - real *box_len, - PairPotential &potential) { - int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i < n_particles) { - real xi = xs[3 * i]; - real yi = xs[3 * i + 1]; - real zi = xs[3 * i + 2]; - - for (int j = 0; j < n_particles; j++) { - if (i != j) { - real xj = xs[3 * j]; - real yj = xs[3 * j + 1]; - real zj = xs[3 * j + 2]; - - real dx = xi - xj; - real dy = yi - yj; - real dz = zi - zj; - - // Apply periodic boundary conditions - dx -= box_len[0] * round(dx / box_len[0]); - dy -= box_len[1] * round(dy / box_len[1]); - dz -= box_len[2] * round(dz / box_len[2]); - - ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz}); - forces[3 * i] += sol.force.x; - forces[3 * i + 1] += sol.force.y; - forces[3 * i + 2] += sol.force.z; - energies[i] += sol.energy; - } - } - } -} diff --git a/kernels/forces.cuh b/kernels/forces.cuh index 87a610f..b96f412 100644 --- a/kernels/forces.cuh +++ b/kernels/forces.cuh @@ -1,19 +1,80 @@ #ifndef FORCES_CUH #define FORCES_CUH - -#include "pair_potentials.cuh" +#include "potentials/pair_potentials.cuh" #include "precision.hpp" +#include +#include +#include +#include + namespace CAC { -/** - * Calculate forces and energies using CUDA for acceleration - * This code currently only accepts a single PairPotential object and does an - * n^2 force calculation. Future improvements will: - * - Allow for neighbor listing - * - Allow for overlaid force calculations - */ + +inline void reset_forces_and_energies(int n_particles, real *forces, + real *energies) { + cudaMemset(forces, 0, n_particles * sizeof(real) * 3); + cudaMemset(energies, 0, n_particles * sizeof(real)); +} + +template __global__ void calc_forces_and_energies(real *xs, real *forces, real *energies, - int n_particles, real *box_bd, - PairPotential &potential); + int n_particles, real *box_len, + PotentialType potential) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i == 0) { + printf("n_particles: %d\n", n_particles); + printf("box_len: %f %f %f\n", box_len[0], box_len[1], box_len[2]); + } + + if (i < n_particles) { + printf("Thread %d, Block %d\n", threadIdx.x, blockIdx.x); + real xi = xs[3 * i]; + real yi = xs[3 * i + 1]; + real zi = xs[3 * i + 2]; + + for (int j = 0; j < n_particles; j++) { + if (i != j) { + real xj = xs[3 * j]; + real yj = xs[3 * j + 1]; + real zj = xs[3 * j + 2]; + + real dx = xi - xj; + real dy = yi - yj; + real dz = zi - zj; + + // Apply periodic boundary conditions + dx -= box_len[0] * round(dx / box_len[0]); + dy -= box_len[1] * round(dy / box_len[1]); + dz -= box_len[2] * round(dz / box_len[2]); + + ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz}); + forces[3 * i] += sol.force.x; + forces[3 * i + 1] += sol.force.y; + forces[3 * i + 2] += sol.force.z; + energies[i] += sol.energy; + } + } + } +} + +inline void launch_force_kernels(real *xs, real *forces, real *energies, + int n_particles, real *box_len, + std::vector potentials, + int grid_size, int block_size) { + + reset_forces_and_energies(n_particles, forces, energies); + + for (const auto &potential : potentials) { + std::visit( + [&](const auto &potential) { + using PotentialType = std::decay_t; + calc_forces_and_energies<<>>( + xs, forces, energies, n_particles, box_len, potential); + }, + potential); + cudaDeviceSynchronize(); + } +} } // namespace CAC #endif diff --git a/tests/cuda_unit_tests/test_forces.cu b/tests/cuda_unit_tests/test_forces.cu index 6b27b7b..a576ec8 100644 --- a/tests/cuda_unit_tests/test_forces.cu +++ b/tests/cuda_unit_tests/test_forces.cu @@ -5,13 +5,13 @@ // Include your header files #include "forces.cuh" -#include "pair_potentials.cuh" +#include "potentials/pair_potentials.cuh" #include "precision.hpp" class CudaForceKernelTest : public ::testing::Test { protected: - const int BLOCK_SIZE = 1; - const int THREADS_PER_BLOCK = 4; + const int GRID_SIZE = 1; + const int BLOCK_SIZE = 4; void SetUp() override { // Set up CUDA device @@ -66,17 +66,9 @@ protected: real *d_energies = allocateAndCopyToGPU(energies); real *d_box_len = allocateAndCopyToGPU(box_dimensions); - // Allocate potential on the GPU - LennardJones h_potential(1.0, 1.0, 3.0); - LennardJones *d_potential; - checkCudaError(cudaMalloc(&d_potential, sizeof(LennardJones)), - "cudaMalloc potential"); - checkCudaError(cudaMemcpy(d_potential, &h_potential, sizeof(LennardJones), - cudaMemcpyHostToDevice), - "cudaMemcpy H2D potential"); - - CAC::calc_forces_and_energies<<>>( - d_positions, d_forces, d_energies, n_particles, d_box_len, d_potential); + std::vector potentials = {LennardJones(1.0, 1.0, 3.0)}; + CAC::launch_force_kernels(d_positions, d_forces, d_energies, n_particles, + d_box_len, potentials, GRID_SIZE, BLOCK_SIZE); checkCudaError(cudaGetLastError(), "kernel launch"); checkCudaError(cudaDeviceSynchronize(), "kernel execution"); @@ -88,7 +80,6 @@ protected: checkCudaError(cudaFree(d_positions), "cudaFree positions"); checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); - checkCudaError(cudaFree(d_potential), "cudaFree potential"); return {result_forces, result_energies}; }