Add force calculation kernel and fix incorrect ctest configuration for Cuda tests

This commit is contained in:
Alex Selimov 2025-08-27 22:07:47 -04:00
parent cad74747bf
commit dc74e4e5c0
Signed by: aselimov
GPG key ID: 3DDB9C3E023F1F31
6 changed files with 348 additions and 6 deletions

View file

@ -2,12 +2,14 @@ project(${NAME}_cuda_lib CUDA CXX)
set(HEADER_FILES
pair_potentials.cuh
forces.cuh
)
set(SOURCE_FILES
forces.cu
)
# The library contains header and source files.
add_library(${NAME}_cuda_lib INTERFACE
add_library(${NAME}_cuda_lib STATIC
${SOURCE_FILES}
${HEADER_FILES}
)

36
kernels/forces.cu Normal file
View file

@ -0,0 +1,36 @@
#include "forces.cuh"
__global__ void CAC::calc_forces_and_energies(real *xs, real *forces,
real *energies, int n_particles,
real *box_len,
PairPotential &potential) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < n_particles) {
real xi = xs[3 * i];
real yi = xs[3 * i + 1];
real zi = xs[3 * i + 2];
for (int j = 0; j < n_particles; j++) {
if (i != j) {
real xj = xs[3 * j];
real yj = xs[3 * j + 1];
real zj = xs[3 * j + 2];
real dx = xi - xj;
real dy = yi - yj;
real dz = zi - zj;
// Apply periodic boundary conditions
dx -= box_len[0] * round(dx / box_len[0]);
dy -= box_len[1] * round(dy / box_len[1]);
dz -= box_len[2] * round(dz / box_len[2]);
ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
forces[3 * i] += sol.force.x;
forces[3 * i + 1] += sol.force.y;
forces[3 * i + 2] += sol.force.z;
energies[i] = sol.energy;
}
}
}
}

19
kernels/forces.cuh Normal file
View file

@ -0,0 +1,19 @@
#ifndef FORCES_CUH
#define FORCES_CUH
#include "pair_potentials.cuh"
#include "precision.hpp"
namespace CAC {
/**
* Calculate forces and energies using CUDA for acceleration
* This code currently only accepts a single PairPotential object and does an
* n^2 force calculation. Future improvements will:
* - Allow for neighbor listing
* - Allow for overlaid force calculations
*/
__global__ void calc_forces_and_energies(real *xs, real *forces, real *energies,
int n_particles, real *box_bd,
PairPotential &potential);
} // namespace CAC
#endif

View file

@ -1,5 +1,5 @@
#ifndef POTENTIALS_H
#define POTENTIALS_H
#ifndef POTENTIALS_CUH
#define POTENTIALS_CUH
#include "precision.hpp"
#include "vec3.h"
@ -84,8 +84,8 @@ struct LennardJones : PairPotential {
}
};
CUDA_CALLABLE ~LennardJones(){};
CUDA_CALLABLE inline ~LennardJones(){};
};
PairPotential::~PairPotential() {};
inline PairPotential::~PairPotential() {};
#endif

View file

@ -2,8 +2,16 @@ include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
add_executable(${NAME}_cuda_tests
test_potential.cu
test_forces.cu
)
target_link_libraries(${NAME}_cuda_tests gtest gtest_main)
target_link_libraries(${NAME}_cuda_tests ${CMAKE_PROJECT_NAME}_cuda_lib)
add_test(NAME ${NAME}CudaTests COMMAND ${CMAKE_BINARY_DIR}/tests/unit_tests/${NAME}_tests)
add_test(NAME ${NAME}CudaTests COMMAND ${CMAKE_BINARY_DIR}/tests/cuda_unit_tests/${NAME}_cuda_tests)
# Add environment variables for NVIDIA GPU selection. Useful for facilitating testing on multi gpu
# systems
set_property(TEST ${NAME}CudaTests PROPERTY ENVIRONMENT
"__NV_PRIME_RENDER_OFFLOAD=1"
"__GLX_VENDOR_LIBRARY_NAME=nvidia"
)

View file

@ -0,0 +1,277 @@
#include <cmath>
#include <cuda_runtime.h>
#include <gtest/gtest.h>
#include <vector>
// Include your header files
#include "forces.cuh"
#include "pair_potentials.cuh"
#include "precision.hpp"
class CudaKernelTest : public ::testing::Test {
protected:
void SetUp() override {
// Set up CUDA device
cudaError_t err = cudaSetDevice(0);
ASSERT_EQ(err, cudaSuccess) << "Failed to set CUDA device";
}
void TearDown() override {
// Clean up any remaining GPU memory
cudaDeviceReset();
}
// Helper function to check CUDA errors
void checkCudaError(cudaError_t err, const std::string &operation) {
ASSERT_EQ(err, cudaSuccess)
<< "CUDA error in " << operation << ": " << cudaGetErrorString(err);
}
// Helper function to allocate and copy data to GPU
template <typename T>
T *allocateAndCopyToGPU(const std::vector<T> &host_data) {
T *device_ptr;
size_t size = host_data.size() * sizeof(T);
checkCudaError(cudaMalloc(&device_ptr, size), "cudaMalloc");
checkCudaError(
cudaMemcpy(device_ptr, host_data.data(), size, cudaMemcpyHostToDevice),
"cudaMemcpy H2D");
return device_ptr;
}
// Helper function to copy data from GPU and free GPU memory
template <typename T>
std::vector<T> copyFromGPUAndFree(T *device_ptr, size_t count) {
std::vector<T> host_data(count);
size_t size = count * sizeof(T);
checkCudaError(
cudaMemcpy(host_data.data(), device_ptr, size, cudaMemcpyDeviceToHost),
"cudaMemcpy D2H");
checkCudaError(cudaFree(device_ptr), "cudaFree");
return host_data;
}
};
TEST_F(CudaKernelTest, BasicFunctionalityTest) {
const int n_particles = 4;
const real tolerance = 1e-5;
// Set up test data - simple 2x2 grid of particles
std::vector<real> positions = {
0.0, 0.0, 0.0, // particle 0
1.0, 0.0, 0.0, // particle 1
0.0, 1.0, 0.0, // particle 2
1.0, 1.0, 0.0 // particle 3
};
std::vector<real> forces(3 * n_particles, 0.0);
std::vector<real> energies(n_particles, 0.0);
std::vector<real> box_dimensions = {10.0, 10.0,
10.0}; // Large box to avoid PBC effects
// Allocate GPU memory and copy data
real *d_positions = allocateAndCopyToGPU(positions);
real *d_forces = allocateAndCopyToGPU(forces);
real *d_energies = allocateAndCopyToGPU(energies);
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
// Create Lennard-Jones potential (sigma=1.0, epsilon=1.0, rcutoff=3.0)
LennardJones potential(1.0, 1.0, 3.0);
// Launch kernel
dim3 blockSize(256);
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
checkCudaError(cudaGetLastError(), "kernel launch");
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
// Copy results back to host
std::vector<real> result_forces =
copyFromGPUAndFree(d_forces, 3 * n_particles);
std::vector<real> result_energies =
copyFromGPUAndFree(d_energies, n_particles);
// Clean up remaining GPU memory
checkCudaError(cudaFree(d_positions), "cudaFree positions");
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
// Verify results - forces should be non-zero and energies should be
// calculated
bool has_nonzero_force = false;
bool has_nonzero_energy = false;
for (int i = 0; i < 3 * n_particles; i++) {
if (std::abs(result_forces[i]) > tolerance) {
has_nonzero_force = true;
break;
}
}
for (int i = 0; i < n_particles; i++) {
if (std::abs(result_energies[i]) > tolerance) {
has_nonzero_energy = true;
break;
}
}
EXPECT_FALSE(has_nonzero_force)
<< "Expected non-zero forces between particles";
EXPECT_TRUE(has_nonzero_energy) << "Expected non-zero energies for particles";
}
TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) {
const int n_particles = 2;
const real tolerance = 1e-5;
// Place particles near opposite edges of a small box
std::vector<real> positions = {
0.1, 0.0, 0.0, // particle 0 near left edge
4.9, 0.0, 0.0 // particle 1 near right edge
};
std::vector<real> forces(3 * n_particles, 0.0);
std::vector<real> energies(n_particles, 0.0);
std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC
// Allocate GPU memory and copy data
real *d_positions = allocateAndCopyToGPU(positions);
real *d_forces = allocateAndCopyToGPU(forces);
real *d_energies = allocateAndCopyToGPU(energies);
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
// Create Lennard-Jones potential with large cutoff to ensure interaction
LennardJones potential(1.0, 1.0, 3.0);
// Launch kernel
dim3 blockSize(256);
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
checkCudaError(cudaGetLastError(), "kernel launch");
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
// Copy results back to host
std::vector<real> result_forces =
copyFromGPUAndFree(d_forces, 3 * n_particles);
std::vector<real> result_energies =
copyFromGPUAndFree(d_energies, n_particles);
checkCudaError(cudaFree(d_positions), "cudaFree positions");
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
// With PBC, particles should interact as if they're close (distance ~0.2)
// rather than far apart (distance ~4.8)
EXPECT_GT(std::abs(result_forces[0]), tolerance)
<< "Expected significant force due to PBC";
EXPECT_GT(std::abs(result_energies[0]), tolerance)
<< "Expected significant energy due to PBC";
}
TEST_F(CudaKernelTest, SingleParticleTest) {
const int n_particles = 1;
std::vector<real> positions = {0.0, 0.0, 0.0};
std::vector<real> forces(3 * n_particles, 0.0);
std::vector<real> energies(n_particles, 0.0);
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
real *d_positions = allocateAndCopyToGPU(positions);
real *d_forces = allocateAndCopyToGPU(forces);
real *d_energies = allocateAndCopyToGPU(energies);
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
LennardJones potential(1.0, 1.0, 3.0);
dim3 blockSize(256);
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
checkCudaError(cudaGetLastError(), "kernel launch");
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
std::vector<real> result_forces =
copyFromGPUAndFree(d_forces, 3 * n_particles);
std::vector<real> result_energies =
copyFromGPUAndFree(d_energies, n_particles);
checkCudaError(cudaFree(d_positions), "cudaFree positions");
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
// Single particle should have zero force and energy
EXPECT_NEAR(result_forces[0], 0.0, 1e-10);
EXPECT_NEAR(result_forces[1], 0.0, 1e-10);
EXPECT_NEAR(result_forces[2], 0.0, 1e-10);
EXPECT_NEAR(result_energies[0], 0.0, 1e-10);
}
TEST_F(CudaKernelTest, ForceSymmetryTest) {
const int n_particles = 2;
const real tolerance = 1e-5;
std::vector<real> positions = {
0.0, 0.0, 0.0, // particle 0
1.5, 0.0, 0.0 // particle 1
};
std::vector<real> forces(3 * n_particles, 0.0);
std::vector<real> energies(n_particles, 0.0);
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
real *d_positions = allocateAndCopyToGPU(positions);
real *d_forces = allocateAndCopyToGPU(forces);
real *d_energies = allocateAndCopyToGPU(energies);
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
LennardJones potential(1.0, 1.0, 3.0);
dim3 blockSize(256);
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
checkCudaError(cudaGetLastError(), "kernel launch");
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
std::vector<real> result_forces =
copyFromGPUAndFree(d_forces, 3 * n_particles);
std::vector<real> result_energies =
copyFromGPUAndFree(d_energies, n_particles);
checkCudaError(cudaFree(d_positions), "cudaFree positions");
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
// Newton's third law: forces should be equal and opposite
EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance)
<< "Force x-components should be opposite";
EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance)
<< "Force y-components should be opposite";
EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance)
<< "Force z-components should be opposite";
// Energies should be equal for symmetric particles
EXPECT_NEAR(result_energies[0], result_energies[1], tolerance)
<< "Energies should be equal";
}
// Main function to run tests
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
// Check if CUDA is available
int deviceCount;
cudaError_t err = cudaGetDeviceCount(&deviceCount);
if (err != cudaSuccess || deviceCount == 0) {
std::cout << "No CUDA devices available. Skipping CUDA tests." << std::endl;
return 0;
}
return RUN_ALL_TESTS();
}