#include #include #include #include // Include your header files #include "forces.cuh" #include "pair_potentials.cuh" #include "precision.hpp" class CudaKernelTest : public ::testing::Test { protected: void SetUp() override { // Set up CUDA device cudaError_t err = cudaSetDevice(0); ASSERT_EQ(err, cudaSuccess) << "Failed to set CUDA device"; } void TearDown() override { // Clean up any remaining GPU memory cudaDeviceReset(); } // Helper function to check CUDA errors void checkCudaError(cudaError_t err, const std::string &operation) { ASSERT_EQ(err, cudaSuccess) << "CUDA error in " << operation << ": " << cudaGetErrorString(err); } // Helper function to allocate and copy data to GPU template T *allocateAndCopyToGPU(const std::vector &host_data) { T *device_ptr; size_t size = host_data.size() * sizeof(T); checkCudaError(cudaMalloc(&device_ptr, size), "cudaMalloc"); checkCudaError( cudaMemcpy(device_ptr, host_data.data(), size, cudaMemcpyHostToDevice), "cudaMemcpy H2D"); return device_ptr; } // Helper function to copy data from GPU and free GPU memory template std::vector copyFromGPUAndFree(T *device_ptr, size_t count) { std::vector host_data(count); size_t size = count * sizeof(T); checkCudaError( cudaMemcpy(host_data.data(), device_ptr, size, cudaMemcpyDeviceToHost), "cudaMemcpy D2H"); checkCudaError(cudaFree(device_ptr), "cudaFree"); return host_data; } }; TEST_F(CudaKernelTest, BasicFunctionalityTest) { const int n_particles = 4; const real tolerance = 1e-5; // Set up test data - simple 2x2 grid of particles std::vector positions = { 0.0, 0.0, 0.0, // particle 0 1.0, 0.0, 0.0, // particle 1 0.0, 1.0, 0.0, // particle 2 1.0, 1.0, 0.0 // particle 3 }; std::vector forces(3 * n_particles, 0.0); std::vector energies(n_particles, 0.0); std::vector box_dimensions = {10.0, 10.0, 10.0}; // Large box to avoid PBC effects // Allocate GPU memory and copy data real *d_positions = allocateAndCopyToGPU(positions); real *d_forces = allocateAndCopyToGPU(forces); real *d_energies = allocateAndCopyToGPU(energies); real *d_box_len = allocateAndCopyToGPU(box_dimensions); // Create Lennard-Jones potential (sigma=1.0, epsilon=1.0, rcutoff=3.0) LennardJones potential(1.0, 1.0, 3.0); // Launch kernel dim3 blockSize(256); dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x); CAC::calc_forces_and_energies<<>>( d_positions, d_forces, d_energies, n_particles, d_box_len, potential); checkCudaError(cudaGetLastError(), "kernel launch"); checkCudaError(cudaDeviceSynchronize(), "kernel execution"); // Copy results back to host std::vector result_forces = copyFromGPUAndFree(d_forces, 3 * n_particles); std::vector result_energies = copyFromGPUAndFree(d_energies, n_particles); // Clean up remaining GPU memory checkCudaError(cudaFree(d_positions), "cudaFree positions"); checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); // Verify results - forces should be non-zero and energies should be // calculated bool has_nonzero_force = false; bool has_nonzero_energy = false; for (int i = 0; i < 3 * n_particles; i++) { if (std::abs(result_forces[i]) > tolerance) { has_nonzero_force = true; break; } } for (int i = 0; i < n_particles; i++) { if (std::abs(result_energies[i]) > tolerance) { has_nonzero_energy = true; break; } } EXPECT_FALSE(has_nonzero_force) << "Expected non-zero forces between particles"; EXPECT_TRUE(has_nonzero_energy) << "Expected non-zero energies for particles"; } TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) { const int n_particles = 2; const real tolerance = 1e-5; // Place particles near opposite edges of a small box std::vector positions = { 0.1, 0.0, 0.0, // particle 0 near left edge 4.9, 0.0, 0.0 // particle 1 near right edge }; std::vector forces(3 * n_particles, 0.0); std::vector energies(n_particles, 0.0); std::vector box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC // Allocate GPU memory and copy data real *d_positions = allocateAndCopyToGPU(positions); real *d_forces = allocateAndCopyToGPU(forces); real *d_energies = allocateAndCopyToGPU(energies); real *d_box_len = allocateAndCopyToGPU(box_dimensions); // Create Lennard-Jones potential with large cutoff to ensure interaction LennardJones potential(1.0, 1.0, 3.0); // Launch kernel dim3 blockSize(256); dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x); CAC::calc_forces_and_energies<<>>( d_positions, d_forces, d_energies, n_particles, d_box_len, potential); checkCudaError(cudaGetLastError(), "kernel launch"); checkCudaError(cudaDeviceSynchronize(), "kernel execution"); // Copy results back to host std::vector result_forces = copyFromGPUAndFree(d_forces, 3 * n_particles); std::vector result_energies = copyFromGPUAndFree(d_energies, n_particles); checkCudaError(cudaFree(d_positions), "cudaFree positions"); checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); // With PBC, particles should interact as if they're close (distance ~0.2) // rather than far apart (distance ~4.8) EXPECT_GT(std::abs(result_forces[0]), tolerance) << "Expected significant force due to PBC"; EXPECT_GT(std::abs(result_energies[0]), tolerance) << "Expected significant energy due to PBC"; } TEST_F(CudaKernelTest, SingleParticleTest) { const int n_particles = 1; std::vector positions = {0.0, 0.0, 0.0}; std::vector forces(3 * n_particles, 0.0); std::vector energies(n_particles, 0.0); std::vector box_dimensions = {10.0, 10.0, 10.0}; real *d_positions = allocateAndCopyToGPU(positions); real *d_forces = allocateAndCopyToGPU(forces); real *d_energies = allocateAndCopyToGPU(energies); real *d_box_len = allocateAndCopyToGPU(box_dimensions); LennardJones potential(1.0, 1.0, 3.0); dim3 blockSize(256); dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x); CAC::calc_forces_and_energies<<>>( d_positions, d_forces, d_energies, n_particles, d_box_len, potential); checkCudaError(cudaGetLastError(), "kernel launch"); checkCudaError(cudaDeviceSynchronize(), "kernel execution"); std::vector result_forces = copyFromGPUAndFree(d_forces, 3 * n_particles); std::vector result_energies = copyFromGPUAndFree(d_energies, n_particles); checkCudaError(cudaFree(d_positions), "cudaFree positions"); checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); // Single particle should have zero force and energy EXPECT_NEAR(result_forces[0], 0.0, 1e-10); EXPECT_NEAR(result_forces[1], 0.0, 1e-10); EXPECT_NEAR(result_forces[2], 0.0, 1e-10); EXPECT_NEAR(result_energies[0], 0.0, 1e-10); } TEST_F(CudaKernelTest, ForceSymmetryTest) { const int n_particles = 2; const real tolerance = 1e-5; std::vector positions = { 0.0, 0.0, 0.0, // particle 0 1.5, 0.0, 0.0 // particle 1 }; std::vector forces(3 * n_particles, 0.0); std::vector energies(n_particles, 0.0); std::vector box_dimensions = {10.0, 10.0, 10.0}; real *d_positions = allocateAndCopyToGPU(positions); real *d_forces = allocateAndCopyToGPU(forces); real *d_energies = allocateAndCopyToGPU(energies); real *d_box_len = allocateAndCopyToGPU(box_dimensions); LennardJones potential(1.0, 1.0, 3.0); dim3 blockSize(256); dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x); CAC::calc_forces_and_energies<<>>( d_positions, d_forces, d_energies, n_particles, d_box_len, potential); checkCudaError(cudaGetLastError(), "kernel launch"); checkCudaError(cudaDeviceSynchronize(), "kernel execution"); std::vector result_forces = copyFromGPUAndFree(d_forces, 3 * n_particles); std::vector result_energies = copyFromGPUAndFree(d_energies, n_particles); checkCudaError(cudaFree(d_positions), "cudaFree positions"); checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); // Newton's third law: forces should be equal and opposite EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance) << "Force x-components should be opposite"; EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance) << "Force y-components should be opposite"; EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance) << "Force z-components should be opposite"; // Energies should be equal for symmetric particles EXPECT_NEAR(result_energies[0], result_energies[1], tolerance) << "Energies should be equal"; } // Main function to run tests int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); // Check if CUDA is available int deviceCount; cudaError_t err = cudaGetDeviceCount(&deviceCount); if (err != cudaSuccess || deviceCount == 0) { std::cout << "No CUDA devices available. Skipping CUDA tests." << std::endl; return 0; } return RUN_ALL_TESTS(); }