diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index abcaadf..f07f1a4 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -3,14 +3,12 @@ project(${NAME}_cuda_lib CUDA CXX) set(HEADER_FILES potentials/pair_potentials.cuh forces.cuh - kernel_config.cuh ) set(SOURCE_FILES - kernel_config.cu ) # The library contains header and source files. -add_library(${NAME}_cuda_lib STATIC +add_library(${NAME}_cuda_lib INTERFACE ${SOURCE_FILES} ${HEADER_FILES} ) diff --git a/kernels/forces.cuh b/kernels/forces.cuh index e4f52f1..640da42 100644 --- a/kernels/forces.cuh +++ b/kernels/forces.cuh @@ -1,75 +1,74 @@ #ifndef FORCES_CUH #define FORCES_CUH -#include "kernel_config.cuh" #include "potentials/pair_potentials.cuh" #include "precision.hpp" #include -#include +#include +#include #include namespace CAC { -inline void reset_forces_and_energies(int n_particles, - float4 *forces_energies) { - cudaMemset(forces_energies, 0, n_particles * sizeof(float4)); +inline void reset_forces_and_energies(int n_particles, real *forces, + real *energies) { + cudaMemset(forces, 0, n_particles * sizeof(real) * 3); + cudaMemset(energies, 0, n_particles * sizeof(real)); } template -__global__ void calc_forces_and_energies(float4 *pos, float4 *force_energies, +__global__ void calc_forces_and_energies(real *xs, real *forces, real *energies, int n_particles, real *box_len, PotentialType potential) { - - int i = get_thread_id(); + int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < n_particles) { - float4 my_pos = pos[i]; // Loads 16 bytes in one transaction - real xi = my_pos.x; - real yi = my_pos.y; - real zi = my_pos.z; - - real total_fx = 0, total_fy = 0, total_fz = 0, total_energy = 0; + real xi = xs[3 * i]; + real yi = xs[3 * i + 1]; + real zi = xs[3 * i + 2]; for (int j = 0; j < n_particles; j++) { if (i != j) { - float4 other_pos = pos[j]; - real dx = xi - other_pos.x; - real dy = yi - other_pos.y; - real dz = zi - other_pos.z; + real xj = xs[3 * j]; + real yj = 
xs[3 * j + 1]; + real zj = xs[3 * j + 2]; + + real dx = xi - xj; + real dy = yi - yj; + real dz = zi - zj; // Apply periodic boundary conditions dx -= box_len[0] * round(dx / box_len[0]); dy -= box_len[1] * round(dy / box_len[1]); dz -= box_len[2] * round(dz / box_len[2]); - float4 sol = potential.calc_force_and_energy({dx, dy, dz}); - total_fx += sol.x; - total_fy += sol.y; - total_fz += sol.z; - total_energy += sol.w; + ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz}); + forces[3 * i] += sol.force.x; + forces[3 * i + 1] += sol.force.y; + forces[3 * i + 2] += sol.force.z; + energies[i] += sol.energy; } } - - force_energies[i] = make_float4(total_fx, total_fy, total_fz, total_energy); } } -inline void launch_force_kernels(float4 *xs, float4 *force_energies, + +inline void launch_force_kernels(real *xs, real *forces, real *energies, int n_particles, real *box_len, std::vector potentials, - dim3 blocks, dim3 threads_per_block) { + int grid_size, int block_size) { - reset_forces_and_energies(n_particles, force_energies); + reset_forces_and_energies(n_particles, forces, energies); for (const auto &potential : potentials) { std::visit( [&](const auto &potential) { using PotentialType = std::decay_t; - calc_forces_and_energies - <<>>(xs, force_energies, n_particles, - box_len, potential); + calc_forces_and_energies<<>>( + xs, forces, energies, n_particles, box_len, potential); }, potential); cudaDeviceSynchronize(); } } } // namespace CAC + #endif diff --git a/kernels/kernel_config.cu b/kernels/kernel_config.cu deleted file mode 100644 index 3c1644c..0000000 --- a/kernels/kernel_config.cu +++ /dev/null @@ -1,73 +0,0 @@ -#include "kernel_config.cuh" -#include -#include - -size_t KernelConfig::total_threads() const { - return (size_t)blocks.x * blocks.y * blocks.z * threads.x * threads.y * - threads.z; -} - -void KernelConfig::print() const { - printf("Grid: (%u, %u, %u), Block: (%u, %u, %u), Total threads: %zu\n", - blocks.x, blocks.y, blocks.z, 
threads.x, threads.y, threads.z, - total_threads()); -} - -KernelConfig get_launch_config(size_t n_elements, int threads_per_block, - int max_blocks_per_dim) { - - // Ensure threads_per_block is valid - threads_per_block = std::min(threads_per_block, 1024); - threads_per_block = std::max(threads_per_block, 32); - - // Calculate total blocks needed - size_t total_blocks = - (n_elements + threads_per_block - 1) / threads_per_block; - - dim3 threads(threads_per_block); - dim3 blocks; - - if (total_blocks <= max_blocks_per_dim) { - // Simple 1D grid - blocks = dim3(total_blocks); - } else { - // Use 2D grid - blocks.x = max_blocks_per_dim; - blocks.y = (total_blocks + max_blocks_per_dim - 1) / max_blocks_per_dim; - - // If still too big, use 3D grid - if (blocks.y > max_blocks_per_dim) { - blocks.y = max_blocks_per_dim; - blocks.z = - (total_blocks + (size_t)max_blocks_per_dim * max_blocks_per_dim - 1) / - ((size_t)max_blocks_per_dim * max_blocks_per_dim); - } - } - - return KernelConfig(blocks, threads); -} - -int get_optimal_block_size(int device_id) { - cudaDeviceProp prop; - cudaGetDeviceProperties(&prop, device_id); - - // Use a fraction of max threads per block for better occupancy - // Typically 256 or 512 work well for most kernels - if (prop.maxThreadsPerBlock >= 1024) { - return 256; // Good balance of occupancy and register usage - } else if (prop.maxThreadsPerBlock >= 512) { - return 256; - } else { - return prop.maxThreadsPerBlock / 2; - } -} - -KernelConfig get_launch_config_advanced(size_t n_elements, int device_id) { - cudaDeviceProp prop; - cudaGetDeviceProperties(&prop, device_id); - - int threads_per_block = get_optimal_block_size(device_id); - int max_blocks_per_dim = prop.maxGridSize[0]; - - return get_launch_config(n_elements, threads_per_block, max_blocks_per_dim); -} diff --git a/kernels/kernel_config.cuh b/kernels/kernel_config.cuh deleted file mode 100644 index 66364fe..0000000 --- a/kernels/kernel_config.cuh +++ /dev/null @@ -1,83 +0,0 @@ 
-#ifndef KERNEL_CONFIG_CUH -#define KERNEL_CONFIG_CUH -#include -#include - -/** - * Structure to hold grid launch configuration - */ -struct KernelConfig { - dim3 blocks; - dim3 threads; - - // Convenience constructor - KernelConfig(dim3 b, dim3 t) : blocks(b), threads(t) {} - - // Total number of threads launched - size_t total_threads() const; - - // Print configuration for debugging - void print() const; -}; - -/** - * Calculate optimal CUDA launch configuration for 1D problem - * - * @param n_elements Number of elements to process - * @param threads_per_block Desired threads per block (default: 256) - * @param max_blocks_per_dim Maximum blocks per grid dimension (default: 65535) - * @return LaunchConfig with optimal grid and block dimensions - */ -KernelConfig get_launch_config(size_t n_elements, int threads_per_block = 256, - int max_blocks_per_dim = 65535); - -/** - * Calculate 1D thread index for kernels launched with get_launch_config() - * Use this inside your CUDA kernels - */ -__device__ inline size_t get_thread_id() { - return (size_t)blockIdx.z * gridDim.x * gridDim.y * blockDim.x + - (size_t)blockIdx.y * gridDim.x * blockDim.x + - (size_t)blockIdx.x * blockDim.x + threadIdx.x; -} - -/** - * Alternative version that takes grid dimensions as parameters - * Useful if you need the index calculation in multiple places - */ -__device__ inline size_t get_thread_id(dim3 gridDim, dim3 blockDim, - dim3 blockIdx, dim3 threadIdx) { - return (size_t)blockIdx.z * gridDim.x * gridDim.y * blockDim.x + - (size_t)blockIdx.y * gridDim.x * blockDim.x + - (size_t)blockIdx.x * blockDim.x + threadIdx.x; -} - -/** - * GPU device properties helper - gets optimal block size for current device - */ -int get_optimal_block_size(int device_id = 0); - -/** - * Advanced version that considers device properties - */ -KernelConfig get_launch_config_advanced(size_t n_elements, int device_id = 0); - -// Example usage in your kernel: -/* -template -__global__ void 
calc_forces_and_energies(float4 *pos, float4 *force_energies, - size_t n_particles, real *box_len, - PotentialType potential) { - - size_t i = get_thread_id(); - - if (i < n_particles) { - // Your existing force calculation code here... - float4 my_pos = pos[i]; - // ... rest of kernel unchanged - } -} - -*/ - -#endif diff --git a/kernels/potentials/pair_potentials.cuh b/kernels/potentials/pair_potentials.cuh index 792405c..537b03c 100644 --- a/kernels/potentials/pair_potentials.cuh +++ b/kernels/potentials/pair_potentials.cuh @@ -5,7 +5,6 @@ #include "vec3.h" #include #include -#include #include #ifdef __CUDACC__ @@ -14,6 +13,18 @@ #define CUDA_CALLABLE #endif +/** + * Result struct for the Pair Potential + */ +struct ForceAndEnergy { + real energy; + Vec3 force; + + CUDA_CALLABLE inline static ForceAndEnergy zero() { + return {0.0, {0.0, 0.0, 0.0}}; + }; +}; + /** * Calculate the Lennard-Jones energy and force for the current particle * pair described by displacement vector r @@ -29,7 +40,7 @@ struct LennardJones { m_rcutoffsq = rcutoff * rcutoff; }; - CUDA_CALLABLE float4 calc_force_and_energy(Vec3 r) { + CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3 r) { real rmagsq = r.squared_norm2(); if (rmagsq < m_rcutoffsq && rmagsq > 0.0) { real inv_rmag = 1 / sqrt(rmagsq); @@ -49,10 +60,10 @@ struct LennardJones { (12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag); Vec3 force = r.scale(force_mag * inv_rmag); - return make_float4(force.x, force.y, force.z, energy); + return {energy, force}; } else { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + return ForceAndEnergy::zero(); } }; }; @@ -74,7 +85,7 @@ struct Morse { m_rcutoffsq = rcutoff * rcutoff; }; - CUDA_CALLABLE float4 calc_force_and_energy(Vec3 r) { + CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3 r) { real rmagsq = r.squared_norm2(); if (rmagsq < m_rcutoffsq && rmagsq > 0.0) { real rmag = sqrt(rmagsq); @@ -93,10 +104,10 @@ struct Morse { // Direction: normalized vector Vec3 force = 
r.scale(force_mag / rmag); - return make_float4(force.x, force.y, force.z, energy); + return {energy, force}; } else { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + return ForceAndEnergy::zero(); } }; }; diff --git a/src/precision.hpp b/src/precision.hpp index aabc471..c132c09 100644 --- a/src/precision.hpp +++ b/src/precision.hpp @@ -1,15 +1,15 @@ #ifndef PRECISION_H #define PRECISION_H -#ifdef USE_DOUBLE +#ifdef USE_FLOATS /* - * If macro USE_DOUBLE is set then the default type will be double - * precision. Otherwise we use floats by default + * If macro USE_FLOATS is set then the default type will be single + * precision (float). Otherwise we use double precision by default */ -typedef double real; -#else typedef float real; +#else +typedef double real; #endif #endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8310b86..7f994a6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -10,4 +10,5 @@ if(NOT EXISTS ${GOOGLETEST_DIR}) endif() add_subdirectory(lib/googletest) +add_subdirectory(unit_tests) add_subdirectory(cuda_unit_tests) diff --git a/tests/cuda_unit_tests/test_forces.cu b/tests/cuda_unit_tests/test_forces.cu index 923ebe5..3576dbb 100644 --- a/tests/cuda_unit_tests/test_forces.cu +++ b/tests/cuda_unit_tests/test_forces.cu @@ -5,12 +5,14 @@ // Include your header files #include "forces.cuh" -#include "kernel_config.cuh" #include "potentials/pair_potentials.cuh" #include "precision.hpp" class CudaForceKernelTest : public ::testing::Test { protected: + const int GRID_SIZE = 1; + const int BLOCK_SIZE = 4; + void SetUp() override { // Set up CUDA device cudaError_t err = cudaSetDevice(0); @@ -53,32 +55,33 @@ protected: } // Helper function to run the force calculation kernel - std::vector - run_force_calculation(int n_particles, const std::vector &positions, + std::pair, std::vector> + run_force_calculation(int n_particles, const std::vector &positions, const std::vector &box_dimensions) { - std::vector 
force_energies(n_particles, - make_float4(0.0, 0.0, 0.0, 0.0)); + std::vector forces(3 * n_particles, 0.0); + std::vector energies(n_particles, 0.0); - KernelConfig kernel_config = get_launch_config(n_particles); - float4 *d_positions = allocateAndCopyToGPU(positions); - float4 *d_force_energies = allocateAndCopyToGPU(force_energies); + real *d_positions = allocateAndCopyToGPU(positions); + real *d_forces = allocateAndCopyToGPU(forces); + real *d_energies = allocateAndCopyToGPU(energies); real *d_box_len = allocateAndCopyToGPU(box_dimensions); std::vector potentials = {LennardJones(1.0, 1.0, 3.0)}; - CAC::launch_force_kernels(d_positions, d_force_energies, n_particles, - d_box_len, potentials, kernel_config.blocks, - kernel_config.threads); + CAC::launch_force_kernels(d_positions, d_forces, d_energies, n_particles, + d_box_len, potentials, GRID_SIZE, BLOCK_SIZE); checkCudaError(cudaGetLastError(), "kernel launch"); checkCudaError(cudaDeviceSynchronize(), "kernel execution"); - std::vector result_force_energies = - copyFromGPUAndFree(d_force_energies, n_particles); + std::vector result_forces = + copyFromGPUAndFree(d_forces, 3 * n_particles); + std::vector result_energies = + copyFromGPUAndFree(d_energies, n_particles); checkCudaError(cudaFree(d_positions), "cudaFree positions"); checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); - return result_force_energies; + return {result_forces, result_energies}; } }; @@ -87,14 +90,14 @@ TEST_F(CudaForceKernelTest, BasicFunctionalityTest) { const real tolerance = 1e-5; // Set up test data - simple 2x2 grid of particles - std::vector positions = { - make_float4(0.0, 0.0, 0.0, 0.0), // particle 0 - make_float4(0.5, 0.0, 0.0, 0.0), // particle 1 + std::vector positions = { + 0.0, 0.0, 0.0, // particle 0 + 0.5, 0.0, 0.0, // particle 1 }; std::vector box_dimensions = {10.0, 10.0, 10.0}; - auto result_force_energies = + auto [result_forces, result_energies] = run_force_calculation(n_particles, positions, box_dimensions); // 
Verify results - forces should be non-zero and energies should be @@ -102,14 +105,17 @@ TEST_F(CudaForceKernelTest, BasicFunctionalityTest) { bool has_nonzero_force = false; bool has_nonzero_energy = false; - for (int i = 0; i < n_particles; i++) { - if (std::abs(result_force_energies[i].x) > tolerance || - std::abs(result_force_energies[i].y) > tolerance || - std::abs(result_force_energies[i].z) > tolerance) { + for (int i = 0; i < 3 * n_particles; i++) { + if (std::abs(result_forces[i]) > tolerance) { has_nonzero_force = true; + break; } - if (std::abs(result_force_energies[i].w) > tolerance) { + } + + for (int i = 0; i < n_particles; i++) { + if (std::abs(result_energies[i]) > tolerance) { has_nonzero_energy = true; + break; } } @@ -124,61 +130,60 @@ TEST_F(CudaForceKernelTest, PeriodicBoundaryConditionsTest) { const real tolerance = 1e-5; // Place particles near opposite edges of a small box - std::vector positions = { - make_float4(0.1, 0.0, 0.0, 0.0), // particle 0 near left edge - make_float4(4.9, 0.0, 0.0, 0.0) // particle 1 near right edge + std::vector positions = { + 0.1, 0.0, 0.0, // particle 0 near left edge + 4.9, 0.0, 0.0 // particle 1 near right edge }; std::vector box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC - auto result_force_energies = + auto [result_forces, result_energies] = run_force_calculation(n_particles, positions, box_dimensions); // With PBC, particles should interact as if they're close (distance ~0.2) // rather than far apart (distance ~4.8) - EXPECT_GT(std::abs(result_force_energies[0].x), tolerance) + EXPECT_GT(std::abs(result_forces[0]), tolerance) << "Expected significant force due to PBC"; + EXPECT_GT(std::abs(result_energies[0]), tolerance) + << "Expected significant energy due to PBC"; } TEST_F(CudaForceKernelTest, SingleParticleTest) { const int n_particles = 1; - std::vector positions = {make_float4(0.0, 0.0, 0.0, 0.0)}; + std::vector positions = {0.0, 0.0, 0.0}; std::vector box_dimensions = {10.0, 10.0, 10.0}; 
- auto result_force_energies = + auto [result_forces, result_energies] = run_force_calculation(n_particles, positions, box_dimensions); // Single particle should have zero force and energy - EXPECT_NEAR(result_force_energies[0].x, 0.0, 1e-10); - EXPECT_NEAR(result_force_energies[0].y, 0.0, 1e-10); - EXPECT_NEAR(result_force_energies[0].z, 0.0, 1e-10); - EXPECT_NEAR(result_force_energies[0].w, 0.0, 1e-10); + EXPECT_NEAR(result_forces[0], 0.0, 1e-10); + EXPECT_NEAR(result_forces[1], 0.0, 1e-10); + EXPECT_NEAR(result_forces[2], 0.0, 1e-10); + EXPECT_NEAR(result_energies[0], 0.0, 1e-10); } TEST_F(CudaForceKernelTest, ForceSymmetryTest) { const int n_particles = 2; const real tolerance = 1e-5; - std::vector positions = { - make_float4(0.0, 0.0, 0.0, 0.0), // particle 0 - make_float4(1.5, 0.0, 0.0, 0.0) // particle 1 + std::vector positions = { + 0.0, 0.0, 0.0, // particle 0 + 1.5, 0.0, 0.0 // particle 1 }; std::vector box_dimensions = {10.0, 10.0, 10.0}; - auto result_force_energies = + auto [result_forces, result_energies] = run_force_calculation(n_particles, positions, box_dimensions); // Newton's third law: forces should be equal and opposite - EXPECT_NEAR(result_force_energies[0].x, -result_force_energies[1].x, - tolerance) + EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance) << "Force x-components should be opposite"; - EXPECT_NEAR(result_force_energies[0].y, -result_force_energies[1].y, - tolerance) + EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance) << "Force y-components should be opposite"; - EXPECT_NEAR(result_force_energies[0].z, -result_force_energies[1].z, - tolerance) + EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance) << "Force z-components should be opposite"; // Energies should be equal for symmetric particles - EXPECT_NEAR(result_force_energies[0].w, result_force_energies[1].w, tolerance) + EXPECT_NEAR(result_energies[0], result_energies[1], tolerance) << "Energies should be equal"; } diff --git 
a/tests/cuda_unit_tests/test_potential.cu b/tests/cuda_unit_tests/test_potential.cu index 2541ada..9511ea5 100644 --- a/tests/cuda_unit_tests/test_potential.cu +++ b/tests/cuda_unit_tests/test_potential.cu @@ -2,7 +2,6 @@ #include "precision.hpp" #include "gtest/gtest.h" #include -#include #include // Structure to hold test results from device @@ -19,7 +18,8 @@ struct TestResults { bool near_cutoff_pass; // Additional result data for exact checks - float4 force_energy_values[10]; + real energy_values[10]; + Vec3 force_values[10]; }; // Check if two Vec3 values are close within tolerance @@ -35,7 +35,7 @@ __global__ void lennard_jones_test_kernel(TestResults *results) { real sigma = 1.0; real epsilon = 1.0; real r_cutoff = 2.5; - real tolerance = 1e-5; + real tolerance = 1e-10; // Create LennardJones object on device LennardJones lj(sigma, epsilon, r_cutoff); @@ -43,78 +43,87 @@ __global__ void lennard_jones_test_kernel(TestResults *results) { // Zero Distance Test { Vec3 r = {0.0, 0.0, 0.0}; - float4 result = lj.calc_force_and_energy(r); - results->force_energy_values[0] = result; + auto result = lj.calc_force_and_energy(r); + results->energy_values[0] = result.energy; + results->force_values[0] = result.force; results->zero_distance_pass = - (result.w == 0.0) && - vec3_near(Vec3{0.0, 0.0, 0.0}, - Vec3{result.x, result.y, result.z}, tolerance); + (result.energy == 0.0) && + vec3_near(Vec3{0.0, 0.0, 0.0}, result.force, tolerance); } // Beyond Cutoff Test { Vec3 r = {3.0, 0.0, 0.0}; - float4 result = lj.calc_force_and_energy(r); - results->force_energy_values[1] = result; + auto result = lj.calc_force_and_energy(r); + results->energy_values[1] = result.energy; + results->force_values[1] = result.force; results->beyond_cutoff_pass = - (result.w == 0.0) && - vec3_near(Vec3{0.0, 0.0, 0.0}, - Vec3{result.x, result.y, result.z}, tolerance); + (result.energy == 0.0) && + vec3_near(Vec3{0.0, 0.0, 0.0}, result.force, tolerance); } // At Minimum Test { real min_dist = 
pow(2.0, 1.0 / 6.0) * sigma; Vec3 r = {min_dist, 0.0, 0.0}; - float4 result = lj.calc_force_and_energy(r); - results->force_energy_values[2] = result; + auto result = lj.calc_force_and_energy(r); + results->energy_values[2] = result.energy; + results->force_values[2] = result.force; results->at_minimum_pass = - (fabs(result.w + epsilon) < tolerance) && - vec3_near(Vec3{0.0, 0.0, 0.0}, - Vec3{result.x, result.y, result.z}, tolerance); + (fabs(result.energy + epsilon) < tolerance) && + vec3_near(Vec3{0.0, 0.0, 0.0}, result.force, tolerance); } // At Equilibrium Test { Vec3 r = {sigma, 0.0, 0.0}; - float4 result = lj.calc_force_and_energy(r); - results->force_energy_values[3] = result; - results->at_equilibrium_pass = - (fabs(result.w) < tolerance) && (result.x > 0.0) && - (fabs(result.y) < tolerance) && (fabs(result.z) < tolerance); + auto result = lj.calc_force_and_energy(r); + results->energy_values[3] = result.energy; + results->force_values[3] = result.force; + results->at_equilibrium_pass = (fabs(result.energy) < tolerance) && + (result.force.x > 0.0) && + (fabs(result.force.y) < tolerance) && + (fabs(result.force.z) < tolerance); } // Repulsive Region Test { - Vec3 r = {0.8f * sigma, 0.0, 0.0}; - float4 result = lj.calc_force_and_energy(r); - results->force_energy_values[4] = result; - results->repulsive_region_pass = (result.w > 0.0) && (result.x > 0.0); + Vec3 r = {0.8 * sigma, 0.0, 0.0}; + auto result = lj.calc_force_and_energy(r); + results->energy_values[4] = result.energy; + results->force_values[4] = result.force; + results->repulsive_region_pass = + (result.energy > 0.0) && (result.force.x > 0.0); } // Attractive Region Test { - Vec3 r = {1.5f * sigma, 0.0, 0.0}; - float4 result = lj.calc_force_and_energy(r); - results->force_energy_values[5] = result; - results->attractive_region_pass = (result.w < 0.0) && (result.x < 0.0); + Vec3 r = {1.5 * sigma, 0.0, 0.0}; + auto result = lj.calc_force_and_energy(r); + results->energy_values[5] = result.energy; + 
results->force_values[5] = result.force; + results->attractive_region_pass = + (result.energy < 0.0) && (result.force.x < 0.0); } // Arbitrary Direction Test { Vec3 r = {1.0, 1.0, 1.0}; - float4 result = lj.calc_force_and_energy(r); - results->force_energy_values[6] = result; + auto result = lj.calc_force_and_energy(r); + results->energy_values[6] = result.energy; + results->force_values[6] = result.force; real r_mag = sqrt(r.squared_norm2()); Vec3 normalized_r = r.scale(1.0 / r_mag); - real force_dot_r = result.x * normalized_r.x + result.y * normalized_r.y + - result.z * normalized_r.z; + real force_dot_r = result.force.x * normalized_r.x + + result.force.y * normalized_r.y + + result.force.z * normalized_r.z; results->arbitrary_direction_pass = - (force_dot_r < 0.0) && (fabs(result.x - result.y) < tolerance) && - (fabs(result.y - result.z) < tolerance); + (force_dot_r < 0.0) && + (fabs(result.force.x - result.force.y) < tolerance) && + (fabs(result.force.y - result.force.z) < tolerance); } // Parameter Variation Test @@ -126,31 +135,34 @@ __global__ void lennard_jones_test_kernel(TestResults *results) { LennardJones lj2(new_sigma, new_epsilon, new_r_cutoff); Vec3 r = {2.0, 0.0, 0.0}; - float4 result1 = lj.calc_force_and_energy(r); - float4 result2 = lj2.calc_force_and_energy(r); + auto result1 = lj.calc_force_and_energy(r); + auto result2 = lj2.calc_force_and_energy(r); - results->force_energy_values[7] = result2; + results->energy_values[7] = result2.energy; + results->force_values[7] = result2.force; - results->parameter_variation_pass = - (result1.w != result2.w) && (result1.x != result2.x); + results->parameter_variation_pass = (result1.energy != result2.energy) && + (result1.force.x != result2.force.x); } // Exact Value Check Test { LennardJones lj_exact(1.0, 1.0, 3.0); Vec3 r = {1.5, 0.0, 0.0}; - float4 result = lj_exact.calc_force_and_energy(r); + auto result = lj_exact.calc_force_and_energy(r); - results->force_energy_values[8] = result; + 
results->energy_values[8] = result.energy; + results->force_values[8] = result.force; real expected_energy = 4.0 * (pow(1.0 / 1.5, 12) - pow(1.0 / 1.5, 6)); real expected_force = 24.0 * (pow(1.0 / 1.5, 6) - 2.0 * pow(1.0 / 1.5, 12)) / 1.5; results->exact_value_check_pass = - (fabs(result.w - expected_energy) < tolerance) && - (fabs(result.x + expected_force) < tolerance) && - (fabs(result.y) < tolerance) && (fabs(result.z) < tolerance); + (fabs(result.energy - expected_energy) < tolerance) && + (fabs(result.force.x + expected_force) < tolerance) && + (fabs(result.force.y) < tolerance) && + (fabs(result.force.z) < tolerance); } // Near Cutoff Test @@ -161,18 +173,16 @@ __global__ void lennard_jones_test_kernel(TestResults *results) { Vec3 r_inside = {inside_cutoff, 0.0, 0.0}; Vec3 r_outside = {outside_cutoff, 0.0, 0.0}; - float4 result_inside = lj.calc_force_and_energy(r_inside); - float4 result_outside = lj.calc_force_and_energy(r_outside); + auto result_inside = lj.calc_force_and_energy(r_inside); + auto result_outside = lj.calc_force_and_energy(r_outside); - results->force_energy_values[9] = result_inside; + results->energy_values[9] = result_inside.energy; + results->force_values[9] = result_inside.force; results->near_cutoff_pass = - (result_inside.w != 0.0) && (result_inside.x != 0.0) && - (result_outside.w == 0.0) && - vec3_near( - Vec3{0.0, 0.0, 0.0}, - Vec3{result_outside.x, result_outside.y, result_outside.z}, - tolerance); + (result_inside.energy != 0.0) && (result_inside.force.x != 0.0) && + (result_outside.energy == 0.0) && + vec3_near(Vec3{0.0, 0.0, 0.0}, result_outside.force, tolerance); } } @@ -240,48 +250,44 @@ TEST_F(LennardJonesCudaTest, DeviceZeroDistance) { auto results = runDeviceTests(); EXPECT_TRUE(results.zero_distance_pass) << "Zero distance test failed on device. 
Energy: " - << results.force_energy_values[0].w << ", Force: (" - << results.force_energy_values[0].x << ", " - << results.force_energy_values[0].y << ", " - << results.force_energy_values[0].z << ")"; + << results.energy_values[0] << ", Force: (" << results.force_values[0].x + << ", " << results.force_values[0].y << ", " << results.force_values[0].z + << ")"; } TEST_F(LennardJonesCudaTest, DeviceBeyondCutoff) { auto results = runDeviceTests(); EXPECT_TRUE(results.beyond_cutoff_pass) << "Beyond cutoff test failed on device. Energy: " - << results.force_energy_values[1].w; + << results.energy_values[1]; } TEST_F(LennardJonesCudaTest, DeviceAtMinimum) { auto results = runDeviceTests(); EXPECT_TRUE(results.at_minimum_pass) << "At minimum test failed on device. Energy: " - << results.force_energy_values[2].w; + << results.energy_values[2]; } TEST_F(LennardJonesCudaTest, DeviceAtEquilibrium) { auto results = runDeviceTests(); EXPECT_TRUE(results.at_equilibrium_pass) << "At equilibrium test failed on device. Energy: " - << results.force_energy_values[3].w - << ", Force x: " << results.force_energy_values[3].x; + << results.energy_values[3] << ", Force x: " << results.force_values[3].x; } TEST_F(LennardJonesCudaTest, DeviceRepulsiveRegion) { auto results = runDeviceTests(); EXPECT_TRUE(results.repulsive_region_pass) << "Repulsive region test failed on device. Energy: " - << results.force_energy_values[4].w - << ", Force x: " << results.force_energy_values[4].x; + << results.energy_values[4] << ", Force x: " << results.force_values[4].x; } TEST_F(LennardJonesCudaTest, DeviceAttractiveRegion) { auto results = runDeviceTests(); EXPECT_TRUE(results.attractive_region_pass) << "Attractive region test failed on device. 
Energy: " - << results.force_energy_values[5].w - << ", Force x: " << results.force_energy_values[5].x; + << results.energy_values[5] << ", Force x: " << results.force_values[5].x; } TEST_F(LennardJonesCudaTest, DeviceArbitraryDirection) { @@ -300,13 +306,12 @@ TEST_F(LennardJonesCudaTest, DeviceExactValueCheck) { auto results = runDeviceTests(); EXPECT_TRUE(results.exact_value_check_pass) << "Exact value check test failed on device. Energy: " - << results.force_energy_values[8].w - << ", Force x: " << results.force_energy_values[8].x; + << results.energy_values[8] << ", Force x: " << results.force_values[8].x; } TEST_F(LennardJonesCudaTest, DeviceNearCutoff) { auto results = runDeviceTests(); EXPECT_TRUE(results.near_cutoff_pass) << "Near cutoff test failed on device. Inside energy: " - << results.force_energy_values[9].w; + << results.energy_values[9]; } diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt new file mode 100644 index 0000000..c396ab7 --- /dev/null +++ b/tests/unit_tests/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR}) + +add_executable(${NAME}_tests + test_potential.cpp +) + +target_link_libraries(${NAME}_tests gtest gtest_main) +target_link_libraries(${NAME}_tests ${CMAKE_PROJECT_NAME}_cuda_lib) +add_test(NAME ${NAME}Tests COMMAND ${CMAKE_BINARY_DIR}/tests/unit_tests/${NAME}_tests) diff --git a/tests/unit_tests/test_example.cpp b/tests/unit_tests/test_example.cpp new file mode 100644 index 0000000..bde73e6 --- /dev/null +++ b/tests/unit_tests/test_example.cpp @@ -0,0 +1,5 @@ +#include "gtest/gtest.h" + +TEST(Example, Equals) { + EXPECT_EQ(1, 1); +} \ No newline at end of file diff --git a/tests/unit_tests/test_potential.cpp b/tests/unit_tests/test_potential.cpp new file mode 100644 index 0000000..d6bf23b --- /dev/null +++ b/tests/unit_tests/test_potential.cpp @@ -0,0 +1,174 @@ +#include "potentials/pair_potentials.cuh" +#include "precision.hpp" +#include "gtest/gtest.h" 
+#include + +class LennardJonesTest : public ::testing::Test { +protected: + void SetUp() override { + // Default parameters + sigma = 1.0; + epsilon = 1.0; + r_cutoff = 2.5; + + // Create default LennardJones object + lj = new LennardJones(sigma, epsilon, r_cutoff); + } + + void TearDown() override { delete lj; } + + real sigma; + real epsilon; + real r_cutoff; + LennardJones *lj; + + // Helper function to compare Vec3 values with tolerance + void expect_vec3_near(const Vec3 &expected, const Vec3 &actual, + real tolerance) { + EXPECT_NEAR(expected.x, actual.x, tolerance); + EXPECT_NEAR(expected.y, actual.y, tolerance); + EXPECT_NEAR(expected.z, actual.z, tolerance); + } +}; + +TEST_F(LennardJonesTest, ZeroDistance) { + // At zero distance, the calculation should return zero force and energy + Vec3 r = {0.0, 0.0, 0.0}; + auto result = lj->calc_force_and_energy(r); + + EXPECT_EQ(0.0, result.energy); + expect_vec3_near({0.0, 0.0, 0.0}, result.force, 1e-10); +} + +TEST_F(LennardJonesTest, BeyondCutoff) { + // Distance beyond cutoff should return zero force and energy + Vec3 r = {3.0, 0.0, 0.0}; // 3.0 > r_cutoff (2.5) + auto result = lj->calc_force_and_energy(r); + + EXPECT_EQ(0.0, result.energy); + expect_vec3_near({0.0, 0.0, 0.0}, result.force, 1e-10); +} + +TEST_F(LennardJonesTest, AtMinimum) { + // The LJ potential has a minimum at r = 2^(1/6) * sigma + real min_dist = std::pow(2.0, 1.0 / 6.0) * sigma; + Vec3 r = {min_dist, 0.0, 0.0}; + auto result = lj->calc_force_and_energy(r); + + // At minimum, force should be close to zero + EXPECT_NEAR(-epsilon, result.energy, 1e-10); + expect_vec3_near({0.0, 0.0, 0.0}, result.force, 1e-10); +} + +TEST_F(LennardJonesTest, AtEquilibrium) { + // At r = sigma, the energy should be zero and force should be repulsive + Vec3 r = {sigma, 0.0, 0.0}; + auto result = lj->calc_force_and_energy(r); + + EXPECT_NEAR(0.0, result.energy, 1e-10); + EXPECT_GT(result.force.x, + 0.0); // Force should be repulsive (positive x-direction) + 
+ EXPECT_NEAR(0.0, result.force.y, 1e-10); + EXPECT_NEAR(0.0, result.force.z, 1e-10); +} + +TEST_F(LennardJonesTest, RepulsiveRegion) { + // Test in the repulsive region (r < sigma) + Vec3 r = {0.8 * sigma, 0.0, 0.0}; + auto result = lj->calc_force_and_energy(r); + + // Energy should be positive and force should be repulsive + EXPECT_GT(result.energy, 0.0); + EXPECT_GT(result.force.x, 0.0); // Force should be repulsive +} + +TEST_F(LennardJonesTest, AttractiveRegion) { + // Test in the attractive region (r > r_min = 2^(1/6) * sigma) + Vec3 r = {1.5 * sigma, 0.0, 0.0}; + auto result = lj->calc_force_and_energy(r); + + // Energy should be negative and force should be attractive + EXPECT_LT(result.energy, 0.0); + EXPECT_LT(result.force.x, + 0.0); // Force should be attractive (negative x-direction) +} + +TEST_F(LennardJonesTest, ArbitraryDirection) { + // Test with a vector in an arbitrary direction + Vec3 r = {1.0, 1.0, 1.0}; + auto result = lj->calc_force_and_energy(r); + + // The force should be in the same direction as r but opposite sign + // (attractive region) + real r_mag = std::sqrt(r.squared_norm2()); + + // Calculate expected force direction (should be along -r) + Vec3 normalized_r = r.scale(1.0 / r_mag); + real force_dot_r = result.force.x * normalized_r.x + + result.force.y * normalized_r.y + + result.force.z * normalized_r.z; + + // In this case, we're at r = sqrt(3) * sigma which is in attractive region + EXPECT_LT(force_dot_r, 0.0); // Force should be attractive + + // Force should be symmetric in all dimensions for this vector + EXPECT_NEAR(result.force.x, result.force.y, 1e-10); + EXPECT_NEAR(result.force.y, result.force.z, 1e-10); +} + +TEST_F(LennardJonesTest, ParameterVariation) { + // Test with different parameter values + real new_sigma = 2.0; + real new_epsilon = 0.5; + real new_r_cutoff = 5.0; + + LennardJones lj2(new_sigma, new_epsilon, new_r_cutoff); + + Vec3 r = {2.0, 0.0, 0.0}; + auto result1 = lj->calc_force_and_energy(r); + auto result2 = 
lj2.calc_force_and_energy(r); + + // Results should be different with different parameters + EXPECT_NE(result1.energy, result2.energy); + EXPECT_NE(result1.force.x, result2.force.x); +} + +TEST_F(LennardJonesTest, ExactValueCheck) { + // Test with pre-calculated values for a specific case + LennardJones lj_exact(1.0, 1.0, 3.0); + Vec3 r = {1.5, 0.0, 0.0}; + auto result = lj_exact.calc_force_and_energy(r); + + // Pre-calculated values (you may need to adjust these based on your specific + // implementation) + real expected_energy = + 4.0 * (std::pow(1.0 / 1.5, 12) - std::pow(1.0 / 1.5, 6)); + real expected_force = + 24.0 * (std::pow(1.0 / 1.5, 6) - 2.0 * std::pow(1.0 / 1.5, 12)) / 1.5; + + EXPECT_NEAR(expected_energy, result.energy, 1e-10); + EXPECT_NEAR(-expected_force, result.force.x, + 1e-10); // Negative because force is attractive + EXPECT_NEAR(0.0, result.force.y, 1e-10); + EXPECT_NEAR(0.0, result.force.z, 1e-10); +} + +TEST_F(LennardJonesTest, NearCutoff) { + // Test behavior just inside and just outside the cutoff + real inside_cutoff = r_cutoff - 0.01; + real outside_cutoff = r_cutoff + 0.01; + + Vec3 r_inside = {inside_cutoff, 0.0, 0.0}; + Vec3 r_outside = {outside_cutoff, 0.0, 0.0}; + + auto result_inside = lj->calc_force_and_energy(r_inside); + auto result_outside = lj->calc_force_and_energy(r_outside); + + // Inside should have non-zero values + EXPECT_NE(0.0, result_inside.energy); + EXPECT_NE(0.0, result_inside.force.x); + + // Outside should be zero + EXPECT_EQ(0.0, result_outside.energy); + expect_vec3_near({0.0, 0.0, 0.0}, result_outside.force, 1e-10); +}