Change default precision to float and use float4 for force and potential calculations

This commit is contained in:
Alex Selimov 2025-09-12 21:44:41 -04:00
parent dd83fc6330
commit 130b613a7c
Signed by: aselimov
GPG key ID: 3DDB9C3E023F1F31
9 changed files with 151 additions and 362 deletions

View file

@ -3,72 +3,71 @@
#include "potentials/pair_potentials.cuh" #include "potentials/pair_potentials.cuh"
#include "precision.hpp" #include "precision.hpp"
#include <cstdio> #include <cstdio>
#include <type_traits> #include <cuda_runtime.h>
#include <variant>
#include <vector> #include <vector>
namespace CAC {

/**
 * Zero the packed per-particle force/energy buffer on the device.
 *
 * @param n_particles     Number of float4 entries in the buffer.
 * @param forces_energies Device pointer to n_particles float4 values
 *                        (x, y, z = force components, w = energy).
 */
inline void reset_forces_and_energies(int n_particles,
                                      float4 *forces_energies) {
  // cudaMemset writes bytes; an all-zero bit pattern is 0.0f in every lane.
  cudaMemset(forces_energies, 0, n_particles * sizeof(float4));
}

/**
 * Brute-force pair interaction kernel: thread i visits every other particle
 * j, wraps each displacement component by the matching box length, evaluates
 * `potential` on the wrapped displacement and sums the results.
 *
 * The summed contribution is ADDED to force_energies[i]. The buffer must
 * therefore be zeroed (see reset_forces_and_energies) before the first launch
 * of a force evaluation; launching once per potential then yields the sum over
 * all potentials instead of only the last one launched.
 *
 * @param pos            Device array of n_particles positions; only x, y, z
 *                       are read.
 * @param force_energies Device array of n_particles accumulators
 *                       (x, y, z = force, w = energy).
 * @param n_particles    Number of particles.
 * @param box_len        Device array holding the three box edge lengths.
 * @param potential      Pair potential exposing
 *                       calc_force_and_energy(Vec3<real>) -> float4.
 */
template <typename PotentialType>
__global__ void calc_forces_and_energies(float4 *pos, float4 *force_energies,
                                         int n_particles, real *box_len,
                                         PotentialType potential) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i >= n_particles) {
    return; // surplus threads past the end of the particle list
  }

  const float4 my_pos = pos[i];
  const real xi = my_pos.x;
  const real yi = my_pos.y;
  const real zi = my_pos.z;

  // The box lengths do not change during the pair loop; read them once.
  const real box_x = box_len[0];
  const real box_y = box_len[1];
  const real box_z = box_len[2];

  real total_fx = 0, total_fy = 0, total_fz = 0, total_energy = 0;

  for (int j = 0; j < n_particles; j++) {
    if (i == j) {
      continue; // no self interaction
    }

    const float4 other_pos = pos[j];
    real dx = xi - other_pos.x;
    real dy = yi - other_pos.y;
    real dz = zi - other_pos.z;

    // Apply periodic boundary conditions
    dx -= box_x * round(dx / box_x);
    dy -= box_y * round(dy / box_y);
    dz -= box_z * round(dz / box_z);

    const float4 sol = potential.calc_force_and_energy({dx, dy, dz});
    total_fx += sol.x;
    total_fy += sol.y;
    total_fz += sol.z;
    total_energy += sol.w;
  }

  // Accumulate rather than overwrite so that successive launches (one per
  // potential) add up. Each thread owns exactly one entry, so this
  // read-modify-write needs no atomics.
  float4 acc = force_energies[i];
  acc.x += total_fx;
  acc.y += total_fy;
  acc.z += total_fz;
  acc.w += total_energy;
  force_energies[i] = acc;
}

/**
 * Zero the accumulator, then launch calc_forces_and_energies once for every
 * potential in `potentials`, synchronizing the device after each launch.
 *
 * @param xs             Device array of particle positions.
 * @param force_energies Device array receiving the summed force (x, y, z) and
 *                       energy (w) per particle.
 * @param n_particles    Number of particles.
 * @param box_len        Device array holding the three box edge lengths.
 * @param potentials     Potentials to evaluate; each element is a variant that
 *                       is dispatched to its concrete type with std::visit.
 * @param grid_size      Number of blocks per launch.
 * @param block_size     Number of threads per block.
 */
inline void launch_force_kernels(float4 *xs, float4 *force_energies,
                                 int n_particles, real *box_len,
                                 std::vector<PairPotentials> potentials,
                                 int grid_size, int block_size) {
  reset_forces_and_energies(n_particles, force_energies);

  for (const auto &potential : potentials) {
    std::visit(
        [&](const auto &concrete) {
          using PotentialType = std::decay_t<decltype(concrete)>;
          calc_forces_and_energies<PotentialType><<<grid_size, block_size>>>(
              xs, force_energies, n_particles, box_len, concrete);
        },
        potential);
    cudaDeviceSynchronize();
  }
}

} // namespace CAC
#endif #endif

View file

@ -5,6 +5,7 @@
#include "vec3.h" #include "vec3.h"
#include <cmath> #include <cmath>
#include <cstdio> #include <cstdio>
#include <cuda_runtime.h>
#include <variant> #include <variant>
#ifdef __CUDACC__ #ifdef __CUDACC__
@ -13,18 +14,6 @@
#define CUDA_CALLABLE #define CUDA_CALLABLE
#endif #endif
/**
 * Value returned by a pair-potential evaluation: a scalar energy together
 * with a force vector.
 */
struct ForceAndEnergy {
  real energy;
  Vec3<real> force;

  /** Build a result whose energy and force components are all zero. */
  CUDA_CALLABLE inline static ForceAndEnergy zero() {
    ForceAndEnergy nothing{0.0, {0.0, 0.0, 0.0}};
    return nothing;
  }
};
/** /**
* Calculate the Lennard-Jones energy and force for the current particle * Calculate the Lennard-Jones energy and force for the current particle
* pair described by displacement vector r * pair described by displacement vector r
@ -40,7 +29,7 @@ struct LennardJones {
m_rcutoffsq = rcutoff * rcutoff; m_rcutoffsq = rcutoff * rcutoff;
}; };
CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3<real> r) { CUDA_CALLABLE float4 calc_force_and_energy(Vec3<real> r) {
real rmagsq = r.squared_norm2(); real rmagsq = r.squared_norm2();
if (rmagsq < m_rcutoffsq && rmagsq > 0.0) { if (rmagsq < m_rcutoffsq && rmagsq > 0.0) {
real inv_rmag = 1 / sqrt(rmagsq); real inv_rmag = 1 / sqrt(rmagsq);
@ -60,10 +49,10 @@ struct LennardJones {
(12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag); (12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag);
Vec3<real> force = r.scale(force_mag * inv_rmag); Vec3<real> force = r.scale(force_mag * inv_rmag);
return {energy, force}; return make_float4(force.x, force.y, force.z, energy);
} else { } else {
return ForceAndEnergy::zero(); return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
} }
}; };
}; };
@ -85,7 +74,7 @@ struct Morse {
m_rcutoffsq = rcutoff * rcutoff; m_rcutoffsq = rcutoff * rcutoff;
}; };
CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3<real> r) { CUDA_CALLABLE float4 calc_force_and_energy(Vec3<real> r) {
real rmagsq = r.squared_norm2(); real rmagsq = r.squared_norm2();
if (rmagsq < m_rcutoffsq && rmagsq > 0.0) { if (rmagsq < m_rcutoffsq && rmagsq > 0.0) {
real rmag = sqrt(rmagsq); real rmag = sqrt(rmagsq);
@ -104,10 +93,10 @@ struct Morse {
// Direction: normalized vector // Direction: normalized vector
Vec3<real> force = r.scale(force_mag / rmag); Vec3<real> force = r.scale(force_mag / rmag);
return {energy, force}; return make_float4(force.x, force.y, force.z, energy);
} else { } else {
return ForceAndEnergy::zero(); return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
} }
}; };
}; };

View file

@ -1,15 +1,15 @@
#ifndef PRECISION_H
#define PRECISION_H

/*
 * `real` is the floating point type selected at compile time.
 * It is single precision (float) unless the macro USE_DOUBLE is defined,
 * in which case it is double precision.
 */
#ifndef USE_DOUBLE
using real = float;
#else
using real = double;
#endif

#endif

View file

@ -10,5 +10,4 @@ if(NOT EXISTS ${GOOGLETEST_DIR})
endif() endif()
add_subdirectory(lib/googletest) add_subdirectory(lib/googletest)
add_subdirectory(unit_tests)
add_subdirectory(cuda_unit_tests) add_subdirectory(cuda_unit_tests)

View file

@ -55,33 +55,30 @@ protected:
} }
// Uploads the inputs, runs the force kernels with a single Lennard-Jones
// potential (constructed as LennardJones(1.0, 1.0, 3.0)) and returns the packed
// per-particle result: x, y, z hold the force and w the energy.
std::vector<float4>
run_force_calculation(int n_particles, const std::vector<float4> &positions,
                      const std::vector<real> &box_dimensions) {
  // Host-side zeros used to seed the device output buffer.
  const float4 zero_entry = make_float4(0.0, 0.0, 0.0, 0.0);
  std::vector<float4> host_output(n_particles, zero_entry);

  float4 *d_positions = allocateAndCopyToGPU(positions);
  float4 *d_force_energies = allocateAndCopyToGPU(host_output);
  real *d_box_len = allocateAndCopyToGPU(box_dimensions);

  std::vector<PairPotentials> potentials = {LennardJones(1.0, 1.0, 3.0)};
  CAC::launch_force_kernels(d_positions, d_force_energies, n_particles,
                            d_box_len, potentials, GRID_SIZE, BLOCK_SIZE);

  checkCudaError(cudaGetLastError(), "kernel launch");
  checkCudaError(cudaDeviceSynchronize(), "kernel execution");

  // Fetch the output; the helper also releases the output buffer.
  std::vector<float4> computed =
      copyFromGPUAndFree(d_force_energies, n_particles);

  // The input buffers are still owned here and are released explicitly.
  checkCudaError(cudaFree(d_positions), "cudaFree positions");
  checkCudaError(cudaFree(d_box_len), "cudaFree box_len");

  return computed;
}
}; };
@ -90,14 +87,14 @@ TEST_F(CudaForceKernelTest, BasicFunctionalityTest) {
const real tolerance = 1e-5; const real tolerance = 1e-5;
// Set up test data - simple 2x2 grid of particles // Set up test data - simple 2x2 grid of particles
std::vector<real> positions = { std::vector<float4> positions = {
0.0, 0.0, 0.0, // particle 0 make_float4(0.0, 0.0, 0.0, 0.0), // particle 0
0.5, 0.0, 0.0, // particle 1 make_float4(0.5, 0.0, 0.0, 0.0), // particle 1
}; };
std::vector<real> box_dimensions = {10.0, 10.0, 10.0}; std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
auto [result_forces, result_energies] = auto result_force_energies =
run_force_calculation(n_particles, positions, box_dimensions); run_force_calculation(n_particles, positions, box_dimensions);
// Verify results - forces should be non-zero and energies should be // Verify results - forces should be non-zero and energies should be
@ -105,17 +102,14 @@ TEST_F(CudaForceKernelTest, BasicFunctionalityTest) {
bool has_nonzero_force = false; bool has_nonzero_force = false;
bool has_nonzero_energy = false; bool has_nonzero_energy = false;
for (int i = 0; i < 3 * n_particles; i++) {
if (std::abs(result_forces[i]) > tolerance) {
has_nonzero_force = true;
break;
}
}
for (int i = 0; i < n_particles; i++) { for (int i = 0; i < n_particles; i++) {
if (std::abs(result_energies[i]) > tolerance) { if (std::abs(result_force_energies[i].x) > tolerance ||
std::abs(result_force_energies[i].y) > tolerance ||
std::abs(result_force_energies[i].z) > tolerance) {
has_nonzero_force = true;
}
if (std::abs(result_force_energies[i].w) > tolerance) {
has_nonzero_energy = true; has_nonzero_energy = true;
break;
} }
} }
@ -130,60 +124,61 @@ TEST_F(CudaForceKernelTest, PeriodicBoundaryConditionsTest) {
const real tolerance = 1e-5; const real tolerance = 1e-5;
// Place particles near opposite edges of a small box // Place particles near opposite edges of a small box
std::vector<real> positions = { std::vector<float4> positions = {
0.1, 0.0, 0.0, // particle 0 near left edge make_float4(0.1, 0.0, 0.0, 0.0), // particle 0 near left edge
4.9, 0.0, 0.0 // particle 1 near right edge make_float4(4.9, 0.0, 0.0, 0.0) // particle 1 near right edge
}; };
std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC
auto [result_forces, result_energies] = auto result_force_energies =
run_force_calculation(n_particles, positions, box_dimensions); run_force_calculation(n_particles, positions, box_dimensions);
// With PBC, particles should interact as if they're close (distance ~0.2) // With PBC, particles should interact as if they're close (distance ~0.2)
// rather than far apart (distance ~4.8) // rather than far apart (distance ~4.8)
EXPECT_GT(std::abs(result_forces[0]), tolerance) EXPECT_GT(std::abs(result_force_energies[0].x), tolerance)
<< "Expected significant force due to PBC"; << "Expected significant force due to PBC";
EXPECT_GT(std::abs(result_energies[0]), tolerance)
<< "Expected significant energy due to PBC";
} }
TEST_F(CudaForceKernelTest, SingleParticleTest) {
  const int n_particles = 1;

  // One particle at the origin of a 10 x 10 x 10 box: nothing to interact with.
  std::vector<float4> positions(1, make_float4(0.0, 0.0, 0.0, 0.0));
  std::vector<real> box_dimensions = {10.0, 10.0, 10.0};

  const std::vector<float4> computed =
      run_force_calculation(n_particles, positions, box_dimensions);
  const float4 &lone = computed[0];

  // Single particle should have zero force and energy
  EXPECT_NEAR(lone.x, 0.0, 1e-10);
  EXPECT_NEAR(lone.y, 0.0, 1e-10);
  EXPECT_NEAR(lone.z, 0.0, 1e-10);
  EXPECT_NEAR(lone.w, 0.0, 1e-10);
}
TEST_F(CudaForceKernelTest, ForceSymmetryTest) {
  const int n_particles = 2;
  const real tolerance = 1e-5;

  // Two particles 1.5 apart along x inside a 10 x 10 x 10 box.
  std::vector<float4> positions;
  positions.push_back(make_float4(0.0, 0.0, 0.0, 0.0)); // particle 0
  positions.push_back(make_float4(1.5, 0.0, 0.0, 0.0)); // particle 1
  std::vector<real> box_dimensions = {10.0, 10.0, 10.0};

  const std::vector<float4> computed =
      run_force_calculation(n_particles, positions, box_dimensions);
  const float4 &first = computed[0];
  const float4 &second = computed[1];

  // Newton's third law: forces should be equal and opposite
  EXPECT_NEAR(first.x, -second.x, tolerance)
      << "Force x-components should be opposite";
  EXPECT_NEAR(first.y, -second.y, tolerance)
      << "Force y-components should be opposite";
  EXPECT_NEAR(first.z, -second.z, tolerance)
      << "Force z-components should be opposite";

  // Energies should be equal for symmetric particles
  EXPECT_NEAR(first.w, second.w, tolerance) << "Energies should be equal";
}

View file

@ -2,6 +2,7 @@
#include "precision.hpp" #include "precision.hpp"
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include <cmath> #include <cmath>
#include <cstdio>
#include <cuda_runtime.h> #include <cuda_runtime.h>
// Structure to hold test results from device // Structure to hold test results from device
@ -18,8 +19,7 @@ struct TestResults {
bool near_cutoff_pass; bool near_cutoff_pass;
// Additional result data for exact checks // Additional result data for exact checks
real energy_values[10]; float4 force_energy_values[10];
Vec3<real> force_values[10];
}; };
// Check if two Vec3 values are close within tolerance // Check if two Vec3 values are close within tolerance
@ -35,7 +35,7 @@ __global__ void lennard_jones_test_kernel(TestResults *results) {
real sigma = 1.0; real sigma = 1.0;
real epsilon = 1.0; real epsilon = 1.0;
real r_cutoff = 2.5; real r_cutoff = 2.5;
real tolerance = 1e-10; real tolerance = 1e-5;
// Create LennardJones object on device // Create LennardJones object on device
LennardJones lj(sigma, epsilon, r_cutoff); LennardJones lj(sigma, epsilon, r_cutoff);
@ -43,87 +43,78 @@ __global__ void lennard_jones_test_kernel(TestResults *results) {
// Zero Distance Test // Zero Distance Test
{ {
Vec3<real> r = {0.0, 0.0, 0.0}; Vec3<real> r = {0.0, 0.0, 0.0};
auto result = lj.calc_force_and_energy(r); float4 result = lj.calc_force_and_energy(r);
results->energy_values[0] = result.energy; results->force_energy_values[0] = result;
results->force_values[0] = result.force;
results->zero_distance_pass = results->zero_distance_pass =
(result.energy == 0.0) && (result.w == 0.0) &&
vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance); vec3_near(Vec3<real>{0.0, 0.0, 0.0},
Vec3<real>{result.x, result.y, result.z}, tolerance);
} }
// Beyond Cutoff Test // Beyond Cutoff Test
{ {
Vec3<real> r = {3.0, 0.0, 0.0}; Vec3<real> r = {3.0, 0.0, 0.0};
auto result = lj.calc_force_and_energy(r); float4 result = lj.calc_force_and_energy(r);
results->energy_values[1] = result.energy; results->force_energy_values[1] = result;
results->force_values[1] = result.force;
results->beyond_cutoff_pass = results->beyond_cutoff_pass =
(result.energy == 0.0) && (result.w == 0.0) &&
vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance); vec3_near(Vec3<real>{0.0, 0.0, 0.0},
Vec3<real>{result.x, result.y, result.z}, tolerance);
} }
// At Minimum Test // At Minimum Test
{ {
real min_dist = pow(2.0, 1.0 / 6.0) * sigma; real min_dist = pow(2.0, 1.0 / 6.0) * sigma;
Vec3<real> r = {min_dist, 0.0, 0.0}; Vec3<real> r = {min_dist, 0.0, 0.0};
auto result = lj.calc_force_and_energy(r); float4 result = lj.calc_force_and_energy(r);
results->energy_values[2] = result.energy; results->force_energy_values[2] = result;
results->force_values[2] = result.force;
results->at_minimum_pass = results->at_minimum_pass =
(fabs(result.energy + epsilon) < tolerance) && (fabs(result.w + epsilon) < tolerance) &&
vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance); vec3_near(Vec3<real>{0.0, 0.0, 0.0},
Vec3<real>{result.x, result.y, result.z}, tolerance);
} }
// At Equilibrium Test // At Equilibrium Test
{ {
Vec3<real> r = {sigma, 0.0, 0.0}; Vec3<real> r = {sigma, 0.0, 0.0};
auto result = lj.calc_force_and_energy(r); float4 result = lj.calc_force_and_energy(r);
results->energy_values[3] = result.energy; results->force_energy_values[3] = result;
results->force_values[3] = result.force; results->at_equilibrium_pass =
results->at_equilibrium_pass = (fabs(result.energy) < tolerance) && (fabs(result.w) < tolerance) && (result.x > 0.0) &&
(result.force.x > 0.0) && (fabs(result.y) < tolerance) && (fabs(result.z) < tolerance);
(fabs(result.force.y) < tolerance) &&
(fabs(result.force.z) < tolerance);
} }
// Repulsive Region Test // Repulsive Region Test
{ {
Vec3<real> r = {0.8 * sigma, 0.0, 0.0}; Vec3<real> r = {0.8f * sigma, 0.0, 0.0};
auto result = lj.calc_force_and_energy(r); float4 result = lj.calc_force_and_energy(r);
results->energy_values[4] = result.energy; results->force_energy_values[4] = result;
results->force_values[4] = result.force; results->repulsive_region_pass = (result.w > 0.0) && (result.x > 0.0);
results->repulsive_region_pass =
(result.energy > 0.0) && (result.force.x > 0.0);
} }
// Attractive Region Test // Attractive Region Test
{ {
Vec3<real> r = {1.5 * sigma, 0.0, 0.0}; Vec3<real> r = {1.5f * sigma, 0.0, 0.0};
auto result = lj.calc_force_and_energy(r); float4 result = lj.calc_force_and_energy(r);
results->energy_values[5] = result.energy; results->force_energy_values[5] = result;
results->force_values[5] = result.force; results->attractive_region_pass = (result.w < 0.0) && (result.x < 0.0);
results->attractive_region_pass =
(result.energy < 0.0) && (result.force.x < 0.0);
} }
// Arbitrary Direction Test // Arbitrary Direction Test
{ {
Vec3<real> r = {1.0, 1.0, 1.0}; Vec3<real> r = {1.0, 1.0, 1.0};
auto result = lj.calc_force_and_energy(r); float4 result = lj.calc_force_and_energy(r);
results->energy_values[6] = result.energy; results->force_energy_values[6] = result;
results->force_values[6] = result.force;
real r_mag = sqrt(r.squared_norm2()); real r_mag = sqrt(r.squared_norm2());
Vec3<real> normalized_r = r.scale(1.0 / r_mag); Vec3<real> normalized_r = r.scale(1.0 / r_mag);
real force_dot_r = result.force.x * normalized_r.x + real force_dot_r = result.x * normalized_r.x + result.y * normalized_r.y +
result.force.y * normalized_r.y + result.z * normalized_r.z;
result.force.z * normalized_r.z;
results->arbitrary_direction_pass = results->arbitrary_direction_pass =
(force_dot_r < 0.0) && (force_dot_r < 0.0) && (fabs(result.x - result.y) < tolerance) &&
(fabs(result.force.x - result.force.y) < tolerance) && (fabs(result.y - result.z) < tolerance);
(fabs(result.force.y - result.force.z) < tolerance);
} }
// Parameter Variation Test // Parameter Variation Test
@ -135,34 +126,31 @@ __global__ void lennard_jones_test_kernel(TestResults *results) {
LennardJones lj2(new_sigma, new_epsilon, new_r_cutoff); LennardJones lj2(new_sigma, new_epsilon, new_r_cutoff);
Vec3<real> r = {2.0, 0.0, 0.0}; Vec3<real> r = {2.0, 0.0, 0.0};
auto result1 = lj.calc_force_and_energy(r); float4 result1 = lj.calc_force_and_energy(r);
auto result2 = lj2.calc_force_and_energy(r); float4 result2 = lj2.calc_force_and_energy(r);
results->energy_values[7] = result2.energy; results->force_energy_values[7] = result2;
results->force_values[7] = result2.force;
results->parameter_variation_pass = (result1.energy != result2.energy) && results->parameter_variation_pass =
(result1.force.x != result2.force.x); (result1.w != result2.w) && (result1.x != result2.x);
} }
// Exact Value Check Test // Exact Value Check Test
{ {
LennardJones lj_exact(1.0, 1.0, 3.0); LennardJones lj_exact(1.0, 1.0, 3.0);
Vec3<real> r = {1.5, 0.0, 0.0}; Vec3<real> r = {1.5, 0.0, 0.0};
auto result = lj_exact.calc_force_and_energy(r); float4 result = lj_exact.calc_force_and_energy(r);
results->energy_values[8] = result.energy; results->force_energy_values[8] = result;
results->force_values[8] = result.force;
real expected_energy = 4.0 * (pow(1.0 / 1.5, 12) - pow(1.0 / 1.5, 6)); real expected_energy = 4.0 * (pow(1.0 / 1.5, 12) - pow(1.0 / 1.5, 6));
real expected_force = real expected_force =
24.0 * (pow(1.0 / 1.5, 6) - 2.0 * pow(1.0 / 1.5, 12)) / 1.5; 24.0 * (pow(1.0 / 1.5, 6) - 2.0 * pow(1.0 / 1.5, 12)) / 1.5;
results->exact_value_check_pass = results->exact_value_check_pass =
(fabs(result.energy - expected_energy) < tolerance) && (fabs(result.w - expected_energy) < tolerance) &&
(fabs(result.force.x + expected_force) < tolerance) && (fabs(result.x + expected_force) < tolerance) &&
(fabs(result.force.y) < tolerance) && (fabs(result.y) < tolerance) && (fabs(result.z) < tolerance);
(fabs(result.force.z) < tolerance);
} }
// Near Cutoff Test // Near Cutoff Test
@ -173,16 +161,18 @@ __global__ void lennard_jones_test_kernel(TestResults *results) {
Vec3<real> r_inside = {inside_cutoff, 0.0, 0.0}; Vec3<real> r_inside = {inside_cutoff, 0.0, 0.0};
Vec3<real> r_outside = {outside_cutoff, 0.0, 0.0}; Vec3<real> r_outside = {outside_cutoff, 0.0, 0.0};
auto result_inside = lj.calc_force_and_energy(r_inside); float4 result_inside = lj.calc_force_and_energy(r_inside);
auto result_outside = lj.calc_force_and_energy(r_outside); float4 result_outside = lj.calc_force_and_energy(r_outside);
results->energy_values[9] = result_inside.energy; results->force_energy_values[9] = result_inside;
results->force_values[9] = result_inside.force;
results->near_cutoff_pass = results->near_cutoff_pass =
(result_inside.energy != 0.0) && (result_inside.force.x != 0.0) && (result_inside.w != 0.0) && (result_inside.x != 0.0) &&
(result_outside.energy == 0.0) && (result_outside.w == 0.0) &&
vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result_outside.force, tolerance); vec3_near(
Vec3<real>{0.0, 0.0, 0.0},
Vec3<real>{result_outside.x, result_outside.y, result_outside.z},
tolerance);
} }
} }
@ -250,44 +240,48 @@ TEST_F(LennardJonesCudaTest, DeviceZeroDistance) {
auto results = runDeviceTests(); auto results = runDeviceTests();
EXPECT_TRUE(results.zero_distance_pass) EXPECT_TRUE(results.zero_distance_pass)
<< "Zero distance test failed on device. Energy: " << "Zero distance test failed on device. Energy: "
<< results.energy_values[0] << ", Force: (" << results.force_values[0].x << results.force_energy_values[0].w << ", Force: ("
<< ", " << results.force_values[0].y << ", " << results.force_values[0].z << results.force_energy_values[0].x << ", "
<< ")"; << results.force_energy_values[0].y << ", "
<< results.force_energy_values[0].z << ")";
} }
TEST_F(LennardJonesCudaTest, DeviceBeyondCutoff) {
  // Slot 1 of force_energy_values holds the beyond-cutoff evaluation.
  const auto device_results = runDeviceTests();
  const float4 &beyond = device_results.force_energy_values[1];
  EXPECT_TRUE(device_results.beyond_cutoff_pass)
      << "Beyond cutoff test failed on device. Energy: " << beyond.w;
}
TEST_F(LennardJonesCudaTest, DeviceAtMinimum) {
  // Slot 2 of force_energy_values holds the evaluation at the potential
  // minimum.
  const auto device_results = runDeviceTests();
  const float4 &at_min = device_results.force_energy_values[2];
  EXPECT_TRUE(device_results.at_minimum_pass)
      << "At minimum test failed on device. Energy: " << at_min.w;
}
TEST_F(LennardJonesCudaTest, DeviceAtEquilibrium) {
  // Slot 3 of force_energy_values holds the evaluation at r = sigma.
  const auto device_results = runDeviceTests();
  const float4 &at_sigma = device_results.force_energy_values[3];
  EXPECT_TRUE(device_results.at_equilibrium_pass)
      << "At equilibrium test failed on device. Energy: " << at_sigma.w
      << ", Force x: " << at_sigma.x;
}
TEST_F(LennardJonesCudaTest, DeviceRepulsiveRegion) {
  // Slot 4 of force_energy_values holds the evaluation at r = 0.8 * sigma.
  const auto device_results = runDeviceTests();
  const float4 &repulsive = device_results.force_energy_values[4];
  EXPECT_TRUE(device_results.repulsive_region_pass)
      << "Repulsive region test failed on device. Energy: " << repulsive.w
      << ", Force x: " << repulsive.x;
}
TEST_F(LennardJonesCudaTest, DeviceAttractiveRegion) {
  // Slot 5 of force_energy_values holds the evaluation at r = 1.5 * sigma.
  const auto device_results = runDeviceTests();
  const float4 &attractive = device_results.force_energy_values[5];
  EXPECT_TRUE(device_results.attractive_region_pass)
      << "Attractive region test failed on device. Energy: " << attractive.w
      << ", Force x: " << attractive.x;
}
TEST_F(LennardJonesCudaTest, DeviceArbitraryDirection) { TEST_F(LennardJonesCudaTest, DeviceArbitraryDirection) {
@ -306,12 +300,13 @@ TEST_F(LennardJonesCudaTest, DeviceExactValueCheck) {
auto results = runDeviceTests(); auto results = runDeviceTests();
EXPECT_TRUE(results.exact_value_check_pass) EXPECT_TRUE(results.exact_value_check_pass)
<< "Exact value check test failed on device. Energy: " << "Exact value check test failed on device. Energy: "
<< results.energy_values[8] << ", Force x: " << results.force_values[8].x; << results.force_energy_values[8].w
<< ", Force x: " << results.force_energy_values[8].x;
} }
TEST_F(LennardJonesCudaTest, DeviceNearCutoff) {
  // Slot 9 of force_energy_values holds the evaluation just inside the cutoff.
  const auto device_results = runDeviceTests();
  const float4 &inside = device_results.force_energy_values[9];
  EXPECT_TRUE(device_results.near_cutoff_pass)
      << "Near cutoff test failed on device. Inside energy: " << inside.w;
}

View file

@ -1,9 +0,0 @@
# Build script for the unit-test executable `${NAME}_tests`.

# Make the GoogleTest headers visible to the test sources.
include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
# The test executable is built from test_potential.cpp only.
add_executable(${NAME}_tests
test_potential.cpp
)
# Link GoogleTest; gtest_main is linked as well, so the test sources need no
# main() of their own.
target_link_libraries(${NAME}_tests gtest gtest_main)
# Link the project library target `${CMAKE_PROJECT_NAME}_cuda_lib`.
target_link_libraries(${NAME}_tests ${CMAKE_PROJECT_NAME}_cuda_lib)
# Register the executable with CTest under the name `${NAME}Tests`.
add_test(NAME ${NAME}Tests COMMAND ${CMAKE_BINARY_DIR}/tests/unit_tests/${NAME}_tests)

View file

@ -1,5 +0,0 @@
#include "gtest/gtest.h"
// Smoke test: checks only that the test binary builds and that a trivially
// true assertion passes.
TEST(Example, Equals) {
  const int one = 1;
  EXPECT_EQ(one, 1);
}

View file

@ -1,174 +0,0 @@
#include "potentials/pair_potentials.cuh"
#include "precision.hpp"
#include "gtest/gtest.h"
#include <cmath>
/**
 * Fixture that gives every test a Lennard-Jones potential built with
 * sigma = 1.0, epsilon = 1.0 and a cutoff radius of 2.5.
 */
class LennardJonesTest : public ::testing::Test {
protected:
  void SetUp() override {
    // Default parameters
    sigma = 1.0;
    epsilon = 1.0;
    r_cutoff = 2.5;

    // Create default LennardJones object
    lj = new LennardJones(sigma, epsilon, r_cutoff);
  }

  void TearDown() override {
    delete lj;
    lj = nullptr;
  }

  // Members are default-initialized because GoogleTest still runs TearDown()
  // when SetUp() fails; `delete lj` must then see nullptr, not an
  // indeterminate pointer.
  real sigma = 0;
  real epsilon = 0;
  real r_cutoff = 0;
  LennardJones *lj = nullptr;

  // Helper function to compare Vec3 values with tolerance: every component of
  // `actual` must lie within `tolerance` of the same component of `expected`.
  void expect_vec3_near(const Vec3<real> &expected, const Vec3<real> &actual,
                        real tolerance) {
    EXPECT_NEAR(expected.x, actual.x, tolerance);
    EXPECT_NEAR(expected.y, actual.y, tolerance);
    EXPECT_NEAR(expected.z, actual.z, tolerance);
  }
};
TEST_F(LennardJonesTest, ZeroDistance) {
  // A zero-length displacement must produce no energy and no force.
  const Vec3<real> no_force = {0.0, 0.0, 0.0};
  Vec3<real> separation = {0.0, 0.0, 0.0};

  auto outcome = lj->calc_force_and_energy(separation);

  EXPECT_EQ(0.0, outcome.energy);
  expect_vec3_near(no_force, outcome.force, 1e-10);
}
TEST_F(LennardJonesTest, BeyondCutoff) {
  // 3.0 is larger than the fixture cutoff of 2.5, so the pair must not
  // interact at all.
  const Vec3<real> no_force = {0.0, 0.0, 0.0};
  Vec3<real> separation = {3.0, 0.0, 0.0};

  auto outcome = lj->calc_force_and_energy(separation);

  EXPECT_EQ(0.0, outcome.energy);
  expect_vec3_near(no_force, outcome.force, 1e-10);
}
TEST_F(LennardJonesTest, AtMinimum) {
  // 1e-10 is only achievable when `real` is double; single precision carries
  // roughly seven significant digits, so use a looser bound there.
  const real tolerance = sizeof(real) == sizeof(double) ? 1e-10 : 1e-5;

  // The LJ potential has a minimum at r = 2^(1/6) * sigma
  real min_dist = std::pow(2.0, 1.0 / 6.0) * sigma;
  Vec3<real> r = {min_dist, 0.0, 0.0};
  auto result = lj->calc_force_and_energy(r);

  // At the minimum the energy equals -epsilon ...
  EXPECT_NEAR(-epsilon, result.energy, tolerance);
  // ... and the force should be close to zero
  expect_vec3_near({0.0, 0.0, 0.0}, result.force, tolerance);
}
TEST_F(LennardJonesTest, AtEquilibrium) {
  // At r = sigma the energy crosses zero while the force is still repulsive.
  Vec3<real> separation = {sigma, 0.0, 0.0};
  auto outcome = lj->calc_force_and_energy(separation);

  EXPECT_NEAR(0.0, outcome.energy, 1e-10);

  // Repulsive means the force points along +x here; the displacement has no
  // y or z part, so neither should the force.
  EXPECT_GT(outcome.force.x, 0.0);
  EXPECT_NEAR(0.0, outcome.force.y, 1e-10);
  EXPECT_NEAR(0.0, outcome.force.z, 1e-10);
}
TEST_F(LennardJonesTest, RepulsiveRegion) {
  // Test in the repulsive region (r < sigma).
  // The distance is converted to `real` before the braced initializer:
  // `0.8 * sigma` is a double, and a double -> float conversion inside braces
  // is a narrowing conversion that does not compile when `real` is float.
  const real distance = static_cast<real>(0.8 * sigma);
  Vec3<real> r = {distance, 0.0, 0.0};
  auto result = lj->calc_force_and_energy(r);

  // Energy should be positive and force should be repulsive
  EXPECT_GT(result.energy, 0.0);
  EXPECT_GT(result.force.x, 0.0); // Force should be repulsive
}
TEST_F(LennardJonesTest, AttractiveRegion) {
  // Test in the attractive region: r = 1.5 * sigma lies beyond the potential
  // minimum at 2^(1/6) * sigma and inside the cutoff.
  // The distance is converted to `real` before the braced initializer:
  // `1.5 * sigma` is a double, and a double -> float conversion inside braces
  // is a narrowing conversion that does not compile when `real` is float.
  const real distance = static_cast<real>(1.5 * sigma);
  Vec3<real> r = {distance, 0.0, 0.0};
  auto result = lj->calc_force_and_energy(r);

  // Energy should be negative and force should be attractive
  EXPECT_LT(result.energy, 0.0);
  EXPECT_LT(result.force.x,
            0.0); // Force should be attractive (negative x-direction)
}
TEST_F(LennardJonesTest, ArbitraryDirection) {
  // Displacement along the (1, 1, 1) diagonal; its length is sqrt(3) * sigma,
  // which is in the attractive region.
  Vec3<real> separation = {1.0, 1.0, 1.0};
  auto outcome = lj->calc_force_and_energy(separation);

  // Project the force onto the unit vector along the displacement.
  real length = std::sqrt(separation.squared_norm2());
  Vec3<real> unit = separation.scale(1.0 / length);
  real radial_force = outcome.force.x * unit.x;
  radial_force += outcome.force.y * unit.y;
  radial_force += outcome.force.z * unit.z;

  // An attractive force points against the displacement.
  EXPECT_LT(radial_force, 0.0);

  // All three displacement components are equal, so the force components
  // must be equal too.
  EXPECT_NEAR(outcome.force.x, outcome.force.y, 1e-10);
  EXPECT_NEAR(outcome.force.y, outcome.force.z, 1e-10);
}
TEST_F(LennardJonesTest, ParameterVariation) {
  // A second potential with sigma = 2.0, epsilon = 0.5 and cutoff 5.0.
  LennardJones other_lj(2.0, 0.5, 5.0);

  // Evaluate both potentials at the same displacement.
  Vec3<real> separation = {2.0, 0.0, 0.0};
  auto from_default = lj->calc_force_and_energy(separation);
  auto from_other = other_lj.calc_force_and_energy(separation);

  // Different parameters must give different energy and force.
  EXPECT_NE(from_default.energy, from_other.energy);
  EXPECT_NE(from_default.force.x, from_other.force.x);
}
TEST_F(LennardJonesTest, ExactValueCheck) {
  // 1e-10 is only achievable when `real` is double; single precision carries
  // roughly seven significant digits, so use a looser bound there.
  const real tolerance = sizeof(real) == sizeof(double) ? 1e-10 : 1e-5;

  // Test with pre-calculated values for a specific case
  LennardJones lj_exact(1.0, 1.0, 3.0);
  Vec3<real> r = {1.5, 0.0, 0.0};
  auto result = lj_exact.calc_force_and_energy(r);

  // Reference values from the closed-form expressions with sigma = epsilon = 1
  // at r = 1.5 (you may need to adjust these based on your specific
  // implementation)
  real expected_energy =
      4.0 * (std::pow(1.0 / 1.5, 12) - std::pow(1.0 / 1.5, 6));
  real expected_force =
      24.0 * (std::pow(1.0 / 1.5, 6) - 2.0 * std::pow(1.0 / 1.5, 12)) / 1.5;

  EXPECT_NEAR(expected_energy, result.energy, tolerance);
  EXPECT_NEAR(-expected_force, result.force.x,
              tolerance); // Negative because force is attractive
  EXPECT_NEAR(0.0, result.force.y, tolerance);
  EXPECT_NEAR(0.0, result.force.z, tolerance);
}
TEST_F(LennardJonesTest, NearCutoff) {
  // Probe 0.01 on either side of the cutoff radius.
  real just_inside = r_cutoff - 0.01;
  real just_outside = r_cutoff + 0.01;

  Vec3<real> inner = {just_inside, 0.0, 0.0};
  auto inner_result = lj->calc_force_and_energy(inner);

  Vec3<real> outer = {just_outside, 0.0, 0.0};
  auto outer_result = lj->calc_force_and_energy(outer);

  // Inside the cutoff the pair still interacts.
  EXPECT_NE(0.0, inner_result.energy);
  EXPECT_NE(0.0, inner_result.force.x);

  // Outside the cutoff energy and force are zero.
  const Vec3<real> no_force = {0.0, 0.0, 0.0};
  EXPECT_EQ(0.0, outer_result.energy);
  expect_vec3_near(no_force, outer_result.force, 1e-10);
}