Compare commits
No commits in common. "8ba5714648e9ef25d3ba4ee6ce434ac51e5c53de" and "dd83fc6330e7615e9e19e113426299ba912adbbf" have entirely different histories.
8ba5714648
...
dd83fc6330
12 changed files with 369 additions and 318 deletions
|
@ -3,14 +3,12 @@ project(${NAME}_cuda_lib CUDA CXX)
|
||||||
set(HEADER_FILES
|
set(HEADER_FILES
|
||||||
potentials/pair_potentials.cuh
|
potentials/pair_potentials.cuh
|
||||||
forces.cuh
|
forces.cuh
|
||||||
kernel_config.cuh
|
|
||||||
)
|
)
|
||||||
set(SOURCE_FILES
|
set(SOURCE_FILES
|
||||||
kernel_config.cu
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# The library contains header and source files.
|
# The library contains header and source files.
|
||||||
add_library(${NAME}_cuda_lib STATIC
|
add_library(${NAME}_cuda_lib INTERFACE
|
||||||
${SOURCE_FILES}
|
${SOURCE_FILES}
|
||||||
${HEADER_FILES}
|
${HEADER_FILES}
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,75 +1,74 @@
|
||||||
#ifndef FORCES_CUH
|
#ifndef FORCES_CUH
|
||||||
#define FORCES_CUH
|
#define FORCES_CUH
|
||||||
#include "kernel_config.cuh"
|
|
||||||
#include "potentials/pair_potentials.cuh"
|
#include "potentials/pair_potentials.cuh"
|
||||||
#include "precision.hpp"
|
#include "precision.hpp"
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cuda_runtime.h>
|
#include <type_traits>
|
||||||
|
#include <variant>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace CAC {
|
namespace CAC {
|
||||||
|
|
||||||
inline void reset_forces_and_energies(int n_particles,
|
inline void reset_forces_and_energies(int n_particles, real *forces,
|
||||||
float4 *forces_energies) {
|
real *energies) {
|
||||||
cudaMemset(forces_energies, 0, n_particles * sizeof(float4));
|
cudaMemset(forces, 0, n_particles * sizeof(real) * 3);
|
||||||
|
cudaMemset(energies, 0, n_particles * sizeof(real));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename PotentialType>
|
template <typename PotentialType>
|
||||||
__global__ void calc_forces_and_energies(float4 *pos, float4 *force_energies,
|
__global__ void calc_forces_and_energies(real *xs, real *forces, real *energies,
|
||||||
int n_particles, real *box_len,
|
int n_particles, real *box_len,
|
||||||
PotentialType potential) {
|
PotentialType potential) {
|
||||||
|
int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int i = get_thread_id();
|
|
||||||
|
|
||||||
if (i < n_particles) {
|
if (i < n_particles) {
|
||||||
float4 my_pos = pos[i]; // Loads 16 bytes in one transaction
|
real xi = xs[3 * i];
|
||||||
real xi = my_pos.x;
|
real yi = xs[3 * i + 1];
|
||||||
real yi = my_pos.y;
|
real zi = xs[3 * i + 2];
|
||||||
real zi = my_pos.z;
|
|
||||||
|
|
||||||
real total_fx = 0, total_fy = 0, total_fz = 0, total_energy = 0;
|
|
||||||
|
|
||||||
for (int j = 0; j < n_particles; j++) {
|
for (int j = 0; j < n_particles; j++) {
|
||||||
if (i != j) {
|
if (i != j) {
|
||||||
float4 other_pos = pos[j];
|
real xj = xs[3 * j];
|
||||||
real dx = xi - other_pos.x;
|
real yj = xs[3 * j + 1];
|
||||||
real dy = yi - other_pos.y;
|
real zj = xs[3 * j + 2];
|
||||||
real dz = zi - other_pos.z;
|
|
||||||
|
real dx = xi - xj;
|
||||||
|
real dy = yi - yj;
|
||||||
|
real dz = zi - zj;
|
||||||
|
|
||||||
// Apply periodic boundary conditions
|
// Apply periodic boundary conditions
|
||||||
dx -= box_len[0] * round(dx / box_len[0]);
|
dx -= box_len[0] * round(dx / box_len[0]);
|
||||||
dy -= box_len[1] * round(dy / box_len[1]);
|
dy -= box_len[1] * round(dy / box_len[1]);
|
||||||
dz -= box_len[2] * round(dz / box_len[2]);
|
dz -= box_len[2] * round(dz / box_len[2]);
|
||||||
|
|
||||||
float4 sol = potential.calc_force_and_energy({dx, dy, dz});
|
ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
|
||||||
total_fx += sol.x;
|
forces[3 * i] += sol.force.x;
|
||||||
total_fy += sol.y;
|
forces[3 * i + 1] += sol.force.y;
|
||||||
total_fz += sol.z;
|
forces[3 * i + 2] += sol.force.z;
|
||||||
total_energy += sol.w;
|
energies[i] += sol.energy;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
force_energies[i] = make_float4(total_fx, total_fy, total_fz, total_energy);
|
inline void launch_force_kernels(real *xs, real *forces, real *energies,
|
||||||
}
|
|
||||||
}
|
|
||||||
inline void launch_force_kernels(float4 *xs, float4 *force_energies,
|
|
||||||
int n_particles, real *box_len,
|
int n_particles, real *box_len,
|
||||||
std::vector<PairPotentials> potentials,
|
std::vector<PairPotentials> potentials,
|
||||||
dim3 blocks, dim3 threads_per_block) {
|
int grid_size, int block_size) {
|
||||||
|
|
||||||
reset_forces_and_energies(n_particles, force_energies);
|
reset_forces_and_energies(n_particles, forces, energies);
|
||||||
|
|
||||||
for (const auto &potential : potentials) {
|
for (const auto &potential : potentials) {
|
||||||
std::visit(
|
std::visit(
|
||||||
[&](const auto &potential) {
|
[&](const auto &potential) {
|
||||||
using PotentialType = std::decay_t<decltype(potential)>;
|
using PotentialType = std::decay_t<decltype(potential)>;
|
||||||
calc_forces_and_energies<PotentialType>
|
calc_forces_and_energies<PotentialType><<<grid_size, block_size>>>(
|
||||||
<<<blocks, threads_per_block>>>(xs, force_energies, n_particles,
|
xs, forces, energies, n_particles, box_len, potential);
|
||||||
box_len, potential);
|
|
||||||
},
|
},
|
||||||
potential);
|
potential);
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace CAC
|
} // namespace CAC
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,73 +0,0 @@
|
||||||
#include "kernel_config.cuh"
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cstdio>
|
|
||||||
|
|
||||||
size_t KernelConfig::total_threads() const {
|
|
||||||
return (size_t)blocks.x * blocks.y * blocks.z * threads.x * threads.y *
|
|
||||||
threads.z;
|
|
||||||
}
|
|
||||||
|
|
||||||
void KernelConfig::print() const {
|
|
||||||
printf("Grid: (%u, %u, %u), Block: (%u, %u, %u), Total threads: %zu\n",
|
|
||||||
blocks.x, blocks.y, blocks.z, threads.x, threads.y, threads.z,
|
|
||||||
total_threads());
|
|
||||||
}
|
|
||||||
|
|
||||||
KernelConfig get_launch_config(size_t n_elements, int threads_per_block,
|
|
||||||
int max_blocks_per_dim) {
|
|
||||||
|
|
||||||
// Ensure threads_per_block is valid
|
|
||||||
threads_per_block = std::min(threads_per_block, 1024);
|
|
||||||
threads_per_block = std::max(threads_per_block, 32);
|
|
||||||
|
|
||||||
// Calculate total blocks needed
|
|
||||||
size_t total_blocks =
|
|
||||||
(n_elements + threads_per_block - 1) / threads_per_block;
|
|
||||||
|
|
||||||
dim3 threads(threads_per_block);
|
|
||||||
dim3 blocks;
|
|
||||||
|
|
||||||
if (total_blocks <= max_blocks_per_dim) {
|
|
||||||
// Simple 1D grid
|
|
||||||
blocks = dim3(total_blocks);
|
|
||||||
} else {
|
|
||||||
// Use 2D grid
|
|
||||||
blocks.x = max_blocks_per_dim;
|
|
||||||
blocks.y = (total_blocks + max_blocks_per_dim - 1) / max_blocks_per_dim;
|
|
||||||
|
|
||||||
// If still too big, use 3D grid
|
|
||||||
if (blocks.y > max_blocks_per_dim) {
|
|
||||||
blocks.y = max_blocks_per_dim;
|
|
||||||
blocks.z =
|
|
||||||
(total_blocks + (size_t)max_blocks_per_dim * max_blocks_per_dim - 1) /
|
|
||||||
((size_t)max_blocks_per_dim * max_blocks_per_dim);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return KernelConfig(blocks, threads);
|
|
||||||
}
|
|
||||||
|
|
||||||
int get_optimal_block_size(int device_id) {
|
|
||||||
cudaDeviceProp prop;
|
|
||||||
cudaGetDeviceProperties(&prop, device_id);
|
|
||||||
|
|
||||||
// Use a fraction of max threads per block for better occupancy
|
|
||||||
// Typically 256 or 512 work well for most kernels
|
|
||||||
if (prop.maxThreadsPerBlock >= 1024) {
|
|
||||||
return 256; // Good balance of occupancy and register usage
|
|
||||||
} else if (prop.maxThreadsPerBlock >= 512) {
|
|
||||||
return 256;
|
|
||||||
} else {
|
|
||||||
return prop.maxThreadsPerBlock / 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
KernelConfig get_launch_config_advanced(size_t n_elements, int device_id) {
|
|
||||||
cudaDeviceProp prop;
|
|
||||||
cudaGetDeviceProperties(&prop, device_id);
|
|
||||||
|
|
||||||
int threads_per_block = get_optimal_block_size(device_id);
|
|
||||||
int max_blocks_per_dim = prop.maxGridSize[0];
|
|
||||||
|
|
||||||
return get_launch_config(n_elements, threads_per_block, max_blocks_per_dim);
|
|
||||||
}
|
|
|
@ -1,83 +0,0 @@
|
||||||
#ifndef KERNEL_CONFIG_CUH
|
|
||||||
#define KERNEL_CONFIG_CUH
|
|
||||||
#include <cstdio>
|
|
||||||
#include <cuda_runtime.h>
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Structure to hold grid launch configuration
|
|
||||||
*/
|
|
||||||
struct KernelConfig {
|
|
||||||
dim3 blocks;
|
|
||||||
dim3 threads;
|
|
||||||
|
|
||||||
// Convenience constructor
|
|
||||||
KernelConfig(dim3 b, dim3 t) : blocks(b), threads(t) {}
|
|
||||||
|
|
||||||
// Total number of threads launched
|
|
||||||
size_t total_threads() const;
|
|
||||||
|
|
||||||
// Print configuration for debugging
|
|
||||||
void print() const;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Calculate optimal CUDA launch configuration for 1D problem
|
|
||||||
*
|
|
||||||
* @param n_elements Number of elements to process
|
|
||||||
* @param threads_per_block Desired threads per block (default: 256)
|
|
||||||
* @param max_blocks_per_dim Maximum blocks per grid dimension (default: 65535)
|
|
||||||
* @return KernelConfig holding the computed grid and block dimensions
|
|
||||||
*/
|
|
||||||
KernelConfig get_launch_config(size_t n_elements, int threads_per_block = 256,
|
|
||||||
int max_blocks_per_dim = 65535);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Calculate 1D thread index for kernels launched with get_launch_config()
|
|
||||||
* Use this inside your CUDA kernels
|
|
||||||
*/
|
|
||||||
__device__ inline size_t get_thread_id() {
|
|
||||||
return (size_t)blockIdx.z * gridDim.x * gridDim.y * blockDim.x +
|
|
||||||
(size_t)blockIdx.y * gridDim.x * blockDim.x +
|
|
||||||
(size_t)blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Alternative version that takes grid dimensions as parameters
|
|
||||||
* Useful if you need the index calculation in multiple places
|
|
||||||
*/
|
|
||||||
__device__ inline size_t get_thread_id(dim3 gridDim, dim3 blockDim,
|
|
||||||
dim3 blockIdx, dim3 threadIdx) {
|
|
||||||
return (size_t)blockIdx.z * gridDim.x * gridDim.y * blockDim.x +
|
|
||||||
(size_t)blockIdx.y * gridDim.x * blockDim.x +
|
|
||||||
(size_t)blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GPU device properties helper - gets optimal block size for current device
|
|
||||||
*/
|
|
||||||
int get_optimal_block_size(int device_id = 0);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Advanced version that considers device properties
|
|
||||||
*/
|
|
||||||
KernelConfig get_launch_config_advanced(size_t n_elements, int device_id = 0);
|
|
||||||
|
|
||||||
// Example usage in your kernel:
|
|
||||||
/*
|
|
||||||
template <typename PotentialType>
|
|
||||||
__global__ void calc_forces_and_energies(float4 *pos, float4 *force_energies,
|
|
||||||
size_t n_particles, real *box_len,
|
|
||||||
PotentialType potential) {
|
|
||||||
|
|
||||||
size_t i = get_thread_id();
|
|
||||||
|
|
||||||
if (i < n_particles) {
|
|
||||||
// Your existing force calculation code here...
|
|
||||||
float4 my_pos = pos[i];
|
|
||||||
// ... rest of kernel unchanged
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -5,7 +5,6 @@
|
||||||
#include "vec3.h"
|
#include "vec3.h"
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cuda_runtime.h>
|
|
||||||
#include <variant>
|
#include <variant>
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
|
@ -14,6 +13,18 @@
|
||||||
#define CUDA_CALLABLE
|
#define CUDA_CALLABLE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result of one pair-potential evaluation: the pair energy and the force vector
|
||||||
|
*/
|
||||||
|
struct ForceAndEnergy {
|
||||||
|
real energy;
|
||||||
|
Vec3<real> force;
|
||||||
|
|
||||||
|
CUDA_CALLABLE inline static ForceAndEnergy zero() {
|
||||||
|
return {0.0, {0.0, 0.0, 0.0}};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculate the Lennard-Jones energy and force for the current particle
|
* Calculate the Lennard-Jones energy and force for the current particle
|
||||||
* pair described by displacement vector r
|
* pair described by displacement vector r
|
||||||
|
@ -29,7 +40,7 @@ struct LennardJones {
|
||||||
m_rcutoffsq = rcutoff * rcutoff;
|
m_rcutoffsq = rcutoff * rcutoff;
|
||||||
};
|
};
|
||||||
|
|
||||||
CUDA_CALLABLE float4 calc_force_and_energy(Vec3<real> r) {
|
CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3<real> r) {
|
||||||
real rmagsq = r.squared_norm2();
|
real rmagsq = r.squared_norm2();
|
||||||
if (rmagsq < m_rcutoffsq && rmagsq > 0.0) {
|
if (rmagsq < m_rcutoffsq && rmagsq > 0.0) {
|
||||||
real inv_rmag = 1 / sqrt(rmagsq);
|
real inv_rmag = 1 / sqrt(rmagsq);
|
||||||
|
@ -49,10 +60,10 @@ struct LennardJones {
|
||||||
(12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag);
|
(12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag);
|
||||||
Vec3<real> force = r.scale(force_mag * inv_rmag);
|
Vec3<real> force = r.scale(force_mag * inv_rmag);
|
||||||
|
|
||||||
return make_float4(force.x, force.y, force.z, energy);
|
return {energy, force};
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return ForceAndEnergy::zero();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
@ -74,7 +85,7 @@ struct Morse {
|
||||||
m_rcutoffsq = rcutoff * rcutoff;
|
m_rcutoffsq = rcutoff * rcutoff;
|
||||||
};
|
};
|
||||||
|
|
||||||
CUDA_CALLABLE float4 calc_force_and_energy(Vec3<real> r) {
|
CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3<real> r) {
|
||||||
real rmagsq = r.squared_norm2();
|
real rmagsq = r.squared_norm2();
|
||||||
if (rmagsq < m_rcutoffsq && rmagsq > 0.0) {
|
if (rmagsq < m_rcutoffsq && rmagsq > 0.0) {
|
||||||
real rmag = sqrt(rmagsq);
|
real rmag = sqrt(rmagsq);
|
||||||
|
@ -93,10 +104,10 @@ struct Morse {
|
||||||
// Direction: normalized vector
|
// Direction: normalized vector
|
||||||
Vec3<real> force = r.scale(force_mag / rmag);
|
Vec3<real> force = r.scale(force_mag / rmag);
|
||||||
|
|
||||||
return make_float4(force.x, force.y, force.z, energy);
|
return {energy, force};
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
return ForceAndEnergy::zero();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,15 +1,15 @@
|
||||||
#ifndef PRECISION_H
|
#ifndef PRECISION_H
|
||||||
#define PRECISION_H
|
#define PRECISION_H
|
||||||
|
|
||||||
#ifdef USE_DOUBLE
|
#ifdef USE_FLOATS
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If macro USE_DOUBLE is set then the default type will be double
|
* If macro USE_FLOATS is set then the default type will be single (float)
|
||||||
* precision. Otherwise we use floats by default
|
* precision. Otherwise we use double precision by default
|
||||||
*/
|
*/
|
||||||
typedef double real;
|
|
||||||
#else
|
|
||||||
typedef float real;
|
typedef float real;
|
||||||
|
#else
|
||||||
|
typedef double real;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -10,4 +10,5 @@ if(NOT EXISTS ${GOOGLETEST_DIR})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_subdirectory(lib/googletest)
|
add_subdirectory(lib/googletest)
|
||||||
|
add_subdirectory(unit_tests)
|
||||||
add_subdirectory(cuda_unit_tests)
|
add_subdirectory(cuda_unit_tests)
|
||||||
|
|
|
@ -5,12 +5,14 @@
|
||||||
|
|
||||||
// Include your header files
|
// Include your header files
|
||||||
#include "forces.cuh"
|
#include "forces.cuh"
|
||||||
#include "kernel_config.cuh"
|
|
||||||
#include "potentials/pair_potentials.cuh"
|
#include "potentials/pair_potentials.cuh"
|
||||||
#include "precision.hpp"
|
#include "precision.hpp"
|
||||||
|
|
||||||
class CudaForceKernelTest : public ::testing::Test {
|
class CudaForceKernelTest : public ::testing::Test {
|
||||||
protected:
|
protected:
|
||||||
|
const int GRID_SIZE = 1;
|
||||||
|
const int BLOCK_SIZE = 4;
|
||||||
|
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
// Set up CUDA device
|
// Set up CUDA device
|
||||||
cudaError_t err = cudaSetDevice(0);
|
cudaError_t err = cudaSetDevice(0);
|
||||||
|
@ -53,32 +55,33 @@ protected:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper function to run the force calculation kernel
|
// Helper function to run the force calculation kernel
|
||||||
std::vector<float4>
|
std::pair<std::vector<real>, std::vector<real>>
|
||||||
run_force_calculation(int n_particles, const std::vector<float4> &positions,
|
run_force_calculation(int n_particles, const std::vector<real> &positions,
|
||||||
const std::vector<real> &box_dimensions) {
|
const std::vector<real> &box_dimensions) {
|
||||||
std::vector<float4> force_energies(n_particles,
|
std::vector<real> forces(3 * n_particles, 0.0);
|
||||||
make_float4(0.0, 0.0, 0.0, 0.0));
|
std::vector<real> energies(n_particles, 0.0);
|
||||||
|
|
||||||
KernelConfig kernel_config = get_launch_config(n_particles);
|
real *d_positions = allocateAndCopyToGPU(positions);
|
||||||
float4 *d_positions = allocateAndCopyToGPU(positions);
|
real *d_forces = allocateAndCopyToGPU(forces);
|
||||||
float4 *d_force_energies = allocateAndCopyToGPU(force_energies);
|
real *d_energies = allocateAndCopyToGPU(energies);
|
||||||
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
||||||
|
|
||||||
std::vector<PairPotentials> potentials = {LennardJones(1.0, 1.0, 3.0)};
|
std::vector<PairPotentials> potentials = {LennardJones(1.0, 1.0, 3.0)};
|
||||||
CAC::launch_force_kernels(d_positions, d_force_energies, n_particles,
|
CAC::launch_force_kernels(d_positions, d_forces, d_energies, n_particles,
|
||||||
d_box_len, potentials, kernel_config.blocks,
|
d_box_len, potentials, GRID_SIZE, BLOCK_SIZE);
|
||||||
kernel_config.threads);
|
|
||||||
|
|
||||||
checkCudaError(cudaGetLastError(), "kernel launch");
|
checkCudaError(cudaGetLastError(), "kernel launch");
|
||||||
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
||||||
|
|
||||||
std::vector<float4> result_force_energies =
|
std::vector<real> result_forces =
|
||||||
copyFromGPUAndFree(d_force_energies, n_particles);
|
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
||||||
|
std::vector<real> result_energies =
|
||||||
|
copyFromGPUAndFree(d_energies, n_particles);
|
||||||
|
|
||||||
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
||||||
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
||||||
|
|
||||||
return result_force_energies;
|
return {result_forces, result_energies};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -87,14 +90,14 @@ TEST_F(CudaForceKernelTest, BasicFunctionalityTest) {
|
||||||
const real tolerance = 1e-5;
|
const real tolerance = 1e-5;
|
||||||
|
|
||||||
// Set up test data - two particles separated by 0.5 along x
|
// Set up test data - two particles separated by 0.5 along x
|
||||||
std::vector<float4> positions = {
|
std::vector<real> positions = {
|
||||||
make_float4(0.0, 0.0, 0.0, 0.0), // particle 0
|
0.0, 0.0, 0.0, // particle 0
|
||||||
make_float4(0.5, 0.0, 0.0, 0.0), // particle 1
|
0.5, 0.0, 0.0, // particle 1
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
||||||
|
|
||||||
auto result_force_energies =
|
auto [result_forces, result_energies] =
|
||||||
run_force_calculation(n_particles, positions, box_dimensions);
|
run_force_calculation(n_particles, positions, box_dimensions);
|
||||||
|
|
||||||
// Verify results - forces should be non-zero and energies should be
|
// Verify results - forces should be non-zero and energies should be
|
||||||
|
@ -102,14 +105,17 @@ TEST_F(CudaForceKernelTest, BasicFunctionalityTest) {
|
||||||
bool has_nonzero_force = false;
|
bool has_nonzero_force = false;
|
||||||
bool has_nonzero_energy = false;
|
bool has_nonzero_energy = false;
|
||||||
|
|
||||||
for (int i = 0; i < n_particles; i++) {
|
for (int i = 0; i < 3 * n_particles; i++) {
|
||||||
if (std::abs(result_force_energies[i].x) > tolerance ||
|
if (std::abs(result_forces[i]) > tolerance) {
|
||||||
std::abs(result_force_energies[i].y) > tolerance ||
|
|
||||||
std::abs(result_force_energies[i].z) > tolerance) {
|
|
||||||
has_nonzero_force = true;
|
has_nonzero_force = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
if (std::abs(result_force_energies[i].w) > tolerance) {
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < n_particles; i++) {
|
||||||
|
if (std::abs(result_energies[i]) > tolerance) {
|
||||||
has_nonzero_energy = true;
|
has_nonzero_energy = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -124,61 +130,60 @@ TEST_F(CudaForceKernelTest, PeriodicBoundaryConditionsTest) {
|
||||||
const real tolerance = 1e-5;
|
const real tolerance = 1e-5;
|
||||||
|
|
||||||
// Place particles near opposite edges of a small box
|
// Place particles near opposite edges of a small box
|
||||||
std::vector<float4> positions = {
|
std::vector<real> positions = {
|
||||||
make_float4(0.1, 0.0, 0.0, 0.0), // particle 0 near left edge
|
0.1, 0.0, 0.0, // particle 0 near left edge
|
||||||
make_float4(4.9, 0.0, 0.0, 0.0) // particle 1 near right edge
|
4.9, 0.0, 0.0 // particle 1 near right edge
|
||||||
};
|
};
|
||||||
std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC
|
std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC
|
||||||
|
|
||||||
auto result_force_energies =
|
auto [result_forces, result_energies] =
|
||||||
run_force_calculation(n_particles, positions, box_dimensions);
|
run_force_calculation(n_particles, positions, box_dimensions);
|
||||||
|
|
||||||
// With PBC, particles should interact as if they're close (distance ~0.2)
|
// With PBC, particles should interact as if they're close (distance ~0.2)
|
||||||
// rather than far apart (distance ~4.8)
|
// rather than far apart (distance ~4.8)
|
||||||
EXPECT_GT(std::abs(result_force_energies[0].x), tolerance)
|
EXPECT_GT(std::abs(result_forces[0]), tolerance)
|
||||||
<< "Expected significant force due to PBC";
|
<< "Expected significant force due to PBC";
|
||||||
|
EXPECT_GT(std::abs(result_energies[0]), tolerance)
|
||||||
|
<< "Expected significant energy due to PBC";
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(CudaForceKernelTest, SingleParticleTest) {
|
TEST_F(CudaForceKernelTest, SingleParticleTest) {
|
||||||
const int n_particles = 1;
|
const int n_particles = 1;
|
||||||
|
|
||||||
std::vector<float4> positions = {make_float4(0.0, 0.0, 0.0, 0.0)};
|
std::vector<real> positions = {0.0, 0.0, 0.0};
|
||||||
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
||||||
|
|
||||||
auto result_force_energies =
|
auto [result_forces, result_energies] =
|
||||||
run_force_calculation(n_particles, positions, box_dimensions);
|
run_force_calculation(n_particles, positions, box_dimensions);
|
||||||
// Single particle should have zero force and energy
|
// Single particle should have zero force and energy
|
||||||
EXPECT_NEAR(result_force_energies[0].x, 0.0, 1e-10);
|
EXPECT_NEAR(result_forces[0], 0.0, 1e-10);
|
||||||
EXPECT_NEAR(result_force_energies[0].y, 0.0, 1e-10);
|
EXPECT_NEAR(result_forces[1], 0.0, 1e-10);
|
||||||
EXPECT_NEAR(result_force_energies[0].z, 0.0, 1e-10);
|
EXPECT_NEAR(result_forces[2], 0.0, 1e-10);
|
||||||
EXPECT_NEAR(result_force_energies[0].w, 0.0, 1e-10);
|
EXPECT_NEAR(result_energies[0], 0.0, 1e-10);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(CudaForceKernelTest, ForceSymmetryTest) {
|
TEST_F(CudaForceKernelTest, ForceSymmetryTest) {
|
||||||
const int n_particles = 2;
|
const int n_particles = 2;
|
||||||
const real tolerance = 1e-5;
|
const real tolerance = 1e-5;
|
||||||
|
|
||||||
std::vector<float4> positions = {
|
std::vector<real> positions = {
|
||||||
make_float4(0.0, 0.0, 0.0, 0.0), // particle 0
|
0.0, 0.0, 0.0, // particle 0
|
||||||
make_float4(1.5, 0.0, 0.0, 0.0) // particle 1
|
1.5, 0.0, 0.0 // particle 1
|
||||||
};
|
};
|
||||||
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
||||||
|
|
||||||
auto result_force_energies =
|
auto [result_forces, result_energies] =
|
||||||
run_force_calculation(n_particles, positions, box_dimensions);
|
run_force_calculation(n_particles, positions, box_dimensions);
|
||||||
|
|
||||||
// Newton's third law: forces should be equal and opposite
|
// Newton's third law: forces should be equal and opposite
|
||||||
EXPECT_NEAR(result_force_energies[0].x, -result_force_energies[1].x,
|
EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance)
|
||||||
tolerance)
|
|
||||||
<< "Force x-components should be opposite";
|
<< "Force x-components should be opposite";
|
||||||
EXPECT_NEAR(result_force_energies[0].y, -result_force_energies[1].y,
|
EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance)
|
||||||
tolerance)
|
|
||||||
<< "Force y-components should be opposite";
|
<< "Force y-components should be opposite";
|
||||||
EXPECT_NEAR(result_force_energies[0].z, -result_force_energies[1].z,
|
EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance)
|
||||||
tolerance)
|
|
||||||
<< "Force z-components should be opposite";
|
<< "Force z-components should be opposite";
|
||||||
|
|
||||||
// Energies should be equal for symmetric particles
|
// Energies should be equal for symmetric particles
|
||||||
EXPECT_NEAR(result_force_energies[0].w, result_force_energies[1].w, tolerance)
|
EXPECT_NEAR(result_energies[0], result_energies[1], tolerance)
|
||||||
<< "Energies should be equal";
|
<< "Energies should be equal";
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
#include "precision.hpp"
|
#include "precision.hpp"
|
||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdio>
|
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
|
||||||
// Structure to hold test results from device
|
// Structure to hold test results from device
|
||||||
|
@ -19,7 +18,8 @@ struct TestResults {
|
||||||
bool near_cutoff_pass;
|
bool near_cutoff_pass;
|
||||||
|
|
||||||
// Additional result data for exact checks
|
// Additional result data for exact checks
|
||||||
float4 force_energy_values[10];
|
real energy_values[10];
|
||||||
|
Vec3<real> force_values[10];
|
||||||
};
|
};
|
||||||
|
|
||||||
// Check if two Vec3 values are close within tolerance
|
// Check if two Vec3 values are close within tolerance
|
||||||
|
@ -35,7 +35,7 @@ __global__ void lennard_jones_test_kernel(TestResults *results) {
|
||||||
real sigma = 1.0;
|
real sigma = 1.0;
|
||||||
real epsilon = 1.0;
|
real epsilon = 1.0;
|
||||||
real r_cutoff = 2.5;
|
real r_cutoff = 2.5;
|
||||||
real tolerance = 1e-5;
|
real tolerance = 1e-10;
|
||||||
|
|
||||||
// Create LennardJones object on device
|
// Create LennardJones object on device
|
||||||
LennardJones lj(sigma, epsilon, r_cutoff);
|
LennardJones lj(sigma, epsilon, r_cutoff);
|
||||||
|
@ -43,78 +43,87 @@ __global__ void lennard_jones_test_kernel(TestResults *results) {
|
||||||
// Zero Distance Test
|
// Zero Distance Test
|
||||||
{
|
{
|
||||||
Vec3<real> r = {0.0, 0.0, 0.0};
|
Vec3<real> r = {0.0, 0.0, 0.0};
|
||||||
float4 result = lj.calc_force_and_energy(r);
|
auto result = lj.calc_force_and_energy(r);
|
||||||
results->force_energy_values[0] = result;
|
results->energy_values[0] = result.energy;
|
||||||
|
results->force_values[0] = result.force;
|
||||||
results->zero_distance_pass =
|
results->zero_distance_pass =
|
||||||
(result.w == 0.0) &&
|
(result.energy == 0.0) &&
|
||||||
vec3_near(Vec3<real>{0.0, 0.0, 0.0},
|
vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance);
|
||||||
Vec3<real>{result.x, result.y, result.z}, tolerance);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Beyond Cutoff Test
|
// Beyond Cutoff Test
|
||||||
{
|
{
|
||||||
Vec3<real> r = {3.0, 0.0, 0.0};
|
Vec3<real> r = {3.0, 0.0, 0.0};
|
||||||
float4 result = lj.calc_force_and_energy(r);
|
auto result = lj.calc_force_and_energy(r);
|
||||||
results->force_energy_values[1] = result;
|
results->energy_values[1] = result.energy;
|
||||||
|
results->force_values[1] = result.force;
|
||||||
results->beyond_cutoff_pass =
|
results->beyond_cutoff_pass =
|
||||||
(result.w == 0.0) &&
|
(result.energy == 0.0) &&
|
||||||
vec3_near(Vec3<real>{0.0, 0.0, 0.0},
|
vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance);
|
||||||
Vec3<real>{result.x, result.y, result.z}, tolerance);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// At Minimum Test
|
// At Minimum Test
|
||||||
{
|
{
|
||||||
real min_dist = pow(2.0, 1.0 / 6.0) * sigma;
|
real min_dist = pow(2.0, 1.0 / 6.0) * sigma;
|
||||||
Vec3<real> r = {min_dist, 0.0, 0.0};
|
Vec3<real> r = {min_dist, 0.0, 0.0};
|
||||||
float4 result = lj.calc_force_and_energy(r);
|
auto result = lj.calc_force_and_energy(r);
|
||||||
results->force_energy_values[2] = result;
|
results->energy_values[2] = result.energy;
|
||||||
|
results->force_values[2] = result.force;
|
||||||
|
|
||||||
results->at_minimum_pass =
|
results->at_minimum_pass =
|
||||||
(fabs(result.w + epsilon) < tolerance) &&
|
(fabs(result.energy + epsilon) < tolerance) &&
|
||||||
vec3_near(Vec3<real>{0.0, 0.0, 0.0},
|
vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance);
|
||||||
Vec3<real>{result.x, result.y, result.z}, tolerance);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// At Equilibrium Test (r = sigma: zero energy, repulsive force)
|
// At Equilibrium Test (r = sigma: zero energy, repulsive force)
|
||||||
{
|
{
|
||||||
Vec3<real> r = {sigma, 0.0, 0.0};
|
Vec3<real> r = {sigma, 0.0, 0.0};
|
||||||
float4 result = lj.calc_force_and_energy(r);
|
auto result = lj.calc_force_and_energy(r);
|
||||||
results->force_energy_values[3] = result;
|
results->energy_values[3] = result.energy;
|
||||||
results->at_equilibrium_pass =
|
results->force_values[3] = result.force;
|
||||||
(fabs(result.w) < tolerance) && (result.x > 0.0) &&
|
results->at_equilibrium_pass = (fabs(result.energy) < tolerance) &&
|
||||||
(fabs(result.y) < tolerance) && (fabs(result.z) < tolerance);
|
(result.force.x > 0.0) &&
|
||||||
|
(fabs(result.force.y) < tolerance) &&
|
||||||
|
(fabs(result.force.z) < tolerance);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Repulsive Region Test
|
// Repulsive Region Test
|
||||||
{
|
{
|
||||||
Vec3<real> r = {0.8f * sigma, 0.0, 0.0};
|
Vec3<real> r = {0.8 * sigma, 0.0, 0.0};
|
||||||
float4 result = lj.calc_force_and_energy(r);
|
auto result = lj.calc_force_and_energy(r);
|
||||||
results->force_energy_values[4] = result;
|
results->energy_values[4] = result.energy;
|
||||||
results->repulsive_region_pass = (result.w > 0.0) && (result.x > 0.0);
|
results->force_values[4] = result.force;
|
||||||
|
results->repulsive_region_pass =
|
||||||
|
(result.energy > 0.0) && (result.force.x > 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Attractive Region Test
|
// Attractive Region Test
|
||||||
{
|
{
|
||||||
Vec3<real> r = {1.5f * sigma, 0.0, 0.0};
|
Vec3<real> r = {1.5 * sigma, 0.0, 0.0};
|
||||||
float4 result = lj.calc_force_and_energy(r);
|
auto result = lj.calc_force_and_energy(r);
|
||||||
results->force_energy_values[5] = result;
|
results->energy_values[5] = result.energy;
|
||||||
results->attractive_region_pass = (result.w < 0.0) && (result.x < 0.0);
|
results->force_values[5] = result.force;
|
||||||
|
results->attractive_region_pass =
|
||||||
|
(result.energy < 0.0) && (result.force.x < 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Arbitrary Direction Test
|
// Arbitrary Direction Test
|
||||||
{
|
{
|
||||||
Vec3<real> r = {1.0, 1.0, 1.0};
|
Vec3<real> r = {1.0, 1.0, 1.0};
|
||||||
float4 result = lj.calc_force_and_energy(r);
|
auto result = lj.calc_force_and_energy(r);
|
||||||
results->force_energy_values[6] = result;
|
results->energy_values[6] = result.energy;
|
||||||
|
results->force_values[6] = result.force;
|
||||||
|
|
||||||
real r_mag = sqrt(r.squared_norm2());
|
real r_mag = sqrt(r.squared_norm2());
|
||||||
Vec3<real> normalized_r = r.scale(1.0 / r_mag);
|
Vec3<real> normalized_r = r.scale(1.0 / r_mag);
|
||||||
real force_dot_r = result.x * normalized_r.x + result.y * normalized_r.y +
|
real force_dot_r = result.force.x * normalized_r.x +
|
||||||
result.z * normalized_r.z;
|
result.force.y * normalized_r.y +
|
||||||
|
result.force.z * normalized_r.z;
|
||||||
|
|
||||||
results->arbitrary_direction_pass =
|
results->arbitrary_direction_pass =
|
||||||
(force_dot_r < 0.0) && (fabs(result.x - result.y) < tolerance) &&
|
(force_dot_r < 0.0) &&
|
||||||
(fabs(result.y - result.z) < tolerance);
|
(fabs(result.force.x - result.force.y) < tolerance) &&
|
||||||
|
(fabs(result.force.y - result.force.z) < tolerance);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parameter Variation Test
|
// Parameter Variation Test
|
||||||
|
@ -126,31 +135,34 @@ __global__ void lennard_jones_test_kernel(TestResults *results) {
|
||||||
LennardJones lj2(new_sigma, new_epsilon, new_r_cutoff);
|
LennardJones lj2(new_sigma, new_epsilon, new_r_cutoff);
|
||||||
|
|
||||||
Vec3<real> r = {2.0, 0.0, 0.0};
|
Vec3<real> r = {2.0, 0.0, 0.0};
|
||||||
float4 result1 = lj.calc_force_and_energy(r);
|
auto result1 = lj.calc_force_and_energy(r);
|
||||||
float4 result2 = lj2.calc_force_and_energy(r);
|
auto result2 = lj2.calc_force_and_energy(r);
|
||||||
|
|
||||||
results->force_energy_values[7] = result2;
|
results->energy_values[7] = result2.energy;
|
||||||
|
results->force_values[7] = result2.force;
|
||||||
|
|
||||||
results->parameter_variation_pass =
|
results->parameter_variation_pass = (result1.energy != result2.energy) &&
|
||||||
(result1.w != result2.w) && (result1.x != result2.x);
|
(result1.force.x != result2.force.x);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Exact Value Check Test
|
// Exact Value Check Test
|
||||||
{
|
{
|
||||||
LennardJones lj_exact(1.0, 1.0, 3.0);
|
LennardJones lj_exact(1.0, 1.0, 3.0);
|
||||||
Vec3<real> r = {1.5, 0.0, 0.0};
|
Vec3<real> r = {1.5, 0.0, 0.0};
|
||||||
float4 result = lj_exact.calc_force_and_energy(r);
|
auto result = lj_exact.calc_force_and_energy(r);
|
||||||
|
|
||||||
results->force_energy_values[8] = result;
|
results->energy_values[8] = result.energy;
|
||||||
|
results->force_values[8] = result.force;
|
||||||
|
|
||||||
real expected_energy = 4.0 * (pow(1.0 / 1.5, 12) - pow(1.0 / 1.5, 6));
|
real expected_energy = 4.0 * (pow(1.0 / 1.5, 12) - pow(1.0 / 1.5, 6));
|
||||||
real expected_force =
|
real expected_force =
|
||||||
24.0 * (pow(1.0 / 1.5, 6) - 2.0 * pow(1.0 / 1.5, 12)) / 1.5;
|
24.0 * (pow(1.0 / 1.5, 6) - 2.0 * pow(1.0 / 1.5, 12)) / 1.5;
|
||||||
|
|
||||||
results->exact_value_check_pass =
|
results->exact_value_check_pass =
|
||||||
(fabs(result.w - expected_energy) < tolerance) &&
|
(fabs(result.energy - expected_energy) < tolerance) &&
|
||||||
(fabs(result.x + expected_force) < tolerance) &&
|
(fabs(result.force.x + expected_force) < tolerance) &&
|
||||||
(fabs(result.y) < tolerance) && (fabs(result.z) < tolerance);
|
(fabs(result.force.y) < tolerance) &&
|
||||||
|
(fabs(result.force.z) < tolerance);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Near Cutoff Test
|
// Near Cutoff Test
|
||||||
|
@ -161,18 +173,16 @@ __global__ void lennard_jones_test_kernel(TestResults *results) {
|
||||||
Vec3<real> r_inside = {inside_cutoff, 0.0, 0.0};
|
Vec3<real> r_inside = {inside_cutoff, 0.0, 0.0};
|
||||||
Vec3<real> r_outside = {outside_cutoff, 0.0, 0.0};
|
Vec3<real> r_outside = {outside_cutoff, 0.0, 0.0};
|
||||||
|
|
||||||
float4 result_inside = lj.calc_force_and_energy(r_inside);
|
auto result_inside = lj.calc_force_and_energy(r_inside);
|
||||||
float4 result_outside = lj.calc_force_and_energy(r_outside);
|
auto result_outside = lj.calc_force_and_energy(r_outside);
|
||||||
|
|
||||||
results->force_energy_values[9] = result_inside;
|
results->energy_values[9] = result_inside.energy;
|
||||||
|
results->force_values[9] = result_inside.force;
|
||||||
|
|
||||||
results->near_cutoff_pass =
|
results->near_cutoff_pass =
|
||||||
(result_inside.w != 0.0) && (result_inside.x != 0.0) &&
|
(result_inside.energy != 0.0) && (result_inside.force.x != 0.0) &&
|
||||||
(result_outside.w == 0.0) &&
|
(result_outside.energy == 0.0) &&
|
||||||
vec3_near(
|
vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result_outside.force, tolerance);
|
||||||
Vec3<real>{0.0, 0.0, 0.0},
|
|
||||||
Vec3<real>{result_outside.x, result_outside.y, result_outside.z},
|
|
||||||
tolerance);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -240,48 +250,44 @@ TEST_F(LennardJonesCudaTest, DeviceZeroDistance) {
|
||||||
auto results = runDeviceTests();
|
auto results = runDeviceTests();
|
||||||
EXPECT_TRUE(results.zero_distance_pass)
|
EXPECT_TRUE(results.zero_distance_pass)
|
||||||
<< "Zero distance test failed on device. Energy: "
|
<< "Zero distance test failed on device. Energy: "
|
||||||
<< results.force_energy_values[0].w << ", Force: ("
|
<< results.energy_values[0] << ", Force: (" << results.force_values[0].x
|
||||||
<< results.force_energy_values[0].x << ", "
|
<< ", " << results.force_values[0].y << ", " << results.force_values[0].z
|
||||||
<< results.force_energy_values[0].y << ", "
|
<< ")";
|
||||||
<< results.force_energy_values[0].z << ")";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LennardJonesCudaTest, DeviceBeyondCutoff) {
|
TEST_F(LennardJonesCudaTest, DeviceBeyondCutoff) {
|
||||||
auto results = runDeviceTests();
|
auto results = runDeviceTests();
|
||||||
EXPECT_TRUE(results.beyond_cutoff_pass)
|
EXPECT_TRUE(results.beyond_cutoff_pass)
|
||||||
<< "Beyond cutoff test failed on device. Energy: "
|
<< "Beyond cutoff test failed on device. Energy: "
|
||||||
<< results.force_energy_values[1].w;
|
<< results.energy_values[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LennardJonesCudaTest, DeviceAtMinimum) {
|
TEST_F(LennardJonesCudaTest, DeviceAtMinimum) {
|
||||||
auto results = runDeviceTests();
|
auto results = runDeviceTests();
|
||||||
EXPECT_TRUE(results.at_minimum_pass)
|
EXPECT_TRUE(results.at_minimum_pass)
|
||||||
<< "At minimum test failed on device. Energy: "
|
<< "At minimum test failed on device. Energy: "
|
||||||
<< results.force_energy_values[2].w;
|
<< results.energy_values[2];
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LennardJonesCudaTest, DeviceAtEquilibrium) {
|
TEST_F(LennardJonesCudaTest, DeviceAtEquilibrium) {
|
||||||
auto results = runDeviceTests();
|
auto results = runDeviceTests();
|
||||||
EXPECT_TRUE(results.at_equilibrium_pass)
|
EXPECT_TRUE(results.at_equilibrium_pass)
|
||||||
<< "At equilibrium test failed on device. Energy: "
|
<< "At equilibrium test failed on device. Energy: "
|
||||||
<< results.force_energy_values[3].w
|
<< results.energy_values[3] << ", Force x: " << results.force_values[3].x;
|
||||||
<< ", Force x: " << results.force_energy_values[3].x;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LennardJonesCudaTest, DeviceRepulsiveRegion) {
|
TEST_F(LennardJonesCudaTest, DeviceRepulsiveRegion) {
|
||||||
auto results = runDeviceTests();
|
auto results = runDeviceTests();
|
||||||
EXPECT_TRUE(results.repulsive_region_pass)
|
EXPECT_TRUE(results.repulsive_region_pass)
|
||||||
<< "Repulsive region test failed on device. Energy: "
|
<< "Repulsive region test failed on device. Energy: "
|
||||||
<< results.force_energy_values[4].w
|
<< results.energy_values[4] << ", Force x: " << results.force_values[4].x;
|
||||||
<< ", Force x: " << results.force_energy_values[4].x;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LennardJonesCudaTest, DeviceAttractiveRegion) {
|
TEST_F(LennardJonesCudaTest, DeviceAttractiveRegion) {
|
||||||
auto results = runDeviceTests();
|
auto results = runDeviceTests();
|
||||||
EXPECT_TRUE(results.attractive_region_pass)
|
EXPECT_TRUE(results.attractive_region_pass)
|
||||||
<< "Attractive region test failed on device. Energy: "
|
<< "Attractive region test failed on device. Energy: "
|
||||||
<< results.force_energy_values[5].w
|
<< results.energy_values[5] << ", Force x: " << results.force_values[5].x;
|
||||||
<< ", Force x: " << results.force_energy_values[5].x;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LennardJonesCudaTest, DeviceArbitraryDirection) {
|
TEST_F(LennardJonesCudaTest, DeviceArbitraryDirection) {
|
||||||
|
@ -300,13 +306,12 @@ TEST_F(LennardJonesCudaTest, DeviceExactValueCheck) {
|
||||||
auto results = runDeviceTests();
|
auto results = runDeviceTests();
|
||||||
EXPECT_TRUE(results.exact_value_check_pass)
|
EXPECT_TRUE(results.exact_value_check_pass)
|
||||||
<< "Exact value check test failed on device. Energy: "
|
<< "Exact value check test failed on device. Energy: "
|
||||||
<< results.force_energy_values[8].w
|
<< results.energy_values[8] << ", Force x: " << results.force_values[8].x;
|
||||||
<< ", Force x: " << results.force_energy_values[8].x;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LennardJonesCudaTest, DeviceNearCutoff) {
|
TEST_F(LennardJonesCudaTest, DeviceNearCutoff) {
|
||||||
auto results = runDeviceTests();
|
auto results = runDeviceTests();
|
||||||
EXPECT_TRUE(results.near_cutoff_pass)
|
EXPECT_TRUE(results.near_cutoff_pass)
|
||||||
<< "Near cutoff test failed on device. Inside energy: "
|
<< "Near cutoff test failed on device. Inside energy: "
|
||||||
<< results.force_energy_values[9].w;
|
<< results.energy_values[9];
|
||||||
}
|
}
|
||||||
|
|
9
tests/unit_tests/CMakeLists.txt
Normal file
9
tests/unit_tests/CMakeLists.txt
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
|
||||||
|
|
||||||
|
add_executable(${NAME}_tests
|
||||||
|
test_potential.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
target_link_libraries(${NAME}_tests gtest gtest_main)
|
||||||
|
target_link_libraries(${NAME}_tests ${CMAKE_PROJECT_NAME}_cuda_lib)
|
||||||
|
add_test(NAME ${NAME}Tests COMMAND ${CMAKE_BINARY_DIR}/tests/unit_tests/${NAME}_tests)
|
5
tests/unit_tests/test_example.cpp
Normal file
5
tests/unit_tests/test_example.cpp
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
|
TEST(Example, Equals) {
|
||||||
|
EXPECT_EQ(1, 1);
|
||||||
|
}
|
174
tests/unit_tests/test_potential.cpp
Normal file
174
tests/unit_tests/test_potential.cpp
Normal file
|
@ -0,0 +1,174 @@
|
||||||
|
#include "potentials/pair_potentials.cuh"
|
||||||
|
#include "precision.hpp"
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
class LennardJonesTest : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
void SetUp() override {
|
||||||
|
// Default parameters
|
||||||
|
sigma = 1.0;
|
||||||
|
epsilon = 1.0;
|
||||||
|
r_cutoff = 2.5;
|
||||||
|
|
||||||
|
// Create default LennardJones object
|
||||||
|
lj = new LennardJones(sigma, epsilon, r_cutoff);
|
||||||
|
}
|
||||||
|
|
||||||
|
void TearDown() override { delete lj; }
|
||||||
|
|
||||||
|
real sigma;
|
||||||
|
real epsilon;
|
||||||
|
real r_cutoff;
|
||||||
|
LennardJones *lj;
|
||||||
|
|
||||||
|
// Helper function to compare Vec3 values with tolerance
|
||||||
|
void expect_vec3_near(const Vec3<real> &expected, const Vec3<real> &actual,
|
||||||
|
real tolerance) {
|
||||||
|
EXPECT_NEAR(expected.x, actual.x, tolerance);
|
||||||
|
EXPECT_NEAR(expected.y, actual.y, tolerance);
|
||||||
|
EXPECT_NEAR(expected.z, actual.z, tolerance);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_F(LennardJonesTest, ZeroDistance) {
|
||||||
|
// At zero distance, the calculation should return zero force and energy
|
||||||
|
Vec3<real> r = {0.0, 0.0, 0.0};
|
||||||
|
auto result = lj->calc_force_and_energy(r);
|
||||||
|
|
||||||
|
EXPECT_EQ(0.0, result.energy);
|
||||||
|
expect_vec3_near({0.0, 0.0, 0.0}, result.force, 1e-10);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LennardJonesTest, BeyondCutoff) {
|
||||||
|
// Distance beyond cutoff should return zero force and energy
|
||||||
|
Vec3<real> r = {3.0, 0.0, 0.0}; // 3.0 > r_cutoff (2.5)
|
||||||
|
auto result = lj->calc_force_and_energy(r);
|
||||||
|
|
||||||
|
EXPECT_EQ(0.0, result.energy);
|
||||||
|
expect_vec3_near({0.0, 0.0, 0.0}, result.force, 1e-10);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LennardJonesTest, AtMinimum) {
|
||||||
|
// The LJ potential has a minimum at r = 2^(1/6) * sigma
|
||||||
|
real min_dist = std::pow(2.0, 1.0 / 6.0) * sigma;
|
||||||
|
Vec3<real> r = {min_dist, 0.0, 0.0};
|
||||||
|
auto result = lj->calc_force_and_energy(r);
|
||||||
|
|
||||||
|
// At minimum, force should be close to zero
|
||||||
|
EXPECT_NEAR(-epsilon, result.energy, 1e-10);
|
||||||
|
expect_vec3_near({0.0, 0.0, 0.0}, result.force, 1e-10);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LennardJonesTest, AtEquilibrium) {
|
||||||
|
// At r = sigma, the energy should be zero and force should be repulsive
|
||||||
|
Vec3<real> r = {sigma, 0.0, 0.0};
|
||||||
|
auto result = lj->calc_force_and_energy(r);
|
||||||
|
|
||||||
|
EXPECT_NEAR(0.0, result.energy, 1e-10);
|
||||||
|
EXPECT_GT(result.force.x,
|
||||||
|
0.0); // Force should be repulsive (positive x-direction)
|
||||||
|
EXPECT_NEAR(0.0, result.force.y, 1e-10);
|
||||||
|
EXPECT_NEAR(0.0, result.force.z, 1e-10);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LennardJonesTest, RepulsiveRegion) {
|
||||||
|
// Test in the repulsive region (r < sigma)
|
||||||
|
Vec3<real> r = {0.8 * sigma, 0.0, 0.0};
|
||||||
|
auto result = lj->calc_force_and_energy(r);
|
||||||
|
|
||||||
|
// Energy should be positive and force should be repulsive
|
||||||
|
EXPECT_GT(result.energy, 0.0);
|
||||||
|
EXPECT_GT(result.force.x, 0.0); // Force should be repulsive
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LennardJonesTest, AttractiveRegion) {
|
||||||
|
// Test in the attractive region (sigma < r < r_min)
|
||||||
|
Vec3<real> r = {1.5 * sigma, 0.0, 0.0};
|
||||||
|
auto result = lj->calc_force_and_energy(r);
|
||||||
|
|
||||||
|
// Energy should be negative and force should be attractive
|
||||||
|
EXPECT_LT(result.energy, 0.0);
|
||||||
|
EXPECT_LT(result.force.x,
|
||||||
|
0.0); // Force should be attractive (negative x-direction)
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LennardJonesTest, ArbitraryDirection) {
|
||||||
|
// Test with a vector in an arbitrary direction
|
||||||
|
Vec3<real> r = {1.0, 1.0, 1.0};
|
||||||
|
auto result = lj->calc_force_and_energy(r);
|
||||||
|
|
||||||
|
// The force should be in the same direction as r but opposite sign
|
||||||
|
// (attractive region)
|
||||||
|
real r_mag = std::sqrt(r.squared_norm2());
|
||||||
|
|
||||||
|
// Calculate expected force direction (should be along -r)
|
||||||
|
Vec3<real> normalized_r = r.scale(1.0 / r_mag);
|
||||||
|
real force_dot_r = result.force.x * normalized_r.x +
|
||||||
|
result.force.y * normalized_r.y +
|
||||||
|
result.force.z * normalized_r.z;
|
||||||
|
|
||||||
|
// In this case, we're at r = sqrt(3) * sigma which is in attractive region
|
||||||
|
EXPECT_LT(force_dot_r, 0.0); // Force should be attractive
|
||||||
|
|
||||||
|
// Force should be symmetric in all dimensions for this vector
|
||||||
|
EXPECT_NEAR(result.force.x, result.force.y, 1e-10);
|
||||||
|
EXPECT_NEAR(result.force.y, result.force.z, 1e-10);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LennardJonesTest, ParameterVariation) {
|
||||||
|
// Test with different parameter values
|
||||||
|
real new_sigma = 2.0;
|
||||||
|
real new_epsilon = 0.5;
|
||||||
|
real new_r_cutoff = 5.0;
|
||||||
|
|
||||||
|
LennardJones lj2(new_sigma, new_epsilon, new_r_cutoff);
|
||||||
|
|
||||||
|
Vec3<real> r = {2.0, 0.0, 0.0};
|
||||||
|
auto result1 = lj->calc_force_and_energy(r);
|
||||||
|
auto result2 = lj2.calc_force_and_energy(r);
|
||||||
|
|
||||||
|
// Results should be different with different parameters
|
||||||
|
EXPECT_NE(result1.energy, result2.energy);
|
||||||
|
EXPECT_NE(result1.force.x, result2.force.x);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LennardJonesTest, ExactValueCheck) {
|
||||||
|
// Test with pre-calculated values for a specific case
|
||||||
|
LennardJones lj_exact(1.0, 1.0, 3.0);
|
||||||
|
Vec3<real> r = {1.5, 0.0, 0.0};
|
||||||
|
auto result = lj_exact.calc_force_and_energy(r);
|
||||||
|
|
||||||
|
// Pre-calculated values (you may need to adjust these based on your specific
|
||||||
|
// implementation)
|
||||||
|
real expected_energy =
|
||||||
|
4.0 * (std::pow(1.0 / 1.5, 12) - std::pow(1.0 / 1.5, 6));
|
||||||
|
real expected_force =
|
||||||
|
24.0 * (std::pow(1.0 / 1.5, 6) - 2.0 * std::pow(1.0 / 1.5, 12)) / 1.5;
|
||||||
|
|
||||||
|
EXPECT_NEAR(expected_energy, result.energy, 1e-10);
|
||||||
|
EXPECT_NEAR(-expected_force, result.force.x,
|
||||||
|
1e-10); // Negative because force is attractive
|
||||||
|
EXPECT_NEAR(0.0, result.force.y, 1e-10);
|
||||||
|
EXPECT_NEAR(0.0, result.force.z, 1e-10);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LennardJonesTest, NearCutoff) {
|
||||||
|
// Test behavior just inside and just outside the cutoff
|
||||||
|
real inside_cutoff = r_cutoff - 0.01;
|
||||||
|
real outside_cutoff = r_cutoff + 0.01;
|
||||||
|
|
||||||
|
Vec3<real> r_inside = {inside_cutoff, 0.0, 0.0};
|
||||||
|
Vec3<real> r_outside = {outside_cutoff, 0.0, 0.0};
|
||||||
|
|
||||||
|
auto result_inside = lj->calc_force_and_energy(r_inside);
|
||||||
|
auto result_outside = lj->calc_force_and_energy(r_outside);
|
||||||
|
|
||||||
|
// Inside should have non-zero values
|
||||||
|
EXPECT_NE(0.0, result_inside.energy);
|
||||||
|
EXPECT_NE(0.0, result_inside.force.x);
|
||||||
|
|
||||||
|
// Outside should be zero
|
||||||
|
EXPECT_EQ(0.0, result_outside.energy);
|
||||||
|
expect_vec3_near({0.0, 0.0, 0.0}, result_outside.force, 1e-10);
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue