Compare commits
3 commits
7f04ae793a
...
ac44ceaab1
Author | SHA1 | Date | |
---|---|---|---|
ac44ceaab1 | |||
2d948a7e76 | |||
a638c4f388 |
8 changed files with 300 additions and 335 deletions
|
@ -1,15 +1,14 @@
|
||||||
project(${NAME}_cuda_lib CUDA CXX)
|
project(${NAME}_cuda_lib CUDA CXX)
|
||||||
|
|
||||||
set(HEADER_FILES
|
set(HEADER_FILES
|
||||||
pair_potentials.cuh
|
potentials/pair_potentials.cuh
|
||||||
forces.cuh
|
forces.cuh
|
||||||
)
|
)
|
||||||
set(SOURCE_FILES
|
set(SOURCE_FILES
|
||||||
forces.cu
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# The library contains header and source files.
|
# The library contains header and source files.
|
||||||
add_library(${NAME}_cuda_lib STATIC
|
add_library(${NAME}_cuda_lib INTERFACE
|
||||||
${SOURCE_FILES}
|
${SOURCE_FILES}
|
||||||
${HEADER_FILES}
|
${HEADER_FILES}
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,36 +0,0 @@
|
||||||
#include "forces.cuh"
|
|
||||||
|
|
||||||
__global__ void CAC::calc_forces_and_energies(real *xs, real *forces,
|
|
||||||
real *energies, int n_particles,
|
|
||||||
real *box_len,
|
|
||||||
PairPotential &potential) {
|
|
||||||
int i = blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
if (i < n_particles) {
|
|
||||||
real xi = xs[3 * i];
|
|
||||||
real yi = xs[3 * i + 1];
|
|
||||||
real zi = xs[3 * i + 2];
|
|
||||||
|
|
||||||
for (int j = 0; j < n_particles; j++) {
|
|
||||||
if (i != j) {
|
|
||||||
real xj = xs[3 * j];
|
|
||||||
real yj = xs[3 * j + 1];
|
|
||||||
real zj = xs[3 * j + 2];
|
|
||||||
|
|
||||||
real dx = xi - xj;
|
|
||||||
real dy = yi - yj;
|
|
||||||
real dz = zi - zj;
|
|
||||||
|
|
||||||
// Apply periodic boundary conditions
|
|
||||||
dx -= box_len[0] * round(dx / box_len[0]);
|
|
||||||
dy -= box_len[1] * round(dy / box_len[1]);
|
|
||||||
dz -= box_len[2] * round(dz / box_len[2]);
|
|
||||||
|
|
||||||
ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
|
|
||||||
forces[3 * i] += sol.force.x;
|
|
||||||
forces[3 * i + 1] += sol.force.y;
|
|
||||||
forces[3 * i + 2] += sol.force.z;
|
|
||||||
energies[i] = sol.energy;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,19 +1,80 @@
|
||||||
#ifndef FORCES_CUH
|
#ifndef FORCES_CUH
|
||||||
#define FORCES_CUH
|
#define FORCES_CUH
|
||||||
|
#include "potentials/pair_potentials.cuh"
|
||||||
#include "pair_potentials.cuh"
|
|
||||||
#include "precision.hpp"
|
#include "precision.hpp"
|
||||||
|
#include <cstdio>
|
||||||
|
#include <type_traits>
|
||||||
|
#include <variant>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace CAC {
|
namespace CAC {
|
||||||
/**
|
|
||||||
* Calculate forces and energies using CUDA for acceleration
|
inline void reset_forces_and_energies(int n_particles, real *forces,
|
||||||
* This code currently only accepts a single PairPotential object and does an
|
real *energies) {
|
||||||
* n^2 force calculation. Future improvements will:
|
cudaMemset(forces, 0, n_particles * sizeof(real) * 3);
|
||||||
* - Allow for neighbor listing
|
cudaMemset(energies, 0, n_particles * sizeof(real));
|
||||||
* - Allow for overlaid force calculations
|
}
|
||||||
*/
|
|
||||||
|
template <typename PotentialType>
|
||||||
__global__ void calc_forces_and_energies(real *xs, real *forces, real *energies,
|
__global__ void calc_forces_and_energies(real *xs, real *forces, real *energies,
|
||||||
int n_particles, real *box_bd,
|
int n_particles, real *box_len,
|
||||||
PairPotential &potential);
|
PotentialType potential) {
|
||||||
|
int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
|
||||||
|
if (i == 0) {
|
||||||
|
printf("n_particles: %d\n", n_particles);
|
||||||
|
printf("box_len: %f %f %f\n", box_len[0], box_len[1], box_len[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i < n_particles) {
|
||||||
|
printf("Thread %d, Block %d\n", threadIdx.x, blockIdx.x);
|
||||||
|
real xi = xs[3 * i];
|
||||||
|
real yi = xs[3 * i + 1];
|
||||||
|
real zi = xs[3 * i + 2];
|
||||||
|
|
||||||
|
for (int j = 0; j < n_particles; j++) {
|
||||||
|
if (i != j) {
|
||||||
|
real xj = xs[3 * j];
|
||||||
|
real yj = xs[3 * j + 1];
|
||||||
|
real zj = xs[3 * j + 2];
|
||||||
|
|
||||||
|
real dx = xi - xj;
|
||||||
|
real dy = yi - yj;
|
||||||
|
real dz = zi - zj;
|
||||||
|
|
||||||
|
// Apply periodic boundary conditions
|
||||||
|
dx -= box_len[0] * round(dx / box_len[0]);
|
||||||
|
dy -= box_len[1] * round(dy / box_len[1]);
|
||||||
|
dz -= box_len[2] * round(dz / box_len[2]);
|
||||||
|
|
||||||
|
ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
|
||||||
|
forces[3 * i] += sol.force.x;
|
||||||
|
forces[3 * i + 1] += sol.force.y;
|
||||||
|
forces[3 * i + 2] += sol.force.z;
|
||||||
|
energies[i] += sol.energy;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void launch_force_kernels(real *xs, real *forces, real *energies,
|
||||||
|
int n_particles, real *box_len,
|
||||||
|
std::vector<PairPotentials> potentials,
|
||||||
|
int grid_size, int block_size) {
|
||||||
|
|
||||||
|
reset_forces_and_energies(n_particles, forces, energies);
|
||||||
|
|
||||||
|
for (const auto &potential : potentials) {
|
||||||
|
std::visit(
|
||||||
|
[&](const auto &potential) {
|
||||||
|
using PotentialType = std::decay_t<decltype(potential)>;
|
||||||
|
calc_forces_and_energies<PotentialType><<<grid_size, block_size>>>(
|
||||||
|
xs, forces, energies, n_particles, box_len, potential);
|
||||||
|
},
|
||||||
|
potential);
|
||||||
|
cudaDeviceSynchronize();
|
||||||
|
}
|
||||||
|
}
|
||||||
} // namespace CAC
|
} // namespace CAC
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,91 +0,0 @@
|
||||||
#ifndef POTENTIALS_CUH
|
|
||||||
#define POTENTIALS_CUH
|
|
||||||
|
|
||||||
#include "precision.hpp"
|
|
||||||
#include "vec3.h"
|
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
|
||||||
#define CUDA_CALLABLE __host__ __device__
|
|
||||||
#else
|
|
||||||
#define CUDA_CALLABLE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Result struct for the Pair Potential
|
|
||||||
*/
|
|
||||||
struct ForceAndEnergy {
|
|
||||||
real energy;
|
|
||||||
Vec3<real> force;
|
|
||||||
|
|
||||||
CUDA_CALLABLE inline static ForceAndEnergy zero() {
|
|
||||||
return {0.0, {0.0, 0.0, 0.0}};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Abstract implementation of a Pair Potential.
|
|
||||||
* Pair potentials are potentials which depend solely on the distance
|
|
||||||
* between two particles. These do not include multi-body potentials such as
|
|
||||||
* EAM
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
struct PairPotential {
|
|
||||||
real m_rcutoffsq;
|
|
||||||
|
|
||||||
CUDA_CALLABLE PairPotential(real rcutoff) : m_rcutoffsq(rcutoff * rcutoff) {};
|
|
||||||
#ifdef __CUDACC__
|
|
||||||
CUDA_CALLABLE ~PairPotential();
|
|
||||||
#else
|
|
||||||
virtual ~PairPotential() = 0;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Calculate the force and energy for a specific atom pair based on a
|
|
||||||
* displacement vector r.
|
|
||||||
*/
|
|
||||||
CUDA_CALLABLE virtual ForceAndEnergy calc_force_and_energy(Vec3<real> r) = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Calculate the Lennard-Jones energy and force for the current particle pair
|
|
||||||
* described by displacement vector r
|
|
||||||
*/
|
|
||||||
struct LennardJones : PairPotential {
|
|
||||||
real m_epsilon;
|
|
||||||
real m_sigma;
|
|
||||||
|
|
||||||
CUDA_CALLABLE LennardJones(real sigma, real epsilon, real rcutoff)
|
|
||||||
: PairPotential(rcutoff), m_epsilon(epsilon), m_sigma(sigma) {};
|
|
||||||
|
|
||||||
CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3<real> r) {
|
|
||||||
real rmagsq = r.squared_norm2();
|
|
||||||
if (rmagsq < this->m_rcutoffsq && rmagsq > 0.0) {
|
|
||||||
real inv_rmag = 1 / std::sqrt(rmagsq);
|
|
||||||
|
|
||||||
// Pre-Compute the terms (doing this saves on multiple devisions/pow
|
|
||||||
// function call)
|
|
||||||
real sigma_r = m_sigma * inv_rmag;
|
|
||||||
real sigma_r6 = sigma_r * sigma_r * sigma_r * sigma_r * sigma_r * sigma_r;
|
|
||||||
real sigma_r12 = sigma_r6 * sigma_r6;
|
|
||||||
|
|
||||||
// Get the energy
|
|
||||||
real energy = 4.0 * m_epsilon * (sigma_r12 - sigma_r6);
|
|
||||||
|
|
||||||
// Get the force vector
|
|
||||||
real force_mag =
|
|
||||||
4.0 * m_epsilon *
|
|
||||||
(12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag);
|
|
||||||
Vec3<real> force = r.scale(force_mag * inv_rmag);
|
|
||||||
|
|
||||||
return {energy, force};
|
|
||||||
|
|
||||||
} else {
|
|
||||||
return ForceAndEnergy::zero();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
CUDA_CALLABLE inline ~LennardJones(){};
|
|
||||||
};
|
|
||||||
|
|
||||||
inline PairPotential::~PairPotential() {};
|
|
||||||
#endif
|
|
118
kernels/potentials/pair_potentials.cuh
Normal file
118
kernels/potentials/pair_potentials.cuh
Normal file
|
@ -0,0 +1,118 @@
|
||||||
|
#ifndef POTENTIALS_CUH
|
||||||
|
#define POTENTIALS_CUH
|
||||||
|
|
||||||
|
#include "precision.hpp"
|
||||||
|
#include "vec3.h"
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <variant>
|
||||||
|
|
||||||
|
#ifdef __CUDACC__
|
||||||
|
#define CUDA_CALLABLE __host__ __device__
|
||||||
|
#else
|
||||||
|
#define CUDA_CALLABLE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result struct for the Pair Potential
|
||||||
|
*/
|
||||||
|
struct ForceAndEnergy {
|
||||||
|
real energy;
|
||||||
|
Vec3<real> force;
|
||||||
|
|
||||||
|
CUDA_CALLABLE inline static ForceAndEnergy zero() {
|
||||||
|
return {0.0, {0.0, 0.0, 0.0}};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate the Lennard-Jones energy and force for the current particle
|
||||||
|
* pair described by displacement vector r
|
||||||
|
*/
|
||||||
|
struct LennardJones {
|
||||||
|
real m_sigma;
|
||||||
|
real m_epsilon;
|
||||||
|
real m_rcutoffsq;
|
||||||
|
|
||||||
|
CUDA_CALLABLE LennardJones(real sigma, real epsilon, real rcutoff) {
|
||||||
|
m_sigma = sigma;
|
||||||
|
m_epsilon = epsilon;
|
||||||
|
m_rcutoffsq = rcutoff * rcutoff;
|
||||||
|
};
|
||||||
|
|
||||||
|
CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3<real> r) {
|
||||||
|
real rmagsq = r.squared_norm2();
|
||||||
|
if (rmagsq < m_rcutoffsq && rmagsq > 0.0) {
|
||||||
|
real inv_rmag = 1 / sqrt(rmagsq);
|
||||||
|
|
||||||
|
// Pre-Compute the terms (doing this saves on multiple devisions/pow
|
||||||
|
// function call)
|
||||||
|
real sigma_r = m_sigma * inv_rmag;
|
||||||
|
real sigma_r6 = sigma_r * sigma_r * sigma_r * sigma_r * sigma_r * sigma_r;
|
||||||
|
real sigma_r12 = sigma_r6 * sigma_r6;
|
||||||
|
|
||||||
|
// Get the energy
|
||||||
|
real energy = 4.0 * m_epsilon * (sigma_r12 - sigma_r6);
|
||||||
|
|
||||||
|
// Get the force vector
|
||||||
|
real force_mag =
|
||||||
|
4.0 * m_epsilon *
|
||||||
|
(12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag);
|
||||||
|
Vec3<real> force = r.scale(force_mag * inv_rmag);
|
||||||
|
|
||||||
|
return {energy, force};
|
||||||
|
|
||||||
|
} else {
|
||||||
|
return ForceAndEnergy::zero();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate the Morse potential energy and force for the current particle pair
|
||||||
|
* described by displacement vector r
|
||||||
|
*/
|
||||||
|
struct Morse {
|
||||||
|
real m_D; // Depth of the potential well
|
||||||
|
real m_a; // Width of the potential
|
||||||
|
real m_r0; // Equilibrium bond distance
|
||||||
|
real m_rcutoffsq; // Cutoff distance squared
|
||||||
|
|
||||||
|
CUDA_CALLABLE Morse(real D, real a, real r0, real rcutoff) {
|
||||||
|
m_D = D;
|
||||||
|
m_a = a;
|
||||||
|
m_r0 = r0;
|
||||||
|
m_rcutoffsq = rcutoff * rcutoff;
|
||||||
|
};
|
||||||
|
|
||||||
|
CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3<real> r) {
|
||||||
|
real rmagsq = r.squared_norm2();
|
||||||
|
if (rmagsq < m_rcutoffsq && rmagsq > 0.0) {
|
||||||
|
real rmag = sqrt(rmagsq);
|
||||||
|
real dr = rmag - m_r0;
|
||||||
|
|
||||||
|
// Compute exponentials
|
||||||
|
real exp_a_dr = exp(-m_a * dr);
|
||||||
|
real exp_2a_dr = exp_a_dr * exp_a_dr;
|
||||||
|
|
||||||
|
// Energy: V(r) = D * (exp(-2a(r - r0)) - 2*exp(-a(r - r0)))
|
||||||
|
real energy = m_D * (exp_2a_dr - 2.0 * exp_a_dr);
|
||||||
|
|
||||||
|
// Force magnitude: F(r) = 2aD * (exp(-2a(r - r0)) - exp(-a(r - r0)))
|
||||||
|
real force_mag = 2.0 * m_a * m_D * (exp_2a_dr - exp_a_dr);
|
||||||
|
|
||||||
|
// Direction: normalized vector
|
||||||
|
Vec3<real> force = r.scale(force_mag / rmag);
|
||||||
|
|
||||||
|
return {energy, force};
|
||||||
|
|
||||||
|
} else {
|
||||||
|
return ForceAndEnergy::zero();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
// Variant type for storing pair potential types
|
||||||
|
using PairPotentials = std::variant<LennardJones, Morse>;
|
||||||
|
|
||||||
|
#endif
|
|
@ -5,11 +5,14 @@
|
||||||
|
|
||||||
// Include your header files
|
// Include your header files
|
||||||
#include "forces.cuh"
|
#include "forces.cuh"
|
||||||
#include "pair_potentials.cuh"
|
#include "potentials/pair_potentials.cuh"
|
||||||
#include "precision.hpp"
|
#include "precision.hpp"
|
||||||
|
|
||||||
class CudaKernelTest : public ::testing::Test {
|
class CudaForceKernelTest : public ::testing::Test {
|
||||||
protected:
|
protected:
|
||||||
|
const int GRID_SIZE = 1;
|
||||||
|
const int BLOCK_SIZE = 4;
|
||||||
|
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
// Set up CUDA device
|
// Set up CUDA device
|
||||||
cudaError_t err = cudaSetDevice(0);
|
cudaError_t err = cudaSetDevice(0);
|
||||||
|
@ -50,53 +53,52 @@ protected:
|
||||||
checkCudaError(cudaFree(device_ptr), "cudaFree");
|
checkCudaError(cudaFree(device_ptr), "cudaFree");
|
||||||
return host_data;
|
return host_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper function to run the force calculation kernel
|
||||||
|
std::pair<std::vector<real>, std::vector<real>>
|
||||||
|
run_force_calculation(int n_particles, const std::vector<real> &positions,
|
||||||
|
const std::vector<real> &box_dimensions) {
|
||||||
|
std::vector<real> forces(3 * n_particles, 0.0);
|
||||||
|
std::vector<real> energies(n_particles, 0.0);
|
||||||
|
|
||||||
|
real *d_positions = allocateAndCopyToGPU(positions);
|
||||||
|
real *d_forces = allocateAndCopyToGPU(forces);
|
||||||
|
real *d_energies = allocateAndCopyToGPU(energies);
|
||||||
|
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
||||||
|
|
||||||
|
std::vector<PairPotentials> potentials = {LennardJones(1.0, 1.0, 3.0)};
|
||||||
|
CAC::launch_force_kernels(d_positions, d_forces, d_energies, n_particles,
|
||||||
|
d_box_len, potentials, GRID_SIZE, BLOCK_SIZE);
|
||||||
|
|
||||||
|
checkCudaError(cudaGetLastError(), "kernel launch");
|
||||||
|
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
||||||
|
|
||||||
|
std::vector<real> result_forces =
|
||||||
|
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
||||||
|
std::vector<real> result_energies =
|
||||||
|
copyFromGPUAndFree(d_energies, n_particles);
|
||||||
|
|
||||||
|
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
||||||
|
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
||||||
|
|
||||||
|
return {result_forces, result_energies};
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_F(CudaKernelTest, BasicFunctionalityTest) {
|
TEST_F(CudaForceKernelTest, BasicFunctionalityTest) {
|
||||||
const int n_particles = 4;
|
const int n_particles = 2;
|
||||||
const real tolerance = 1e-5;
|
const real tolerance = 1e-5;
|
||||||
|
|
||||||
// Set up test data - simple 2x2 grid of particles
|
// Set up test data - simple 2x2 grid of particles
|
||||||
std::vector<real> positions = {
|
std::vector<real> positions = {
|
||||||
0.0, 0.0, 0.0, // particle 0
|
0.0, 0.0, 0.0, // particle 0
|
||||||
1.0, 0.0, 0.0, // particle 1
|
1.0, 0.0, 0.0, // particle 1
|
||||||
0.0, 1.0, 0.0, // particle 2
|
|
||||||
1.0, 1.0, 0.0 // particle 3
|
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<real> forces(3 * n_particles, 0.0);
|
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
||||||
std::vector<real> energies(n_particles, 0.0);
|
|
||||||
std::vector<real> box_dimensions = {10.0, 10.0,
|
|
||||||
10.0}; // Large box to avoid PBC effects
|
|
||||||
|
|
||||||
// Allocate GPU memory and copy data
|
auto [result_forces, result_energies] =
|
||||||
real *d_positions = allocateAndCopyToGPU(positions);
|
run_force_calculation(n_particles, positions, box_dimensions);
|
||||||
real *d_forces = allocateAndCopyToGPU(forces);
|
|
||||||
real *d_energies = allocateAndCopyToGPU(energies);
|
|
||||||
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
|
||||||
|
|
||||||
// Create Lennard-Jones potential (sigma=1.0, epsilon=1.0, rcutoff=3.0)
|
|
||||||
LennardJones potential(1.0, 1.0, 3.0);
|
|
||||||
|
|
||||||
// Launch kernel
|
|
||||||
dim3 blockSize(256);
|
|
||||||
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
|
|
||||||
|
|
||||||
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
|
|
||||||
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
|
|
||||||
|
|
||||||
checkCudaError(cudaGetLastError(), "kernel launch");
|
|
||||||
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
|
||||||
|
|
||||||
// Copy results back to host
|
|
||||||
std::vector<real> result_forces =
|
|
||||||
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
|
||||||
std::vector<real> result_energies =
|
|
||||||
copyFromGPUAndFree(d_energies, n_particles);
|
|
||||||
|
|
||||||
// Clean up remaining GPU memory
|
|
||||||
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
|
||||||
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
|
||||||
|
|
||||||
// Verify results - forces should be non-zero and energies should be
|
// Verify results - forces should be non-zero and energies should be
|
||||||
// calculated
|
// calculated
|
||||||
|
@ -117,161 +119,72 @@ TEST_F(CudaKernelTest, BasicFunctionalityTest) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
EXPECT_FALSE(has_nonzero_force)
|
EXPECT_TRUE(has_nonzero_force)
|
||||||
<< "Expected non-zero forces between particles";
|
<< "Expected non-zero forces between particles";
|
||||||
EXPECT_TRUE(has_nonzero_energy) << "Expected non-zero energies for particles";
|
EXPECT_TRUE(has_nonzero_energy)
|
||||||
|
<< "Expected non-zero energies for particles ";
|
||||||
}
|
}
|
||||||
|
//
|
||||||
|
// TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) {
|
||||||
|
// const int n_particles = 2;
|
||||||
|
// const real tolerance = 1e-5;
|
||||||
|
//
|
||||||
|
// // Place particles near opposite edges of a small box
|
||||||
|
// std::vector<real> positions = {
|
||||||
|
// 0.1, 0.0, 0.0, // particle 0 near left edge
|
||||||
|
// 4.9, 0.0, 0.0 // particle 1 near right edge
|
||||||
|
// };
|
||||||
|
// std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test
|
||||||
|
// PBC
|
||||||
|
//
|
||||||
|
// auto [result_forces, result_energies] =
|
||||||
|
// run_force_calculation(n_particles, &positions, &box_dimensions);
|
||||||
|
//
|
||||||
|
// // With PBC, particles should interact as if they're close (distance ~0.2)
|
||||||
|
// // rather than far apart (distance ~4.8)
|
||||||
|
// EXPECT_GT(std::abs(result_forces[0]), tolerance)
|
||||||
|
// << "Expected significant force due to PBC";
|
||||||
|
// EXPECT_GT(std::abs(result_energies[0]), tolerance)
|
||||||
|
// << "Expected significant energy due to PBC";
|
||||||
|
// }
|
||||||
|
|
||||||
TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) {
|
// TEST_F(CudaForceKernelTest, SingleParticleTest) {
|
||||||
const int n_particles = 2;
|
// const int n_particles = 1;
|
||||||
const real tolerance = 1e-5;
|
//
|
||||||
|
// std::vector<real> positions = {0.0, 0.0, 0.0};
|
||||||
|
// std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
||||||
|
//
|
||||||
|
// auto [result_forces, result_energies] =
|
||||||
|
// run_force_calculation(n_particles, positions, box_dimensions);
|
||||||
|
// // Single particle should have zero force and energy
|
||||||
|
// EXPECT_NEAR(result_forces[0], 0.0, 1e-10);
|
||||||
|
// EXPECT_NEAR(result_forces[1], 0.0, 1e-10);
|
||||||
|
// EXPECT_NEAR(result_forces[2], 0.0, 1e-10);
|
||||||
|
// EXPECT_NEAR(result_energies[0], 0.0, 1e-10);
|
||||||
|
// }
|
||||||
|
|
||||||
// Place particles near opposite edges of a small box
|
// TEST_F(CudaKernelTest, ForceSymmetryTest) {
|
||||||
std::vector<real> positions = {
|
// const int n_particles = 2;
|
||||||
0.1, 0.0, 0.0, // particle 0 near left edge
|
// const real tolerance = 1e-5;
|
||||||
4.9, 0.0, 0.0 // particle 1 near right edge
|
//
|
||||||
};
|
// std::vector<real> positions = {
|
||||||
|
// 0.0, 0.0, 0.0, // particle 0
|
||||||
std::vector<real> forces(3 * n_particles, 0.0);
|
// 1.5, 0.0, 0.0 // particle 1
|
||||||
std::vector<real> energies(n_particles, 0.0);
|
// };
|
||||||
std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC
|
// std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
||||||
|
//
|
||||||
// Allocate GPU memory and copy data
|
// auto [result_forces, result_energies] =
|
||||||
real *d_positions = allocateAndCopyToGPU(positions);
|
// run_force_calculation(n_particles, &positions, &box_dimensions);
|
||||||
real *d_forces = allocateAndCopyToGPU(forces);
|
//
|
||||||
real *d_energies = allocateAndCopyToGPU(energies);
|
// // Newton's third law: forces should be equal and opposite
|
||||||
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
// EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance)
|
||||||
|
// << "Force x-components should be opposite";
|
||||||
// Create Lennard-Jones potential with large cutoff to ensure interaction
|
// EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance)
|
||||||
LennardJones potential(1.0, 1.0, 3.0);
|
// << "Force y-components should be opposite";
|
||||||
|
// EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance)
|
||||||
// Launch kernel
|
// << "Force z-components should be opposite";
|
||||||
dim3 blockSize(256);
|
//
|
||||||
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
|
// // Energies should be equal for symmetric particles
|
||||||
|
// EXPECT_NEAR(result_energies[0], result_energies[1], tolerance)
|
||||||
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
|
// << "Energies should be equal";
|
||||||
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
|
// }
|
||||||
|
|
||||||
checkCudaError(cudaGetLastError(), "kernel launch");
|
|
||||||
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
|
||||||
|
|
||||||
// Copy results back to host
|
|
||||||
std::vector<real> result_forces =
|
|
||||||
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
|
||||||
std::vector<real> result_energies =
|
|
||||||
copyFromGPUAndFree(d_energies, n_particles);
|
|
||||||
|
|
||||||
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
|
||||||
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
|
||||||
|
|
||||||
// With PBC, particles should interact as if they're close (distance ~0.2)
|
|
||||||
// rather than far apart (distance ~4.8)
|
|
||||||
EXPECT_GT(std::abs(result_forces[0]), tolerance)
|
|
||||||
<< "Expected significant force due to PBC";
|
|
||||||
EXPECT_GT(std::abs(result_energies[0]), tolerance)
|
|
||||||
<< "Expected significant energy due to PBC";
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(CudaKernelTest, SingleParticleTest) {
|
|
||||||
const int n_particles = 1;
|
|
||||||
|
|
||||||
std::vector<real> positions = {0.0, 0.0, 0.0};
|
|
||||||
std::vector<real> forces(3 * n_particles, 0.0);
|
|
||||||
std::vector<real> energies(n_particles, 0.0);
|
|
||||||
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
|
||||||
|
|
||||||
real *d_positions = allocateAndCopyToGPU(positions);
|
|
||||||
real *d_forces = allocateAndCopyToGPU(forces);
|
|
||||||
real *d_energies = allocateAndCopyToGPU(energies);
|
|
||||||
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
|
||||||
|
|
||||||
LennardJones potential(1.0, 1.0, 3.0);
|
|
||||||
|
|
||||||
dim3 blockSize(256);
|
|
||||||
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
|
|
||||||
|
|
||||||
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
|
|
||||||
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
|
|
||||||
|
|
||||||
checkCudaError(cudaGetLastError(), "kernel launch");
|
|
||||||
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
|
||||||
|
|
||||||
std::vector<real> result_forces =
|
|
||||||
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
|
||||||
std::vector<real> result_energies =
|
|
||||||
copyFromGPUAndFree(d_energies, n_particles);
|
|
||||||
|
|
||||||
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
|
||||||
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
|
||||||
|
|
||||||
// Single particle should have zero force and energy
|
|
||||||
EXPECT_NEAR(result_forces[0], 0.0, 1e-10);
|
|
||||||
EXPECT_NEAR(result_forces[1], 0.0, 1e-10);
|
|
||||||
EXPECT_NEAR(result_forces[2], 0.0, 1e-10);
|
|
||||||
EXPECT_NEAR(result_energies[0], 0.0, 1e-10);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(CudaKernelTest, ForceSymmetryTest) {
|
|
||||||
const int n_particles = 2;
|
|
||||||
const real tolerance = 1e-5;
|
|
||||||
|
|
||||||
std::vector<real> positions = {
|
|
||||||
0.0, 0.0, 0.0, // particle 0
|
|
||||||
1.5, 0.0, 0.0 // particle 1
|
|
||||||
};
|
|
||||||
|
|
||||||
std::vector<real> forces(3 * n_particles, 0.0);
|
|
||||||
std::vector<real> energies(n_particles, 0.0);
|
|
||||||
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
|
||||||
|
|
||||||
real *d_positions = allocateAndCopyToGPU(positions);
|
|
||||||
real *d_forces = allocateAndCopyToGPU(forces);
|
|
||||||
real *d_energies = allocateAndCopyToGPU(energies);
|
|
||||||
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
|
||||||
|
|
||||||
LennardJones potential(1.0, 1.0, 3.0);
|
|
||||||
|
|
||||||
dim3 blockSize(256);
|
|
||||||
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
|
|
||||||
|
|
||||||
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
|
|
||||||
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
|
|
||||||
|
|
||||||
checkCudaError(cudaGetLastError(), "kernel launch");
|
|
||||||
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
|
||||||
|
|
||||||
std::vector<real> result_forces =
|
|
||||||
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
|
||||||
std::vector<real> result_energies =
|
|
||||||
copyFromGPUAndFree(d_energies, n_particles);
|
|
||||||
|
|
||||||
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
|
||||||
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
|
||||||
|
|
||||||
// Newton's third law: forces should be equal and opposite
|
|
||||||
EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance)
|
|
||||||
<< "Force x-components should be opposite";
|
|
||||||
EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance)
|
|
||||||
<< "Force y-components should be opposite";
|
|
||||||
EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance)
|
|
||||||
<< "Force z-components should be opposite";
|
|
||||||
|
|
||||||
// Energies should be equal for symmetric particles
|
|
||||||
EXPECT_NEAR(result_energies[0], result_energies[1], tolerance)
|
|
||||||
<< "Energies should be equal";
|
|
||||||
}
|
|
||||||
|
|
||||||
// Main function to run tests
|
|
||||||
int main(int argc, char **argv) {
|
|
||||||
::testing::InitGoogleTest(&argc, argv);
|
|
||||||
|
|
||||||
// Check if CUDA is available
|
|
||||||
int deviceCount;
|
|
||||||
cudaError_t err = cudaGetDeviceCount(&deviceCount);
|
|
||||||
if (err != cudaSuccess || deviceCount == 0) {
|
|
||||||
std::cout << "No CUDA devices available. Skipping CUDA tests." << std::endl;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return RUN_ALL_TESTS();
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#include "pair_potentials.cuh"
|
#include "potentials/pair_potentials.cuh"
|
||||||
#include "precision.hpp"
|
#include "precision.hpp"
|
||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
@ -69,6 +69,7 @@ __global__ void lennard_jones_test_kernel(TestResults *results) {
|
||||||
auto result = lj.calc_force_and_energy(r);
|
auto result = lj.calc_force_and_energy(r);
|
||||||
results->energy_values[2] = result.energy;
|
results->energy_values[2] = result.energy;
|
||||||
results->force_values[2] = result.force;
|
results->force_values[2] = result.force;
|
||||||
|
|
||||||
results->at_minimum_pass =
|
results->at_minimum_pass =
|
||||||
(fabs(result.energy + epsilon) < tolerance) &&
|
(fabs(result.energy + epsilon) < tolerance) &&
|
||||||
vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance);
|
vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#include "pair_potentials.cuh"
|
#include "potentials/pair_potentials.cuh"
|
||||||
#include "precision.hpp"
|
#include "precision.hpp"
|
||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue