Fix missing summation for energy
This commit is contained in:
parent
7f04ae793a
commit
a638c4f388
2 changed files with 114 additions and 192 deletions
|
@ -29,7 +29,7 @@ __global__ void CAC::calc_forces_and_energies(real *xs, real *forces,
|
||||||
forces[3 * i] += sol.force.x;
|
forces[3 * i] += sol.force.x;
|
||||||
forces[3 * i + 1] += sol.force.y;
|
forces[3 * i + 1] += sol.force.y;
|
||||||
forces[3 * i + 2] += sol.force.z;
|
forces[3 * i + 2] += sol.force.z;
|
||||||
energies[i] = sol.energy;
|
energies[i] += sol.energy;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,8 +8,11 @@
|
||||||
#include "pair_potentials.cuh"
|
#include "pair_potentials.cuh"
|
||||||
#include "precision.hpp"
|
#include "precision.hpp"
|
||||||
|
|
||||||
class CudaKernelTest : public ::testing::Test {
|
class CudaForceKernelTest : public ::testing::Test {
|
||||||
protected:
|
protected:
|
||||||
|
const int BLOCK_SIZE = 1;
|
||||||
|
const int THREADS_PER_BLOCK = 4;
|
||||||
|
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
// Set up CUDA device
|
// Set up CUDA device
|
||||||
cudaError_t err = cudaSetDevice(0);
|
cudaError_t err = cudaSetDevice(0);
|
||||||
|
@ -50,53 +53,61 @@ protected:
|
||||||
checkCudaError(cudaFree(device_ptr), "cudaFree");
|
checkCudaError(cudaFree(device_ptr), "cudaFree");
|
||||||
return host_data;
|
return host_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper function to run the force calculation kernel
|
||||||
|
std::pair<std::vector<real>, std::vector<real>>
|
||||||
|
run_force_calculation(int n_particles, const std::vector<real> &positions,
|
||||||
|
const std::vector<real> &box_dimensions) {
|
||||||
|
std::vector<real> forces(3 * n_particles, 0.0);
|
||||||
|
std::vector<real> energies(n_particles, 0.0);
|
||||||
|
|
||||||
|
real *d_positions = allocateAndCopyToGPU(positions);
|
||||||
|
real *d_forces = allocateAndCopyToGPU(forces);
|
||||||
|
real *d_energies = allocateAndCopyToGPU(energies);
|
||||||
|
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
||||||
|
|
||||||
|
// Allocate potential on the GPU
|
||||||
|
LennardJones h_potential(1.0, 1.0, 3.0);
|
||||||
|
LennardJones *d_potential;
|
||||||
|
checkCudaError(cudaMalloc(&d_potential, sizeof(LennardJones)),
|
||||||
|
"cudaMalloc potential");
|
||||||
|
checkCudaError(cudaMemcpy(d_potential, &h_potential, sizeof(LennardJones),
|
||||||
|
cudaMemcpyHostToDevice),
|
||||||
|
"cudaMemcpy H2D potential");
|
||||||
|
|
||||||
|
CAC::calc_forces_and_energies<<<BLOCK_SIZE, THREADS_PER_BLOCK>>>(
|
||||||
|
d_positions, d_forces, d_energies, n_particles, d_box_len, d_potential);
|
||||||
|
|
||||||
|
checkCudaError(cudaGetLastError(), "kernel launch");
|
||||||
|
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
||||||
|
|
||||||
|
std::vector<real> result_forces =
|
||||||
|
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
||||||
|
std::vector<real> result_energies =
|
||||||
|
copyFromGPUAndFree(d_energies, n_particles);
|
||||||
|
|
||||||
|
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
||||||
|
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
||||||
|
checkCudaError(cudaFree(d_potential), "cudaFree potential");
|
||||||
|
|
||||||
|
return {result_forces, result_energies};
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_F(CudaKernelTest, BasicFunctionalityTest) {
|
TEST_F(CudaForceKernelTest, BasicFunctionalityTest) {
|
||||||
const int n_particles = 4;
|
const int n_particles = 2;
|
||||||
const real tolerance = 1e-5;
|
const real tolerance = 1e-5;
|
||||||
|
|
||||||
// Set up test data - simple 2x2 grid of particles
|
// Set up test data - simple 2x2 grid of particles
|
||||||
std::vector<real> positions = {
|
std::vector<real> positions = {
|
||||||
0.0, 0.0, 0.0, // particle 0
|
0.0, 0.0, 0.0, // particle 0
|
||||||
1.0, 0.0, 0.0, // particle 1
|
1.0, 0.0, 0.0, // particle 1
|
||||||
0.0, 1.0, 0.0, // particle 2
|
|
||||||
1.0, 1.0, 0.0 // particle 3
|
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<real> forces(3 * n_particles, 0.0);
|
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
||||||
std::vector<real> energies(n_particles, 0.0);
|
|
||||||
std::vector<real> box_dimensions = {10.0, 10.0,
|
|
||||||
10.0}; // Large box to avoid PBC effects
|
|
||||||
|
|
||||||
// Allocate GPU memory and copy data
|
auto [result_forces, result_energies] =
|
||||||
real *d_positions = allocateAndCopyToGPU(positions);
|
run_force_calculation(n_particles, positions, box_dimensions);
|
||||||
real *d_forces = allocateAndCopyToGPU(forces);
|
|
||||||
real *d_energies = allocateAndCopyToGPU(energies);
|
|
||||||
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
|
||||||
|
|
||||||
// Create Lennard-Jones potential (sigma=1.0, epsilon=1.0, rcutoff=3.0)
|
|
||||||
LennardJones potential(1.0, 1.0, 3.0);
|
|
||||||
|
|
||||||
// Launch kernel
|
|
||||||
dim3 blockSize(256);
|
|
||||||
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
|
|
||||||
|
|
||||||
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
|
|
||||||
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
|
|
||||||
|
|
||||||
checkCudaError(cudaGetLastError(), "kernel launch");
|
|
||||||
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
|
||||||
|
|
||||||
// Copy results back to host
|
|
||||||
std::vector<real> result_forces =
|
|
||||||
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
|
||||||
std::vector<real> result_energies =
|
|
||||||
copyFromGPUAndFree(d_energies, n_particles);
|
|
||||||
|
|
||||||
// Clean up remaining GPU memory
|
|
||||||
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
|
||||||
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
|
||||||
|
|
||||||
// Verify results - forces should be non-zero and energies should be
|
// Verify results - forces should be non-zero and energies should be
|
||||||
// calculated
|
// calculated
|
||||||
|
@ -117,161 +128,72 @@ TEST_F(CudaKernelTest, BasicFunctionalityTest) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
EXPECT_FALSE(has_nonzero_force)
|
EXPECT_TRUE(has_nonzero_force)
|
||||||
<< "Expected non-zero forces between particles";
|
<< "Expected non-zero forces between particles";
|
||||||
EXPECT_TRUE(has_nonzero_energy) << "Expected non-zero energies for particles";
|
EXPECT_TRUE(has_nonzero_energy)
|
||||||
|
<< "Expected non-zero energies for particles ";
|
||||||
}
|
}
|
||||||
|
//
|
||||||
|
// TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) {
|
||||||
|
// const int n_particles = 2;
|
||||||
|
// const real tolerance = 1e-5;
|
||||||
|
//
|
||||||
|
// // Place particles near opposite edges of a small box
|
||||||
|
// std::vector<real> positions = {
|
||||||
|
// 0.1, 0.0, 0.0, // particle 0 near left edge
|
||||||
|
// 4.9, 0.0, 0.0 // particle 1 near right edge
|
||||||
|
// };
|
||||||
|
// std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test
|
||||||
|
// PBC
|
||||||
|
//
|
||||||
|
// auto [result_forces, result_energies] =
|
||||||
|
// run_force_calculation(n_particles, &positions, &box_dimensions);
|
||||||
|
//
|
||||||
|
// // With PBC, particles should interact as if they're close (distance ~0.2)
|
||||||
|
// // rather than far apart (distance ~4.8)
|
||||||
|
// EXPECT_GT(std::abs(result_forces[0]), tolerance)
|
||||||
|
// << "Expected significant force due to PBC";
|
||||||
|
// EXPECT_GT(std::abs(result_energies[0]), tolerance)
|
||||||
|
// << "Expected significant energy due to PBC";
|
||||||
|
// }
|
||||||
|
|
||||||
TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) {
|
// TEST_F(CudaForceKernelTest, SingleParticleTest) {
|
||||||
const int n_particles = 2;
|
// const int n_particles = 1;
|
||||||
const real tolerance = 1e-5;
|
//
|
||||||
|
// std::vector<real> positions = {0.0, 0.0, 0.0};
|
||||||
|
// std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
||||||
|
//
|
||||||
|
// auto [result_forces, result_energies] =
|
||||||
|
// run_force_calculation(n_particles, positions, box_dimensions);
|
||||||
|
// // Single particle should have zero force and energy
|
||||||
|
// EXPECT_NEAR(result_forces[0], 0.0, 1e-10);
|
||||||
|
// EXPECT_NEAR(result_forces[1], 0.0, 1e-10);
|
||||||
|
// EXPECT_NEAR(result_forces[2], 0.0, 1e-10);
|
||||||
|
// EXPECT_NEAR(result_energies[0], 0.0, 1e-10);
|
||||||
|
// }
|
||||||
|
|
||||||
// Place particles near opposite edges of a small box
|
// TEST_F(CudaKernelTest, ForceSymmetryTest) {
|
||||||
std::vector<real> positions = {
|
// const int n_particles = 2;
|
||||||
0.1, 0.0, 0.0, // particle 0 near left edge
|
// const real tolerance = 1e-5;
|
||||||
4.9, 0.0, 0.0 // particle 1 near right edge
|
//
|
||||||
};
|
// std::vector<real> positions = {
|
||||||
|
// 0.0, 0.0, 0.0, // particle 0
|
||||||
std::vector<real> forces(3 * n_particles, 0.0);
|
// 1.5, 0.0, 0.0 // particle 1
|
||||||
std::vector<real> energies(n_particles, 0.0);
|
// };
|
||||||
std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC
|
// std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
||||||
|
//
|
||||||
// Allocate GPU memory and copy data
|
// auto [result_forces, result_energies] =
|
||||||
real *d_positions = allocateAndCopyToGPU(positions);
|
// run_force_calculation(n_particles, &positions, &box_dimensions);
|
||||||
real *d_forces = allocateAndCopyToGPU(forces);
|
//
|
||||||
real *d_energies = allocateAndCopyToGPU(energies);
|
// // Newton's third law: forces should be equal and opposite
|
||||||
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
// EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance)
|
||||||
|
// << "Force x-components should be opposite";
|
||||||
// Create Lennard-Jones potential with large cutoff to ensure interaction
|
// EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance)
|
||||||
LennardJones potential(1.0, 1.0, 3.0);
|
// << "Force y-components should be opposite";
|
||||||
|
// EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance)
|
||||||
// Launch kernel
|
// << "Force z-components should be opposite";
|
||||||
dim3 blockSize(256);
|
//
|
||||||
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
|
// // Energies should be equal for symmetric particles
|
||||||
|
// EXPECT_NEAR(result_energies[0], result_energies[1], tolerance)
|
||||||
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
|
// << "Energies should be equal";
|
||||||
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
|
// }
|
||||||
|
|
||||||
checkCudaError(cudaGetLastError(), "kernel launch");
|
|
||||||
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
|
||||||
|
|
||||||
// Copy results back to host
|
|
||||||
std::vector<real> result_forces =
|
|
||||||
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
|
||||||
std::vector<real> result_energies =
|
|
||||||
copyFromGPUAndFree(d_energies, n_particles);
|
|
||||||
|
|
||||||
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
|
||||||
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
|
||||||
|
|
||||||
// With PBC, particles should interact as if they're close (distance ~0.2)
|
|
||||||
// rather than far apart (distance ~4.8)
|
|
||||||
EXPECT_GT(std::abs(result_forces[0]), tolerance)
|
|
||||||
<< "Expected significant force due to PBC";
|
|
||||||
EXPECT_GT(std::abs(result_energies[0]), tolerance)
|
|
||||||
<< "Expected significant energy due to PBC";
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(CudaKernelTest, SingleParticleTest) {
|
|
||||||
const int n_particles = 1;
|
|
||||||
|
|
||||||
std::vector<real> positions = {0.0, 0.0, 0.0};
|
|
||||||
std::vector<real> forces(3 * n_particles, 0.0);
|
|
||||||
std::vector<real> energies(n_particles, 0.0);
|
|
||||||
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
|
||||||
|
|
||||||
real *d_positions = allocateAndCopyToGPU(positions);
|
|
||||||
real *d_forces = allocateAndCopyToGPU(forces);
|
|
||||||
real *d_energies = allocateAndCopyToGPU(energies);
|
|
||||||
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
|
||||||
|
|
||||||
LennardJones potential(1.0, 1.0, 3.0);
|
|
||||||
|
|
||||||
dim3 blockSize(256);
|
|
||||||
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
|
|
||||||
|
|
||||||
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
|
|
||||||
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
|
|
||||||
|
|
||||||
checkCudaError(cudaGetLastError(), "kernel launch");
|
|
||||||
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
|
||||||
|
|
||||||
std::vector<real> result_forces =
|
|
||||||
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
|
||||||
std::vector<real> result_energies =
|
|
||||||
copyFromGPUAndFree(d_energies, n_particles);
|
|
||||||
|
|
||||||
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
|
||||||
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
|
||||||
|
|
||||||
// Single particle should have zero force and energy
|
|
||||||
EXPECT_NEAR(result_forces[0], 0.0, 1e-10);
|
|
||||||
EXPECT_NEAR(result_forces[1], 0.0, 1e-10);
|
|
||||||
EXPECT_NEAR(result_forces[2], 0.0, 1e-10);
|
|
||||||
EXPECT_NEAR(result_energies[0], 0.0, 1e-10);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(CudaKernelTest, ForceSymmetryTest) {
|
|
||||||
const int n_particles = 2;
|
|
||||||
const real tolerance = 1e-5;
|
|
||||||
|
|
||||||
std::vector<real> positions = {
|
|
||||||
0.0, 0.0, 0.0, // particle 0
|
|
||||||
1.5, 0.0, 0.0 // particle 1
|
|
||||||
};
|
|
||||||
|
|
||||||
std::vector<real> forces(3 * n_particles, 0.0);
|
|
||||||
std::vector<real> energies(n_particles, 0.0);
|
|
||||||
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
|
||||||
|
|
||||||
real *d_positions = allocateAndCopyToGPU(positions);
|
|
||||||
real *d_forces = allocateAndCopyToGPU(forces);
|
|
||||||
real *d_energies = allocateAndCopyToGPU(energies);
|
|
||||||
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
|
||||||
|
|
||||||
LennardJones potential(1.0, 1.0, 3.0);
|
|
||||||
|
|
||||||
dim3 blockSize(256);
|
|
||||||
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
|
|
||||||
|
|
||||||
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
|
|
||||||
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
|
|
||||||
|
|
||||||
checkCudaError(cudaGetLastError(), "kernel launch");
|
|
||||||
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
|
||||||
|
|
||||||
std::vector<real> result_forces =
|
|
||||||
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
|
||||||
std::vector<real> result_energies =
|
|
||||||
copyFromGPUAndFree(d_energies, n_particles);
|
|
||||||
|
|
||||||
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
|
||||||
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
|
||||||
|
|
||||||
// Newton's third law: forces should be equal and opposite
|
|
||||||
EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance)
|
|
||||||
<< "Force x-components should be opposite";
|
|
||||||
EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance)
|
|
||||||
<< "Force y-components should be opposite";
|
|
||||||
EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance)
|
|
||||||
<< "Force z-components should be opposite";
|
|
||||||
|
|
||||||
// Energies should be equal for symmetric particles
|
|
||||||
EXPECT_NEAR(result_energies[0], result_energies[1], tolerance)
|
|
||||||
<< "Energies should be equal";
|
|
||||||
}
|
|
||||||
|
|
||||||
// Main function to run tests
|
|
||||||
int main(int argc, char **argv) {
|
|
||||||
::testing::InitGoogleTest(&argc, argv);
|
|
||||||
|
|
||||||
// Check if CUDA is available
|
|
||||||
int deviceCount;
|
|
||||||
cudaError_t err = cudaGetDeviceCount(&deviceCount);
|
|
||||||
if (err != cudaSuccess || deviceCount == 0) {
|
|
||||||
std::cout << "No CUDA devices available. Skipping CUDA tests." << std::endl;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return RUN_ALL_TESTS();
|
|
||||||
}
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue