Compare commits
2 commits
746face829
...
dc74e4e5c0
Author | SHA1 | Date | |
---|---|---|---|
dc74e4e5c0 | |||
cad74747bf |
8 changed files with 382 additions and 17 deletions
|
@ -25,7 +25,7 @@ set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_OBJECTS 0)
|
||||||
# Add Vec3 as a dependency
|
# Add Vec3 as a dependency
|
||||||
include(FetchContent)
|
include(FetchContent)
|
||||||
FetchContent_Declare(Vec3
|
FetchContent_Declare(Vec3
|
||||||
GIT_REPOSITORY https://www.alexselimov.com/git/aselimov/Vec3.git
|
GIT_REPOSITORY https://forge.alexselimov.com/aselimov/Vec3.git
|
||||||
)
|
)
|
||||||
|
|
||||||
FetchContent_GetProperties(Vec3)
|
FetchContent_GetProperties(Vec3)
|
||||||
|
|
43
README.md
43
README.md
|
@ -1,12 +1,35 @@
|
||||||
# C++ Project Template
|
# ⚛️ CudaCAC
|
||||||
When setting out on a new project in C++ there are a few configuration steps
|
|
||||||
which need to be completed prior to actually getting down to writing code.
|
|
||||||
This repository is going to be a C++ project template that already has the
|
|
||||||
following components:
|
|
||||||
|
|
||||||
- Directory Structure
|
CudaCAC is a Cuda accelerated implementation of the Concurrent Atomistic-Continuum (CAC) method.
|
||||||
- Make Build (CMake)
|
|
||||||
- CUDA integration
|
|
||||||
- Unit Test Framework (Google Test)
|
|
||||||
- API Documentation (Doxygen)
|
|
||||||
|
|
||||||
|
## Background
|
||||||
|
|
||||||
|
### Molecular Dynamics
|
||||||
|
|
||||||
|
Molecular dynamics (MD) is a computer simulation method for analyzing the physical movements of atoms and molecules. The atoms and molecules are allowed to interact for a fixed period of time, giving a view of the dynamic evolution of the system. In the most common version, the trajectories of atoms and molecules are determined by numerically solving Newton's equations of motion for a system of interacting particles, where forces between the particles and their potential energies are often calculated using interatomic potentials or molecular mechanics force fields.
|
||||||
|
|
||||||
|
### Concurrent Atomistic-Continuum (CAC) Method
|
||||||
|
|
||||||
|
The Concurrent Atomistic-Continuum (CAC) method is a multiscale modeling technique used for simulating materials at the nano and micro-scale. It partitions a simulation into a coarse-grained domain and an atomistic domain. This allows for the detailed, fully-resolved atomistic simulation of important regions, like those with lattice defects, while more efficiently modeling the rest of the material as a continuum. A key feature of the CAC method is its use of a unified set of governing equations and interatomic potentials across both the atomistic and continuum domains. This avoids the need for complex coupling procedures at the interface of the two regions.
|
||||||
|
|
||||||
|
## Tech Stack
|
||||||
|
|
||||||
|
This project leverages a high-performance computing stack for its simulations:
|
||||||
|
|
||||||
|
* **C++:** The core application logic is written in modern C++, providing a balance of performance and high-level abstractions.
|
||||||
|
* **CUDA:** NVIDIA's CUDA platform is used to accelerate the computationally intensive parts of the simulation on the GPU.
|
||||||
|
* **CMake:** A cross-platform build system used to manage the compilation and linking of the project.
|
||||||
|
* **Google Test:** A testing framework for writing C++ tests.
|
||||||
|
* **Doxygen:** A documentation generator for C++ code.
|
||||||
|
|
||||||
|
## Roadmap
|
||||||
|
|
||||||
|
- [ ] Complete basic molecular dynamics atomistic solver using Cuda using Lennard-Jones pair potential with order O(n^2) calculations
|
||||||
|
- [ ] Implement CAC rhombohedral finite element solver
|
||||||
|
- [ ] Adding neighbor lists with cutoff distances to reduce runtime complexity
|
||||||
|
- [ ] Adding multi-body potential support
|
||||||
|
- [ ] Adding support for overlaying multiple potentials
|
||||||
|
|
||||||
|
## Contact
|
||||||
|
|
||||||
|
For any questions or inquiries, please contact Alex Selimov at [alex@alexselimov.com](mailto:alex@alexselimov.com) or visit his website at [alexselimov.com](https://alexselimov.com).
|
||||||
|
|
|
@ -2,12 +2,14 @@ project(${NAME}_cuda_lib CUDA CXX)
|
||||||
|
|
||||||
set(HEADER_FILES
|
set(HEADER_FILES
|
||||||
pair_potentials.cuh
|
pair_potentials.cuh
|
||||||
|
forces.cuh
|
||||||
)
|
)
|
||||||
set(SOURCE_FILES
|
set(SOURCE_FILES
|
||||||
|
forces.cu
|
||||||
)
|
)
|
||||||
|
|
||||||
# The library contains header and source files.
|
# The library contains header and source files.
|
||||||
add_library(${NAME}_cuda_lib INTERFACE
|
add_library(${NAME}_cuda_lib STATIC
|
||||||
${SOURCE_FILES}
|
${SOURCE_FILES}
|
||||||
${HEADER_FILES}
|
${HEADER_FILES}
|
||||||
)
|
)
|
||||||
|
|
36
kernels/forces.cu
Normal file
36
kernels/forces.cu
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
#include "forces.cuh"
|
||||||
|
|
||||||
|
__global__ void CAC::calc_forces_and_energies(real *xs, real *forces,
|
||||||
|
real *energies, int n_particles,
|
||||||
|
real *box_len,
|
||||||
|
PairPotential &potential) {
|
||||||
|
int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
if (i < n_particles) {
|
||||||
|
real xi = xs[3 * i];
|
||||||
|
real yi = xs[3 * i + 1];
|
||||||
|
real zi = xs[3 * i + 2];
|
||||||
|
|
||||||
|
for (int j = 0; j < n_particles; j++) {
|
||||||
|
if (i != j) {
|
||||||
|
real xj = xs[3 * j];
|
||||||
|
real yj = xs[3 * j + 1];
|
||||||
|
real zj = xs[3 * j + 2];
|
||||||
|
|
||||||
|
real dx = xi - xj;
|
||||||
|
real dy = yi - yj;
|
||||||
|
real dz = zi - zj;
|
||||||
|
|
||||||
|
// Apply periodic boundary conditions
|
||||||
|
dx -= box_len[0] * round(dx / box_len[0]);
|
||||||
|
dy -= box_len[1] * round(dy / box_len[1]);
|
||||||
|
dz -= box_len[2] * round(dz / box_len[2]);
|
||||||
|
|
||||||
|
ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
|
||||||
|
forces[3 * i] += sol.force.x;
|
||||||
|
forces[3 * i + 1] += sol.force.y;
|
||||||
|
forces[3 * i + 2] += sol.force.z;
|
||||||
|
energies[i] = sol.energy;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
19
kernels/forces.cuh
Normal file
19
kernels/forces.cuh
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
#ifndef FORCES_CUH
|
||||||
|
#define FORCES_CUH
|
||||||
|
|
||||||
|
#include "pair_potentials.cuh"
|
||||||
|
#include "precision.hpp"
|
||||||
|
namespace CAC {
|
||||||
|
/**
|
||||||
|
* Calculate forces and energies using CUDA for acceleration
|
||||||
|
* This code currently only accepts a single PairPotential object and does an
|
||||||
|
* n^2 force calculation. Future improvements will:
|
||||||
|
* - Allow for neighbor listing
|
||||||
|
* - Allow for overlaid force calculations
|
||||||
|
*/
|
||||||
|
__global__ void calc_forces_and_energies(real *xs, real *forces, real *energies,
|
||||||
|
int n_particles, real *box_bd,
|
||||||
|
PairPotential &potential);
|
||||||
|
} // namespace CAC
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,5 +1,5 @@
|
||||||
#ifndef POTENTIALS_H
|
#ifndef POTENTIALS_CUH
|
||||||
#define POTENTIALS_H
|
#define POTENTIALS_CUH
|
||||||
|
|
||||||
#include "precision.hpp"
|
#include "precision.hpp"
|
||||||
#include "vec3.h"
|
#include "vec3.h"
|
||||||
|
@ -84,8 +84,8 @@ struct LennardJones : PairPotential {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
CUDA_CALLABLE ~LennardJones(){};
|
CUDA_CALLABLE inline ~LennardJones(){};
|
||||||
};
|
};
|
||||||
|
|
||||||
PairPotential::~PairPotential() {};
|
inline PairPotential::~PairPotential() {};
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -2,8 +2,16 @@ include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
|
||||||
|
|
||||||
add_executable(${NAME}_cuda_tests
|
add_executable(${NAME}_cuda_tests
|
||||||
test_potential.cu
|
test_potential.cu
|
||||||
|
test_forces.cu
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(${NAME}_cuda_tests gtest gtest_main)
|
target_link_libraries(${NAME}_cuda_tests gtest gtest_main)
|
||||||
target_link_libraries(${NAME}_cuda_tests ${CMAKE_PROJECT_NAME}_cuda_lib)
|
target_link_libraries(${NAME}_cuda_tests ${CMAKE_PROJECT_NAME}_cuda_lib)
|
||||||
add_test(NAME ${NAME}CudaTests COMMAND ${CMAKE_BINARY_DIR}/tests/unit_tests/${NAME}_tests)
|
add_test(NAME ${NAME}CudaTests COMMAND ${CMAKE_BINARY_DIR}/tests/cuda_unit_tests/${NAME}_cuda_tests)
|
||||||
|
|
||||||
|
# Add environment variables for NVIDIA GPU selection. Useful for facilitating testing on multi gpu
|
||||||
|
# systems
|
||||||
|
set_property(TEST ${NAME}CudaTests PROPERTY ENVIRONMENT
|
||||||
|
"__NV_PRIME_RENDER_OFFLOAD=1"
|
||||||
|
"__GLX_VENDOR_LIBRARY_NAME=nvidia"
|
||||||
|
)
|
||||||
|
|
277
tests/cuda_unit_tests/test_forces.cu
Normal file
277
tests/cuda_unit_tests/test_forces.cu
Normal file
|
@ -0,0 +1,277 @@
|
||||||
|
#include <cmath>
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
// Include your header files
|
||||||
|
#include "forces.cuh"
|
||||||
|
#include "pair_potentials.cuh"
|
||||||
|
#include "precision.hpp"
|
||||||
|
|
||||||
|
class CudaKernelTest : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
void SetUp() override {
|
||||||
|
// Set up CUDA device
|
||||||
|
cudaError_t err = cudaSetDevice(0);
|
||||||
|
ASSERT_EQ(err, cudaSuccess) << "Failed to set CUDA device";
|
||||||
|
}
|
||||||
|
|
||||||
|
void TearDown() override {
|
||||||
|
// Clean up any remaining GPU memory
|
||||||
|
cudaDeviceReset();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper function to check CUDA errors
|
||||||
|
void checkCudaError(cudaError_t err, const std::string &operation) {
|
||||||
|
ASSERT_EQ(err, cudaSuccess)
|
||||||
|
<< "CUDA error in " << operation << ": " << cudaGetErrorString(err);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper function to allocate and copy data to GPU
|
||||||
|
template <typename T>
|
||||||
|
T *allocateAndCopyToGPU(const std::vector<T> &host_data) {
|
||||||
|
T *device_ptr;
|
||||||
|
size_t size = host_data.size() * sizeof(T);
|
||||||
|
checkCudaError(cudaMalloc(&device_ptr, size), "cudaMalloc");
|
||||||
|
checkCudaError(
|
||||||
|
cudaMemcpy(device_ptr, host_data.data(), size, cudaMemcpyHostToDevice),
|
||||||
|
"cudaMemcpy H2D");
|
||||||
|
return device_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper function to copy data from GPU and free GPU memory
|
||||||
|
template <typename T>
|
||||||
|
std::vector<T> copyFromGPUAndFree(T *device_ptr, size_t count) {
|
||||||
|
std::vector<T> host_data(count);
|
||||||
|
size_t size = count * sizeof(T);
|
||||||
|
checkCudaError(
|
||||||
|
cudaMemcpy(host_data.data(), device_ptr, size, cudaMemcpyDeviceToHost),
|
||||||
|
"cudaMemcpy D2H");
|
||||||
|
checkCudaError(cudaFree(device_ptr), "cudaFree");
|
||||||
|
return host_data;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_F(CudaKernelTest, BasicFunctionalityTest) {
|
||||||
|
const int n_particles = 4;
|
||||||
|
const real tolerance = 1e-5;
|
||||||
|
|
||||||
|
// Set up test data - simple 2x2 grid of particles
|
||||||
|
std::vector<real> positions = {
|
||||||
|
0.0, 0.0, 0.0, // particle 0
|
||||||
|
1.0, 0.0, 0.0, // particle 1
|
||||||
|
0.0, 1.0, 0.0, // particle 2
|
||||||
|
1.0, 1.0, 0.0 // particle 3
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<real> forces(3 * n_particles, 0.0);
|
||||||
|
std::vector<real> energies(n_particles, 0.0);
|
||||||
|
std::vector<real> box_dimensions = {10.0, 10.0,
|
||||||
|
10.0}; // Large box to avoid PBC effects
|
||||||
|
|
||||||
|
// Allocate GPU memory and copy data
|
||||||
|
real *d_positions = allocateAndCopyToGPU(positions);
|
||||||
|
real *d_forces = allocateAndCopyToGPU(forces);
|
||||||
|
real *d_energies = allocateAndCopyToGPU(energies);
|
||||||
|
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
||||||
|
|
||||||
|
// Create Lennard-Jones potential (sigma=1.0, epsilon=1.0, rcutoff=3.0)
|
||||||
|
LennardJones potential(1.0, 1.0, 3.0);
|
||||||
|
|
||||||
|
// Launch kernel
|
||||||
|
dim3 blockSize(256);
|
||||||
|
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
|
||||||
|
|
||||||
|
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
|
||||||
|
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
|
||||||
|
|
||||||
|
checkCudaError(cudaGetLastError(), "kernel launch");
|
||||||
|
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
||||||
|
|
||||||
|
// Copy results back to host
|
||||||
|
std::vector<real> result_forces =
|
||||||
|
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
||||||
|
std::vector<real> result_energies =
|
||||||
|
copyFromGPUAndFree(d_energies, n_particles);
|
||||||
|
|
||||||
|
// Clean up remaining GPU memory
|
||||||
|
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
||||||
|
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
||||||
|
|
||||||
|
// Verify results - forces should be non-zero and energies should be
|
||||||
|
// calculated
|
||||||
|
bool has_nonzero_force = false;
|
||||||
|
bool has_nonzero_energy = false;
|
||||||
|
|
||||||
|
for (int i = 0; i < 3 * n_particles; i++) {
|
||||||
|
if (std::abs(result_forces[i]) > tolerance) {
|
||||||
|
has_nonzero_force = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < n_particles; i++) {
|
||||||
|
if (std::abs(result_energies[i]) > tolerance) {
|
||||||
|
has_nonzero_energy = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_FALSE(has_nonzero_force)
|
||||||
|
<< "Expected non-zero forces between particles";
|
||||||
|
EXPECT_TRUE(has_nonzero_energy) << "Expected non-zero energies for particles";
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) {
|
||||||
|
const int n_particles = 2;
|
||||||
|
const real tolerance = 1e-5;
|
||||||
|
|
||||||
|
// Place particles near opposite edges of a small box
|
||||||
|
std::vector<real> positions = {
|
||||||
|
0.1, 0.0, 0.0, // particle 0 near left edge
|
||||||
|
4.9, 0.0, 0.0 // particle 1 near right edge
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<real> forces(3 * n_particles, 0.0);
|
||||||
|
std::vector<real> energies(n_particles, 0.0);
|
||||||
|
std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC
|
||||||
|
|
||||||
|
// Allocate GPU memory and copy data
|
||||||
|
real *d_positions = allocateAndCopyToGPU(positions);
|
||||||
|
real *d_forces = allocateAndCopyToGPU(forces);
|
||||||
|
real *d_energies = allocateAndCopyToGPU(energies);
|
||||||
|
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
||||||
|
|
||||||
|
// Create Lennard-Jones potential with large cutoff to ensure interaction
|
||||||
|
LennardJones potential(1.0, 1.0, 3.0);
|
||||||
|
|
||||||
|
// Launch kernel
|
||||||
|
dim3 blockSize(256);
|
||||||
|
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
|
||||||
|
|
||||||
|
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
|
||||||
|
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
|
||||||
|
|
||||||
|
checkCudaError(cudaGetLastError(), "kernel launch");
|
||||||
|
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
||||||
|
|
||||||
|
// Copy results back to host
|
||||||
|
std::vector<real> result_forces =
|
||||||
|
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
||||||
|
std::vector<real> result_energies =
|
||||||
|
copyFromGPUAndFree(d_energies, n_particles);
|
||||||
|
|
||||||
|
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
||||||
|
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
||||||
|
|
||||||
|
// With PBC, particles should interact as if they're close (distance ~0.2)
|
||||||
|
// rather than far apart (distance ~4.8)
|
||||||
|
EXPECT_GT(std::abs(result_forces[0]), tolerance)
|
||||||
|
<< "Expected significant force due to PBC";
|
||||||
|
EXPECT_GT(std::abs(result_energies[0]), tolerance)
|
||||||
|
<< "Expected significant energy due to PBC";
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(CudaKernelTest, SingleParticleTest) {
|
||||||
|
const int n_particles = 1;
|
||||||
|
|
||||||
|
std::vector<real> positions = {0.0, 0.0, 0.0};
|
||||||
|
std::vector<real> forces(3 * n_particles, 0.0);
|
||||||
|
std::vector<real> energies(n_particles, 0.0);
|
||||||
|
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
||||||
|
|
||||||
|
real *d_positions = allocateAndCopyToGPU(positions);
|
||||||
|
real *d_forces = allocateAndCopyToGPU(forces);
|
||||||
|
real *d_energies = allocateAndCopyToGPU(energies);
|
||||||
|
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
||||||
|
|
||||||
|
LennardJones potential(1.0, 1.0, 3.0);
|
||||||
|
|
||||||
|
dim3 blockSize(256);
|
||||||
|
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
|
||||||
|
|
||||||
|
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
|
||||||
|
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
|
||||||
|
|
||||||
|
checkCudaError(cudaGetLastError(), "kernel launch");
|
||||||
|
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
||||||
|
|
||||||
|
std::vector<real> result_forces =
|
||||||
|
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
||||||
|
std::vector<real> result_energies =
|
||||||
|
copyFromGPUAndFree(d_energies, n_particles);
|
||||||
|
|
||||||
|
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
||||||
|
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
||||||
|
|
||||||
|
// Single particle should have zero force and energy
|
||||||
|
EXPECT_NEAR(result_forces[0], 0.0, 1e-10);
|
||||||
|
EXPECT_NEAR(result_forces[1], 0.0, 1e-10);
|
||||||
|
EXPECT_NEAR(result_forces[2], 0.0, 1e-10);
|
||||||
|
EXPECT_NEAR(result_energies[0], 0.0, 1e-10);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(CudaKernelTest, ForceSymmetryTest) {
|
||||||
|
const int n_particles = 2;
|
||||||
|
const real tolerance = 1e-5;
|
||||||
|
|
||||||
|
std::vector<real> positions = {
|
||||||
|
0.0, 0.0, 0.0, // particle 0
|
||||||
|
1.5, 0.0, 0.0 // particle 1
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<real> forces(3 * n_particles, 0.0);
|
||||||
|
std::vector<real> energies(n_particles, 0.0);
|
||||||
|
std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
|
||||||
|
|
||||||
|
real *d_positions = allocateAndCopyToGPU(positions);
|
||||||
|
real *d_forces = allocateAndCopyToGPU(forces);
|
||||||
|
real *d_energies = allocateAndCopyToGPU(energies);
|
||||||
|
real *d_box_len = allocateAndCopyToGPU(box_dimensions);
|
||||||
|
|
||||||
|
LennardJones potential(1.0, 1.0, 3.0);
|
||||||
|
|
||||||
|
dim3 blockSize(256);
|
||||||
|
dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
|
||||||
|
|
||||||
|
CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
|
||||||
|
d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
|
||||||
|
|
||||||
|
checkCudaError(cudaGetLastError(), "kernel launch");
|
||||||
|
checkCudaError(cudaDeviceSynchronize(), "kernel execution");
|
||||||
|
|
||||||
|
std::vector<real> result_forces =
|
||||||
|
copyFromGPUAndFree(d_forces, 3 * n_particles);
|
||||||
|
std::vector<real> result_energies =
|
||||||
|
copyFromGPUAndFree(d_energies, n_particles);
|
||||||
|
|
||||||
|
checkCudaError(cudaFree(d_positions), "cudaFree positions");
|
||||||
|
checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
|
||||||
|
|
||||||
|
// Newton's third law: forces should be equal and opposite
|
||||||
|
EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance)
|
||||||
|
<< "Force x-components should be opposite";
|
||||||
|
EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance)
|
||||||
|
<< "Force y-components should be opposite";
|
||||||
|
EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance)
|
||||||
|
<< "Force z-components should be opposite";
|
||||||
|
|
||||||
|
// Energies should be equal for symmetric particles
|
||||||
|
EXPECT_NEAR(result_energies[0], result_energies[1], tolerance)
|
||||||
|
<< "Energies should be equal";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Main function to run tests
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
::testing::InitGoogleTest(&argc, argv);
|
||||||
|
|
||||||
|
// Check if CUDA is available
|
||||||
|
int deviceCount;
|
||||||
|
cudaError_t err = cudaGetDeviceCount(&deviceCount);
|
||||||
|
if (err != cudaSuccess || deviceCount == 0) {
|
||||||
|
std::cout << "No CUDA devices available. Skipping CUDA tests." << std::endl;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return RUN_ALL_TESTS();
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue