diff --git a/CMakeLists.txt b/CMakeLists.txt index b7394b0..fb27a81 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_OBJECTS 0) # Add Vec3 as a dependency include(FetchContent) FetchContent_Declare(Vec3 - GIT_REPOSITORY https://forge.alexselimov.com/aselimov/Vec3.git + GIT_REPOSITORY https://www.alexselimov.com/git/aselimov/Vec3.git ) FetchContent_GetProperties(Vec3) diff --git a/README.md b/README.md index d8435ec..9236f1b 100644 --- a/README.md +++ b/README.md @@ -1,35 +1,12 @@ -# ⚛️ CudaCAC +# C++ Project Template +When setting out on a new project in C++ there are a few configuration steps +which need to be completed prior to actually getting down to writing code. +This repository is going to be a C++ project template that already has the +following components: -CudaCAC is a Cuda accelerated implementation of the Concurrent Atomistic-Continuum (CAC) method. +- Directory Structure +- Make Build (CMake) +- CUDA integration +- Unit Test Framework (Google Test) +- API Documentation (Doxygen) -## Background - -### Molecular Dynamics - -Molecular dynamics (MD) is a computer simulation method for analyzing the physical movements of atoms and molecules. The atoms and molecules are allowed to interact for a fixed period of time, giving a view of the dynamic evolution of the system. In the most common version, the trajectories of atoms and molecules are determined by numerically solving Newton's equations of motion for a system of interacting particles, where forces between the particles and their potential energies are often calculated using interatomic potentials or molecular mechanics force fields. - -### Concurrent Atomistic-Continuum (CAC) Method - -The Concurrent Atomistic-Continuum (CAC) method is a multiscale modeling technique used for simulating materials at the nano and micro-scale. It partitions a simulation into a coarse-grained domain and an atomistic domain. This allows for the detailed, fully-resolved atomistic simulation of important regions, like those with lattice defects, while more efficiently modeling the rest of the material as a continuum. A key feature of the CAC method is its use of a unified set of governing equations and interatomic potentials across both the atomistic and continuum domains. This avoids the need for complex coupling procedures at the interface of the two regions. - -## Tech Stack - -This project leverages a high-performance computing stack for its simulations: - -* **C++:** The core application logic is written in modern C++, providing a balance of performance and high-level abstractions. -* **CUDA:** NVIDIA's CUDA platform is used to accelerate the computationally intensive parts of the simulation on the GPU. -* **CMake:** A cross-platform build system used to manage the compilation and linking of the project. -* **Google Test:** A testing framework for writing C++ tests. -* **Doxygen:** A documentation generator for C++ code. - -## Roadmap - -- [ ] Complete basic molecular dynamics atomistic solver using Cuda using Lennard-Jones pair potential with order O(n^2) calculations -- [ ] Implement CAC rhombohedral finite element solver -- [ ] Adding neighbor lists with cutoff distances to reduce runtime complexity -- [ ] Adding multi-body potential support -- [ ] Adding support for overlaying multiple potentials - -## Contact - -For any questions or inquiries, please contact Alex Selimov at [alex@alexselimov.com](mailto:alex@alexselimov.com) or visit his website at [alexselimov.com](https://alexselimov.com). diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index fac4474..baa8a60 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -2,14 +2,12 @@ project(${NAME}_cuda_lib CUDA CXX) set(HEADER_FILES pair_potentials.cuh - forces.cuh ) set(SOURCE_FILES - forces.cu ) # The library contains header and source files. -add_library(${NAME}_cuda_lib STATIC +add_library(${NAME}_cuda_lib INTERFACE ${SOURCE_FILES} ${HEADER_FILES} ) diff --git a/kernels/forces.cu b/kernels/forces.cu deleted file mode 100644 index 2251bd5..0000000 --- a/kernels/forces.cu +++ /dev/null @@ -1,36 +0,0 @@ -#include "forces.cuh" - -__global__ void CAC::calc_forces_and_energies(real *xs, real *forces, - real *energies, int n_particles, - real *box_len, - PairPotential &potential) { - int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i < n_particles) { - real xi = xs[3 * i]; - real yi = xs[3 * i + 1]; - real zi = xs[3 * i + 2]; - - for (int j = 0; j < n_particles; j++) { - if (i != j) { - real xj = xs[3 * j]; - real yj = xs[3 * j + 1]; - real zj = xs[3 * j + 2]; - - real dx = xi - xj; - real dy = yi - yj; - real dz = zi - zj; - - // Apply periodic boundary conditions - dx -= box_len[0] * round(dx / box_len[0]); - dy -= box_len[1] * round(dy / box_len[1]); - dz -= box_len[2] * round(dz / box_len[2]); - - ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz}); - forces[3 * i] += sol.force.x; - forces[3 * i + 1] += sol.force.y; - forces[3 * i + 2] += sol.force.z; - energies[i] = sol.energy; - } - } - } -} diff --git a/kernels/forces.cuh b/kernels/forces.cuh deleted file mode 100644 index 87a610f..0000000 --- a/kernels/forces.cuh +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef FORCES_CUH -#define FORCES_CUH - -#include "pair_potentials.cuh" -#include "precision.hpp" -namespace CAC { -/** - * Calculate forces and energies using CUDA for acceleration - * This code currently only accepts a single PairPotential object and does an - * n^2 force calculation. Future improvements will: - * - Allow for neighbor listing - * - Allow for overlaid force calculations - */ -__global__ void calc_forces_and_energies(real *xs, real *forces, real *energies, - int n_particles, real *box_bd, - PairPotential &potential); -} // namespace CAC - -#endif diff --git a/kernels/pair_potentials.cuh b/kernels/pair_potentials.cuh index d5d8566..052a079 100644 --- a/kernels/pair_potentials.cuh +++ b/kernels/pair_potentials.cuh @@ -1,5 +1,5 @@ -#ifndef POTENTIALS_CUH -#define POTENTIALS_CUH +#ifndef POTENTIALS_H +#define POTENTIALS_H #include "precision.hpp" #include "vec3.h" @@ -84,8 +84,8 @@ struct LennardJones : PairPotential { } }; - CUDA_CALLABLE inline ~LennardJones(){}; + CUDA_CALLABLE ~LennardJones(){}; }; -inline PairPotential::~PairPotential() {}; +PairPotential::~PairPotential() {}; #endif diff --git a/tests/cuda_unit_tests/CMakeLists.txt b/tests/cuda_unit_tests/CMakeLists.txt index 3419e5e..27490a0 100644 --- a/tests/cuda_unit_tests/CMakeLists.txt +++ b/tests/cuda_unit_tests/CMakeLists.txt @@ -2,16 +2,8 @@ include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR}) add_executable(${NAME}_cuda_tests test_potential.cu - test_forces.cu ) target_link_libraries(${NAME}_cuda_tests gtest gtest_main) target_link_libraries(${NAME}_cuda_tests ${CMAKE_PROJECT_NAME}_cuda_lib) -add_test(NAME ${NAME}CudaTests COMMAND ${CMAKE_BINARY_DIR}/tests/cuda_unit_tests/${NAME}_cuda_tests) - -# Add environment variables for NVIDIA GPU selection. Useful for facilitating testing on multi gpu -# systems -set_property(TEST ${NAME}CudaTests PROPERTY ENVIRONMENT - "__NV_PRIME_RENDER_OFFLOAD=1" - "__GLX_VENDOR_LIBRARY_NAME=nvidia" -) +add_test(NAME ${NAME}CudaTests COMMAND ${CMAKE_BINARY_DIR}/tests/unit_tests/${NAME}_tests) diff --git a/tests/cuda_unit_tests/test_forces.cu b/tests/cuda_unit_tests/test_forces.cu deleted file mode 100644 index ca84e55..0000000 --- a/tests/cuda_unit_tests/test_forces.cu +++ /dev/null @@ -1,277 +0,0 @@ -#include -#include -#include -#include - -// Include your header files -#include "forces.cuh" -#include "pair_potentials.cuh" -#include "precision.hpp" - -class CudaKernelTest : public ::testing::Test { -protected: - void SetUp() override { - // Set up CUDA device - cudaError_t err = cudaSetDevice(0); - ASSERT_EQ(err, cudaSuccess) << "Failed to set CUDA device"; - } - - void TearDown() override { - // Clean up any remaining GPU memory - cudaDeviceReset(); - } - - // Helper function to check CUDA errors - void checkCudaError(cudaError_t err, const std::string &operation) { - ASSERT_EQ(err, cudaSuccess) - << "CUDA error in " << operation << ": " << cudaGetErrorString(err); - } - - // Helper function to allocate and copy data to GPU - template - T *allocateAndCopyToGPU(const std::vector &host_data) { - T *device_ptr; - size_t size = host_data.size() * sizeof(T); - checkCudaError(cudaMalloc(&device_ptr, size), "cudaMalloc"); - checkCudaError( - cudaMemcpy(device_ptr, host_data.data(), size, cudaMemcpyHostToDevice), - "cudaMemcpy H2D"); - return device_ptr; - } - - // Helper function to copy data from GPU and free GPU memory - template - std::vector copyFromGPUAndFree(T *device_ptr, size_t count) { - std::vector host_data(count); - size_t size = count * sizeof(T); - checkCudaError( - cudaMemcpy(host_data.data(), device_ptr, size, cudaMemcpyDeviceToHost), - "cudaMemcpy D2H"); - checkCudaError(cudaFree(device_ptr), "cudaFree"); - return host_data; - } -}; - -TEST_F(CudaKernelTest, BasicFunctionalityTest) { - const int n_particles = 4; - const real tolerance = 1e-5; - - // Set up test data - simple 2x2 grid of particles - std::vector positions = { - 0.0, 0.0, 0.0, // particle 0 - 1.0, 0.0, 0.0, // particle 1 - 0.0, 1.0, 0.0, // particle 2 - 1.0, 1.0, 0.0 // particle 3 - }; - - std::vector forces(3 * n_particles, 0.0); - std::vector energies(n_particles, 0.0); - std::vector box_dimensions = {10.0, 10.0, - 10.0}; // Large box to avoid PBC effects - - // Allocate GPU memory and copy data - real *d_positions = allocateAndCopyToGPU(positions); - real *d_forces = allocateAndCopyToGPU(forces); - real *d_energies = allocateAndCopyToGPU(energies); - real *d_box_len = allocateAndCopyToGPU(box_dimensions); - - // Create Lennard-Jones potential (sigma=1.0, epsilon=1.0, rcutoff=3.0) - LennardJones potential(1.0, 1.0, 3.0); - - // Launch kernel - dim3 blockSize(256); - dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x); - - CAC::calc_forces_and_energies<<>>( - d_positions, d_forces, d_energies, n_particles, d_box_len, potential); - - checkCudaError(cudaGetLastError(), "kernel launch"); - checkCudaError(cudaDeviceSynchronize(), "kernel execution"); - - // Copy results back to host - std::vector result_forces = - copyFromGPUAndFree(d_forces, 3 * n_particles); - std::vector result_energies = - copyFromGPUAndFree(d_energies, n_particles); - - // Clean up remaining GPU memory - checkCudaError(cudaFree(d_positions), "cudaFree positions"); - checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); - - // Verify results - forces should be non-zero and energies should be - // calculated - bool has_nonzero_force = false; - bool has_nonzero_energy = false; - - for (int i = 0; i < 3 * n_particles; i++) { - if (std::abs(result_forces[i]) > tolerance) { - has_nonzero_force = true; - break; - } - } - - for (int i = 0; i < n_particles; i++) { - if (std::abs(result_energies[i]) > tolerance) { - has_nonzero_energy = true; - break; - } - } - - EXPECT_FALSE(has_nonzero_force) - << "Expected non-zero forces between particles"; - EXPECT_TRUE(has_nonzero_energy) << "Expected non-zero energies for particles"; -} - -TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) { - const int n_particles = 2; - const real tolerance = 1e-5; - - // Place particles near opposite edges of a small box - std::vector positions = { - 0.1, 0.0, 0.0, // particle 0 near left edge - 4.9, 0.0, 0.0 // particle 1 near right edge - }; - - std::vector forces(3 * n_particles, 0.0); - std::vector energies(n_particles, 0.0); - std::vector box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC - - // Allocate GPU memory and copy data - real *d_positions = allocateAndCopyToGPU(positions); - real *d_forces = allocateAndCopyToGPU(forces); - real *d_energies = allocateAndCopyToGPU(energies); - real *d_box_len = allocateAndCopyToGPU(box_dimensions); - - // Create Lennard-Jones potential with large cutoff to ensure interaction - LennardJones potential(1.0, 1.0, 3.0); - - // Launch kernel - dim3 blockSize(256); - dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x); - - CAC::calc_forces_and_energies<<>>( - d_positions, d_forces, d_energies, n_particles, d_box_len, potential); - - checkCudaError(cudaGetLastError(), "kernel launch"); - checkCudaError(cudaDeviceSynchronize(), "kernel execution"); - - // Copy results back to host - std::vector result_forces = - copyFromGPUAndFree(d_forces, 3 * n_particles); - std::vector result_energies = - copyFromGPUAndFree(d_energies, n_particles); - - checkCudaError(cudaFree(d_positions), "cudaFree positions"); - checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); - - // With PBC, particles should interact as if they're close (distance ~0.2) - // rather than far apart (distance ~4.8) - EXPECT_GT(std::abs(result_forces[0]), tolerance) - << "Expected significant force due to PBC"; - EXPECT_GT(std::abs(result_energies[0]), tolerance) - << "Expected significant energy due to PBC"; -} - -TEST_F(CudaKernelTest, SingleParticleTest) { - const int n_particles = 1; - - std::vector positions = {0.0, 0.0, 0.0}; - std::vector forces(3 * n_particles, 0.0); - std::vector energies(n_particles, 0.0); - std::vector box_dimensions = {10.0, 10.0, 10.0}; - - real *d_positions = allocateAndCopyToGPU(positions); - real *d_forces = allocateAndCopyToGPU(forces); - real *d_energies = allocateAndCopyToGPU(energies); - real *d_box_len = allocateAndCopyToGPU(box_dimensions); - - LennardJones potential(1.0, 1.0, 3.0); - - dim3 blockSize(256); - dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x); - - CAC::calc_forces_and_energies<<>>( - d_positions, d_forces, d_energies, n_particles, d_box_len, potential); - - checkCudaError(cudaGetLastError(), "kernel launch"); - checkCudaError(cudaDeviceSynchronize(), "kernel execution"); - - std::vector result_forces = - copyFromGPUAndFree(d_forces, 3 * n_particles); - std::vector result_energies = - copyFromGPUAndFree(d_energies, n_particles); - - checkCudaError(cudaFree(d_positions), "cudaFree positions"); - checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); - - // Single particle should have zero force and energy - EXPECT_NEAR(result_forces[0], 0.0, 1e-10); - EXPECT_NEAR(result_forces[1], 0.0, 1e-10); - EXPECT_NEAR(result_forces[2], 0.0, 1e-10); - EXPECT_NEAR(result_energies[0], 0.0, 1e-10); -} - -TEST_F(CudaKernelTest, ForceSymmetryTest) { - const int n_particles = 2; - const real tolerance = 1e-5; - - std::vector positions = { - 0.0, 0.0, 0.0, // particle 0 - 1.5, 0.0, 0.0 // particle 1 - }; - - std::vector forces(3 * n_particles, 0.0); - std::vector energies(n_particles, 0.0); - std::vector box_dimensions = {10.0, 10.0, 10.0}; - - real *d_positions = allocateAndCopyToGPU(positions); - real *d_forces = allocateAndCopyToGPU(forces); - real *d_energies = allocateAndCopyToGPU(energies); - real *d_box_len = allocateAndCopyToGPU(box_dimensions); - - LennardJones potential(1.0, 1.0, 3.0); - - dim3 blockSize(256); - dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x); - - CAC::calc_forces_and_energies<<>>( - d_positions, d_forces, d_energies, n_particles, d_box_len, potential); - - checkCudaError(cudaGetLastError(), "kernel launch"); - checkCudaError(cudaDeviceSynchronize(), "kernel execution"); - - std::vector result_forces = - copyFromGPUAndFree(d_forces, 3 * n_particles); - std::vector result_energies = - copyFromGPUAndFree(d_energies, n_particles); - - checkCudaError(cudaFree(d_positions), "cudaFree positions"); - checkCudaError(cudaFree(d_box_len), "cudaFree box_len"); - - // Newton's third law: forces should be equal and opposite - EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance) - << "Force x-components should be opposite"; - EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance) - << "Force y-components should be opposite"; - EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance) - << "Force z-components should be opposite"; - - // Energies should be equal for symmetric particles - EXPECT_NEAR(result_energies[0], result_energies[1], tolerance) - << "Energies should be equal"; -} - -// Main function to run tests -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - - // Check if CUDA is available - int deviceCount; - cudaError_t err = cudaGetDeviceCount(&deviceCount); - if (err != cudaSuccess || deviceCount == 0) { - std::cout << "No CUDA devices available. Skipping CUDA tests." << std::endl; - return 0; - } - - return RUN_ALL_TESTS(); -}