cudaCAC/tests/cuda_unit_tests/test_forces.cu

#include <cmath>
#include <cuda_runtime.h>
#include <gtest/gtest.h>
#include <vector>

// Include your header files
#include "forces.cuh"
#include "pair_potentials.cuh"
#include "precision.hpp"

class CudaKernelTest : public ::testing::Test {
protected:
  void SetUp() override {
    // Set up CUDA device
    cudaError_t err = cudaSetDevice(0);
    ASSERT_EQ(err, cudaSuccess) << "Failed to set CUDA device";
  }

  void TearDown() override {
    // Clean up any remaining GPU memory
    cudaDeviceReset();
  }

  // Helper function to check CUDA errors
  void checkCudaError(cudaError_t err, const std::string &operation) {
    ASSERT_EQ(err, cudaSuccess)
        << "CUDA error in " << operation << ": " << cudaGetErrorString(err);
  }

  // Helper function to allocate and copy data to GPU
  template <typename T>
  T *allocateAndCopyToGPU(const std::vector<T> &host_data) {
    T *device_ptr;
    size_t size = host_data.size() * sizeof(T);
    checkCudaError(cudaMalloc(&device_ptr, size), "cudaMalloc");
    checkCudaError(
        cudaMemcpy(device_ptr, host_data.data(), size, cudaMemcpyHostToDevice),
        "cudaMemcpy H2D");
    return device_ptr;
  }

  // Helper function to copy data from GPU and free GPU memory
  template <typename T>
  std::vector<T> copyFromGPUAndFree(T *device_ptr, size_t count) {
    std::vector<T> host_data(count);
    size_t size = count * sizeof(T);
    checkCudaError(
        cudaMemcpy(host_data.data(), device_ptr, size, cudaMemcpyDeviceToHost),
        "cudaMemcpy D2H");
    checkCudaError(cudaFree(device_ptr), "cudaFree");
    return host_data;
  }
};

TEST_F(CudaKernelTest, BasicFunctionalityTest) {
  const int n_particles = 4;
  const real tolerance = 1e-5;

  // Set up test data - simple 2x2 grid of particles
  std::vector<real> positions = {
      0.0, 0.0, 0.0, // particle 0
      1.0, 0.0, 0.0, // particle 1
      0.0, 1.0, 0.0, // particle 2
      1.0, 1.0, 0.0  // particle 3
  };

  std::vector<real> forces(3 * n_particles, 0.0);
  std::vector<real> energies(n_particles, 0.0);
  std::vector<real> box_dimensions = {10.0, 10.0,
                                      10.0}; // Large box to avoid PBC effects

  // Allocate GPU memory and copy data
  real *d_positions = allocateAndCopyToGPU(positions);
  real *d_forces = allocateAndCopyToGPU(forces);
  real *d_energies = allocateAndCopyToGPU(energies);
  real *d_box_len = allocateAndCopyToGPU(box_dimensions);

  // Create Lennard-Jones potential (sigma=1.0, epsilon=1.0, rcutoff=3.0)
  LennardJones potential(1.0, 1.0, 3.0);

  // Launch kernel
  dim3 blockSize(256);
  dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);

  CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
      d_positions, d_forces, d_energies, n_particles, d_box_len, potential);

  checkCudaError(cudaGetLastError(), "kernel launch");
  checkCudaError(cudaDeviceSynchronize(), "kernel execution");

  // Copy results back to host
  std::vector<real> result_forces =
      copyFromGPUAndFree(d_forces, 3 * n_particles);
  std::vector<real> result_energies =
      copyFromGPUAndFree(d_energies, n_particles);

  // Clean up remaining GPU memory
  checkCudaError(cudaFree(d_positions), "cudaFree positions");
  checkCudaError(cudaFree(d_box_len), "cudaFree box_len");

  // Verify results - forces should be non-zero and energies should be
  // calculated
  bool has_nonzero_force = false;
  bool has_nonzero_energy = false;

  for (int i = 0; i < 3 * n_particles; i++) {
    if (std::abs(result_forces[i]) > tolerance) {
      has_nonzero_force = true;
      break;
    }
  }

  for (int i = 0; i < n_particles; i++) {
    if (std::abs(result_energies[i]) > tolerance) {
      has_nonzero_energy = true;
      break;
    }
  }

  EXPECT_FALSE(has_nonzero_force)
      << "Expected non-zero forces between particles";
  EXPECT_TRUE(has_nonzero_energy) << "Expected non-zero energies for particles";
}

TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) {
  const int n_particles = 2;
  const real tolerance = 1e-5;

  // Place particles near opposite edges of a small box
  std::vector<real> positions = {
      0.1, 0.0, 0.0, // particle 0 near left edge
      4.9, 0.0, 0.0  // particle 1 near right edge
  };

  std::vector<real> forces(3 * n_particles, 0.0);
  std::vector<real> energies(n_particles, 0.0);
  std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC

  // Allocate GPU memory and copy data
  real *d_positions = allocateAndCopyToGPU(positions);
  real *d_forces = allocateAndCopyToGPU(forces);
  real *d_energies = allocateAndCopyToGPU(energies);
  real *d_box_len = allocateAndCopyToGPU(box_dimensions);

  // Create Lennard-Jones potential with large cutoff to ensure interaction
  LennardJones potential(1.0, 1.0, 3.0);

  // Launch kernel
  dim3 blockSize(256);
  dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);

  CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
      d_positions, d_forces, d_energies, n_particles, d_box_len, potential);

  checkCudaError(cudaGetLastError(), "kernel launch");
  checkCudaError(cudaDeviceSynchronize(), "kernel execution");

  // Copy results back to host
  std::vector<real> result_forces =
      copyFromGPUAndFree(d_forces, 3 * n_particles);
  std::vector<real> result_energies =
      copyFromGPUAndFree(d_energies, n_particles);

  checkCudaError(cudaFree(d_positions), "cudaFree positions");
  checkCudaError(cudaFree(d_box_len), "cudaFree box_len");

  // With PBC, particles should interact as if they're close (distance ~0.2)
  // rather than far apart (distance ~4.8)
  EXPECT_GT(std::abs(result_forces[0]), tolerance)
      << "Expected significant force due to PBC";
  EXPECT_GT(std::abs(result_energies[0]), tolerance)
      << "Expected significant energy due to PBC";
}

TEST_F(CudaKernelTest, SingleParticleTest) {
  const int n_particles = 1;

  std::vector<real> positions = {0.0, 0.0, 0.0};
  std::vector<real> forces(3 * n_particles, 0.0);
  std::vector<real> energies(n_particles, 0.0);
  std::vector<real> box_dimensions = {10.0, 10.0, 10.0};

  real *d_positions = allocateAndCopyToGPU(positions);
  real *d_forces = allocateAndCopyToGPU(forces);
  real *d_energies = allocateAndCopyToGPU(energies);
  real *d_box_len = allocateAndCopyToGPU(box_dimensions);

  LennardJones potential(1.0, 1.0, 3.0);

  dim3 blockSize(256);
  dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);

  CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
      d_positions, d_forces, d_energies, n_particles, d_box_len, potential);

  checkCudaError(cudaGetLastError(), "kernel launch");
  checkCudaError(cudaDeviceSynchronize(), "kernel execution");

  std::vector<real> result_forces =
      copyFromGPUAndFree(d_forces, 3 * n_particles);
  std::vector<real> result_energies =
      copyFromGPUAndFree(d_energies, n_particles);

  checkCudaError(cudaFree(d_positions), "cudaFree positions");
  checkCudaError(cudaFree(d_box_len), "cudaFree box_len");

  // Single particle should have zero force and energy
  EXPECT_NEAR(result_forces[0], 0.0, 1e-10);
  EXPECT_NEAR(result_forces[1], 0.0, 1e-10);
  EXPECT_NEAR(result_forces[2], 0.0, 1e-10);
  EXPECT_NEAR(result_energies[0], 0.0, 1e-10);
}

TEST_F(CudaKernelTest, ForceSymmetryTest) {
  const int n_particles = 2;
  const real tolerance = 1e-5;

  std::vector<real> positions = {
      0.0, 0.0, 0.0, // particle 0
      1.5, 0.0, 0.0  // particle 1
  };

  std::vector<real> forces(3 * n_particles, 0.0);
  std::vector<real> energies(n_particles, 0.0);
  std::vector<real> box_dimensions = {10.0, 10.0, 10.0};

  real *d_positions = allocateAndCopyToGPU(positions);
  real *d_forces = allocateAndCopyToGPU(forces);
  real *d_energies = allocateAndCopyToGPU(energies);
  real *d_box_len = allocateAndCopyToGPU(box_dimensions);

  LennardJones potential(1.0, 1.0, 3.0);

  dim3 blockSize(256);
  dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);

  CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
      d_positions, d_forces, d_energies, n_particles, d_box_len, potential);

  checkCudaError(cudaGetLastError(), "kernel launch");
  checkCudaError(cudaDeviceSynchronize(), "kernel execution");

  std::vector<real> result_forces =
      copyFromGPUAndFree(d_forces, 3 * n_particles);
  std::vector<real> result_energies =
      copyFromGPUAndFree(d_energies, n_particles);

  checkCudaError(cudaFree(d_positions), "cudaFree positions");
  checkCudaError(cudaFree(d_box_len), "cudaFree box_len");

  // Newton's third law: forces should be equal and opposite
  EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance)
      << "Force x-components should be opposite";
  EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance)
      << "Force y-components should be opposite";
  EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance)
      << "Force z-components should be opposite";

  // Energies should be equal for symmetric particles
  EXPECT_NEAR(result_energies[0], result_energies[1], tolerance)
      << "Energies should be equal";
}

// Main function to run tests
int main(int argc, char **argv) {
  ::testing::InitGoogleTest(&argc, argv);

  // Check if CUDA is available
  int deviceCount;
  cudaError_t err = cudaGetDeviceCount(&deviceCount);
  if (err != cudaSuccess || deviceCount == 0) {
    std::cout << "No CUDA devices available. Skipping CUDA tests." << std::endl;
    return 0;
  }

  return RUN_ALL_TESTS();
}
Add force calculation kernel and fix incorrect ctest configuration for Cuda tests 2025-08-27 22:07:47 -04:00			`#include <cmath>`
			`#include <cuda_runtime.h>`
			`#include <gtest/gtest.h>`
			`#include <vector>`

			`// Include your header files`
			`#include "forces.cuh"`
			`#include "pair_potentials.cuh"`
			`#include "precision.hpp"`

			`class CudaKernelTest : public ::testing::Test {`
			`protected:`
			`void SetUp() override {`
			`// Set up CUDA device`
			`cudaError_t err = cudaSetDevice(0);`
			`ASSERT_EQ(err, cudaSuccess) << "Failed to set CUDA device";`
			`}`

			`void TearDown() override {`
			`// Clean up any remaining GPU memory`
			`cudaDeviceReset();`
			`}`

			`// Helper function to check CUDA errors`
			`void checkCudaError(cudaError_t err, const std::string &operation) {`
			`ASSERT_EQ(err, cudaSuccess)`
			`<< "CUDA error in " << operation << ": " << cudaGetErrorString(err);`
			`}`

			`// Helper function to allocate and copy data to GPU`
			`template <typename T>`
			`T *allocateAndCopyToGPU(const std::vector<T> &host_data) {`
			`T *device_ptr;`
			`size_t size = host_data.size() * sizeof(T);`
			`checkCudaError(cudaMalloc(&device_ptr, size), "cudaMalloc");`
			`checkCudaError(`
			`cudaMemcpy(device_ptr, host_data.data(), size, cudaMemcpyHostToDevice),`
			`"cudaMemcpy H2D");`
			`return device_ptr;`
			`}`

			`// Helper function to copy data from GPU and free GPU memory`
			`template <typename T>`
			`std::vector<T> copyFromGPUAndFree(T *device_ptr, size_t count) {`
			`std::vector<T> host_data(count);`
			`size_t size = count * sizeof(T);`
			`checkCudaError(`
			`cudaMemcpy(host_data.data(), device_ptr, size, cudaMemcpyDeviceToHost),`
			`"cudaMemcpy D2H");`
			`checkCudaError(cudaFree(device_ptr), "cudaFree");`
			`return host_data;`
			`}`
			`};`

			`TEST_F(CudaKernelTest, BasicFunctionalityTest) {`
			`const int n_particles = 4;`
			`const real tolerance = 1e-5;`

			`// Set up test data - simple 2x2 grid of particles`
			`std::vector<real> positions = {`
			`0.0, 0.0, 0.0, // particle 0`
			`1.0, 0.0, 0.0, // particle 1`
			`0.0, 1.0, 0.0, // particle 2`
			`1.0, 1.0, 0.0 // particle 3`
			`};`

			`std::vector<real> forces(3 * n_particles, 0.0);`
			`std::vector<real> energies(n_particles, 0.0);`
			`std::vector<real> box_dimensions = {10.0, 10.0,`
			`10.0}; // Large box to avoid PBC effects`

			`// Allocate GPU memory and copy data`
			`real *d_positions = allocateAndCopyToGPU(positions);`
			`real *d_forces = allocateAndCopyToGPU(forces);`
			`real *d_energies = allocateAndCopyToGPU(energies);`
			`real *d_box_len = allocateAndCopyToGPU(box_dimensions);`

			`// Create Lennard-Jones potential (sigma=1.0, epsilon=1.0, rcutoff=3.0)`
			`LennardJones potential(1.0, 1.0, 3.0);`

			`// Launch kernel`
			`dim3 blockSize(256);`
			`dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);`

			`CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(`
			`d_positions, d_forces, d_energies, n_particles, d_box_len, potential);`

			`checkCudaError(cudaGetLastError(), "kernel launch");`
			`checkCudaError(cudaDeviceSynchronize(), "kernel execution");`

			`// Copy results back to host`
			`std::vector<real> result_forces =`
			`copyFromGPUAndFree(d_forces, 3 * n_particles);`
			`std::vector<real> result_energies =`
			`copyFromGPUAndFree(d_energies, n_particles);`

			`// Clean up remaining GPU memory`
			`checkCudaError(cudaFree(d_positions), "cudaFree positions");`
			`checkCudaError(cudaFree(d_box_len), "cudaFree box_len");`

			`// Verify results - forces should be non-zero and energies should be`
			`// calculated`
			`bool has_nonzero_force = false;`
			`bool has_nonzero_energy = false;`

			`for (int i = 0; i < 3 * n_particles; i++) {`
			`if (std::abs(result_forces[i]) > tolerance) {`
			`has_nonzero_force = true;`
			`break;`
			`}`
			`}`

			`for (int i = 0; i < n_particles; i++) {`
			`if (std::abs(result_energies[i]) > tolerance) {`
			`has_nonzero_energy = true;`
			`break;`
			`}`
			`}`

			`EXPECT_FALSE(has_nonzero_force)`
			`<< "Expected non-zero forces between particles";`
			`EXPECT_TRUE(has_nonzero_energy) << "Expected non-zero energies for particles";`
			`}`

			`TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) {`
			`const int n_particles = 2;`
			`const real tolerance = 1e-5;`

			`// Place particles near opposite edges of a small box`
			`std::vector<real> positions = {`
			`0.1, 0.0, 0.0, // particle 0 near left edge`
			`4.9, 0.0, 0.0 // particle 1 near right edge`
			`};`

			`std::vector<real> forces(3 * n_particles, 0.0);`
			`std::vector<real> energies(n_particles, 0.0);`
			`std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC`

			`// Allocate GPU memory and copy data`
			`real *d_positions = allocateAndCopyToGPU(positions);`
			`real *d_forces = allocateAndCopyToGPU(forces);`
			`real *d_energies = allocateAndCopyToGPU(energies);`
			`real *d_box_len = allocateAndCopyToGPU(box_dimensions);`

			`// Create Lennard-Jones potential with large cutoff to ensure interaction`
			`LennardJones potential(1.0, 1.0, 3.0);`

			`// Launch kernel`
			`dim3 blockSize(256);`
			`dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);`

			`CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(`
			`d_positions, d_forces, d_energies, n_particles, d_box_len, potential);`

			`checkCudaError(cudaGetLastError(), "kernel launch");`
			`checkCudaError(cudaDeviceSynchronize(), "kernel execution");`

			`// Copy results back to host`
			`std::vector<real> result_forces =`
			`copyFromGPUAndFree(d_forces, 3 * n_particles);`
			`std::vector<real> result_energies =`
			`copyFromGPUAndFree(d_energies, n_particles);`

			`checkCudaError(cudaFree(d_positions), "cudaFree positions");`
			`checkCudaError(cudaFree(d_box_len), "cudaFree box_len");`

			`// With PBC, particles should interact as if they're close (distance ~0.2)`
			`// rather than far apart (distance ~4.8)`
			`EXPECT_GT(std::abs(result_forces[0]), tolerance)`
			`<< "Expected significant force due to PBC";`
			`EXPECT_GT(std::abs(result_energies[0]), tolerance)`
			`<< "Expected significant energy due to PBC";`
			`}`

			`TEST_F(CudaKernelTest, SingleParticleTest) {`
			`const int n_particles = 1;`

			`std::vector<real> positions = {0.0, 0.0, 0.0};`
			`std::vector<real> forces(3 * n_particles, 0.0);`
			`std::vector<real> energies(n_particles, 0.0);`
			`std::vector<real> box_dimensions = {10.0, 10.0, 10.0};`

			`real *d_positions = allocateAndCopyToGPU(positions);`
			`real *d_forces = allocateAndCopyToGPU(forces);`
			`real *d_energies = allocateAndCopyToGPU(energies);`
			`real *d_box_len = allocateAndCopyToGPU(box_dimensions);`

			`LennardJones potential(1.0, 1.0, 3.0);`

			`dim3 blockSize(256);`
			`dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);`

			`CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(`
			`d_positions, d_forces, d_energies, n_particles, d_box_len, potential);`

			`checkCudaError(cudaGetLastError(), "kernel launch");`
			`checkCudaError(cudaDeviceSynchronize(), "kernel execution");`

			`std::vector<real> result_forces =`
			`copyFromGPUAndFree(d_forces, 3 * n_particles);`
			`std::vector<real> result_energies =`
			`copyFromGPUAndFree(d_energies, n_particles);`

			`checkCudaError(cudaFree(d_positions), "cudaFree positions");`
			`checkCudaError(cudaFree(d_box_len), "cudaFree box_len");`

			`// Single particle should have zero force and energy`
			`EXPECT_NEAR(result_forces[0], 0.0, 1e-10);`
			`EXPECT_NEAR(result_forces[1], 0.0, 1e-10);`
			`EXPECT_NEAR(result_forces[2], 0.0, 1e-10);`
			`EXPECT_NEAR(result_energies[0], 0.0, 1e-10);`
			`}`

			`TEST_F(CudaKernelTest, ForceSymmetryTest) {`
			`const int n_particles = 2;`
			`const real tolerance = 1e-5;`

			`std::vector<real> positions = {`
			`0.0, 0.0, 0.0, // particle 0`
			`1.5, 0.0, 0.0 // particle 1`
			`};`

			`std::vector<real> forces(3 * n_particles, 0.0);`
			`std::vector<real> energies(n_particles, 0.0);`
			`std::vector<real> box_dimensions = {10.0, 10.0, 10.0};`

			`real *d_positions = allocateAndCopyToGPU(positions);`
			`real *d_forces = allocateAndCopyToGPU(forces);`
			`real *d_energies = allocateAndCopyToGPU(energies);`
			`real *d_box_len = allocateAndCopyToGPU(box_dimensions);`

			`LennardJones potential(1.0, 1.0, 3.0);`

			`dim3 blockSize(256);`
			`dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);`

			`CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(`
			`d_positions, d_forces, d_energies, n_particles, d_box_len, potential);`

			`checkCudaError(cudaGetLastError(), "kernel launch");`
			`checkCudaError(cudaDeviceSynchronize(), "kernel execution");`

			`std::vector<real> result_forces =`
			`copyFromGPUAndFree(d_forces, 3 * n_particles);`
			`std::vector<real> result_energies =`
			`copyFromGPUAndFree(d_energies, n_particles);`

			`checkCudaError(cudaFree(d_positions), "cudaFree positions");`
			`checkCudaError(cudaFree(d_box_len), "cudaFree box_len");`

			`// Newton's third law: forces should be equal and opposite`
			`EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance)`
			`<< "Force x-components should be opposite";`
			`EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance)`
			`<< "Force y-components should be opposite";`
			`EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance)`
			`<< "Force z-components should be opposite";`

			`// Energies should be equal for symmetric particles`
			`EXPECT_NEAR(result_energies[0], result_energies[1], tolerance)`
			`<< "Energies should be equal";`
			`}`

			`// Main function to run tests`
			`int main(int argc, char **argv) {`
			`::testing::InitGoogleTest(&argc, argv);`

			`// Check if CUDA is available`
			`int deviceCount;`
			`cudaError_t err = cudaGetDeviceCount(&deviceCount);`
			`if (err != cudaSuccess \|\| deviceCount == 0) {`
			`std::cout << "No CUDA devices available. Skipping CUDA tests." << std::endl;`
			`return 0;`
			`}`

			`return RUN_ALL_TESTS();`
			`}`