From 68f8b02f0a10c12b1ce12539dea800bba1e72977 Mon Sep 17 00:00:00 2001
From: Alex Selimov <alex@alexselimov.com>
Date: Tue, 15 Apr 2025 14:10:01 -0400
Subject: [PATCH 01/10] Update to add Cuda to build system

---
 CMakeLists.txt         | 27 ++++++++++++++++++++-----
 kernels/CMakeLists.txt | 18 +++++++++++++++++
 kernels/hello_world.cu | 46 ++++++++++++++++++++++++++++++++++++++++++
 kernels/hello_world.h  | 10 +++++++++
 main.cpp               |  9 ++++++++-
 src/CMakeLists.txt     | 21 +++++++++----------
 src/test.cpp           |  4 ++++
 src/test.h             |  2 ++
 8 files changed, 120 insertions(+), 17 deletions(-)
 create mode 100644 kernels/CMakeLists.txt
 create mode 100644 kernels/hello_world.cu
 create mode 100644 kernels/hello_world.h
 create mode 100644 src/test.cpp
 create mode 100644 src/test.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 36cd7d8..f760070 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,17 +1,34 @@
 cmake_minimum_required(VERSION 3.9)
-project(MyProject)
+project(MyProject LANGUAGES CUDA CXX)
+
+add_compile_options(-Wall -Wextra -Wpedantic)
 
 set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CUDA_ARCHITECTURES 61)
+set(CUDA_SEPARABLE_COMPILATION ON)
+
 
-set(SOURCE_FILES main.cpp)
-add_executable(${CMAKE_PROJECT_NAME}_run ${SOURCE_FILES})
 
 include_directories(src)
+include_directories(kernels)
+include_directories(/usr/local/cuda-12.8/include)
+
 
 add_subdirectory(src)
+add_subdirectory(kernels)
 add_subdirectory(tests)
 
-target_link_libraries(${CMAKE_PROJECT_NAME}_run ${CMAKE_PROJECT_NAME}_lib)
+add_executable(${CMAKE_PROJECT_NAME}_run main.cpp)
+
+
+
+target_link_libraries(
+    ${CMAKE_PROJECT_NAME}_run 
+    PRIVATE
+    ${CMAKE_PROJECT_NAME}_lib 
+    ${CMAKE_PROJECT_NAME}_cuda_lib 
+    ${CUDA_LIBRARIES}
+)
 
 # Doxygen Build
 option(BUILD_DOC "Build Documentation" ON)
@@ -35,4 +52,4 @@ if(DOXYGEN_FOUND)
             VERBATIM)
 else(DOXYGEN_FOUND)
     message("Doxygen needs to be installed to generate the documentation.")
-endif(DOXYGEN_FOUND)
\ No newline at end of file
+endif(DOXYGEN_FOUND)
diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt
new file mode 100644
index 0000000..f9a1174
--- /dev/null
+++ b/kernels/CMakeLists.txt
@@ -0,0 +1,18 @@
+project(${CMAKE_PROJECT_NAME}_cuda_lib CUDA CXX)
+
+set(HEADER_FILES
+    hello_world.h
+)
+set(SOURCE_FILES
+    hello_world.cu
+)
+
+# The library contains header and source files.
+add_library(${CMAKE_PROJECT_NAME}_cuda_lib STATIC
+    ${SOURCE_FILES}
+    ${HEADER_FILES}
+)
+
+if(CMAKE_COMPILER_IS_GNUCXX)
+  target_compile_options(${CMAKE_PROJECT_NAME}_cuda_lib PRIVATE -Wno-gnu-line-marker)
+endif()
diff --git a/kernels/hello_world.cu b/kernels/hello_world.cu
new file mode 100644
index 0000000..7c65115
--- /dev/null
+++ b/kernels/hello_world.cu
@@ -0,0 +1,82 @@
+#include <cuda_runtime.h>
+#include <stdio.h>
+
+/**
+ * Demo kernel: every thread prints its own threadIdx.x.
+ * Device-side printf is serialized and slow, so keep this to demos/debugging.
+ */
+__global__ void hello_cuda() {
+  printf("Hello CUDA from thread %d\n", threadIdx.x);
+}
+
+/**
+ * Print the name and compute capability of the current device, then launch
+ * hello_cuda with one block of 10 threads and wait for it to finish.
+ * CUDA failures are reported on stdout instead of being silently ignored.
+ */
+extern "C" void launch_hello_cuda() {
+  // Query the device the kernel will actually run on. The previously
+  // hard-coded ordinal 1 is invalid on single-GPU machines, in which case the
+  // call failed and an unfilled cudaDeviceProp was printed.
+  int device = 0;
+  cudaError_t error = cudaGetDevice(&device);
+  cudaDeviceProp prop;
+  if (error == cudaSuccess) {
+    error = cudaGetDeviceProperties(&prop, device);
+  }
+  if (error != cudaSuccess) {
+    printf("CUDA error: %s\n", cudaGetErrorString(error));
+    return;
+  }
+  printf("Using device: %s with compute capability %d.%d\n", prop.name,
+         prop.major, prop.minor);
+
+  hello_cuda<<<1, 10>>>();
+  // Launch-configuration errors are only visible through cudaGetLastError();
+  // execution errors surface at the synchronizing call.
+  error = cudaGetLastError();
+  if (error == cudaSuccess) {
+    error = cudaDeviceSynchronize();
+  }
+  if (error != cudaSuccess) {
+    printf("CUDA error: %s\n", cudaGetErrorString(error));
+  }
+  fflush(stdout);
+}
+
+/**
+ * Print the number of visible CUDA devices followed by a summary of each
+ * device's properties. Errors are reported on stdout.
+ */
+extern "C" void check_cuda() {
+  int deviceCount = 0;
+  cudaError_t error = cudaGetDeviceCount(&deviceCount);
+
+  if (error != cudaSuccess) {
+    printf("CUDA error: %s\n", cudaGetErrorString(error));
+  }
+
+  printf("Found %d CUDA devices\n", deviceCount);
+
+  for (int i = 0; i < deviceCount; i++) {
+    cudaDeviceProp prop;
+    error = cudaGetDeviceProperties(&prop, i);
+    if (error != cudaSuccess) {
+      // Skip this device rather than printing an unfilled struct.
+      printf("CUDA error: %s\n", cudaGetErrorString(error));
+      continue;
+    }
+
+    printf("Device %d: %s\n", i, prop.name);
+    printf("  Compute capability: %d.%d\n", prop.major, prop.minor);
+    printf("  Total global memory: %.2f GB\n",
+           static_cast<float>(prop.totalGlobalMem) / (1024 * 1024 * 1024));
+    printf("  Multiprocessors: %d\n", prop.multiProcessorCount);
+    printf("  Max threads per block: %d\n", prop.maxThreadsPerBlock);
+    printf("  Max threads dimensions: (%d, %d, %d)\n", prop.maxThreadsDim[0],
+           prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
+    printf("  Max grid dimensions: (%d, %d, %d)\n", prop.maxGridSize[0],
+           prop.maxGridSize[1], prop.maxGridSize[2]);
+    printf("\n");
+  }
+}
diff --git a/kernels/hello_world.h b/kernels/hello_world.h
new file mode 100644
index 0000000..4024e2e
--- /dev/null
+++ b/kernels/hello_world.h
@@ -0,0 +1,10 @@
+#ifndef HELLO_WORLD_CU_H
+#define HELLO_WORLD_CU_H
+
+extern "C" {
+// Declaration of the CUDA function that will be called from C++
+void launch_hello_cuda();
+void check_cuda();
+}
+
+#endif // HELLO_WORLD_CU_H
diff --git a/main.cpp b/main.cpp
index d269c89..e3c8734 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,3 +1,10 @@
+#include "hello_world.h"
+#include <iostream>
+
 int main() {
+  std::cout << "Starting CUDA example..." << std::endl; // Using endl to flush
+  check_cuda();
+  launch_hello_cuda();
+  std::cout << "Ending CUDA example" << std::endl; // Using endl to flush
   return 0;
-}
\ No newline at end of file
+}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 94717aa..6c6cd8f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,17 +1,16 @@
-project(${CMAKE_PROJECT_NAME}_lib)
+project(${CMAKE_PROJECT_NAME}_lib CUDA CXX)
 
 set(HEADER_FILES
+    ./test.h
 )
 set(SOURCE_FILES
+    ./test.cpp
 )
 
-if (EXISTS ${SOURCE_FILES})
-    # The library contains header and source files.
-    add_library(${CMAKE_PROJECT_NAME}_lib STATIC
-        ${SOURCE_FILES}
-        ${HEADER_FILES}
-    )
-else()
-    # The library only contains header files.
-    add_library(${CMAKE_PROJECT_NAME}_lib INTERFACE)
-endif()
\ No newline at end of file
+# The library contains header and source files.
+add_library(${CMAKE_PROJECT_NAME}_lib
+    ${SOURCE_FILES}
+    ${HEADER_FILES}
+)
+
+target_include_directories(${CMAKE_PROJECT_NAME}_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/src/test.cpp b/src/test.cpp
new file mode 100644
index 0000000..eec7934
--- /dev/null
+++ b/src/test.cpp
@@ -0,0 +1,4 @@
+#include "test.h"
+#include <iostream>
+
+void test_hello() { std::cout << "Hello!"; }
diff --git a/src/test.h b/src/test.h
new file mode 100644
index 0000000..b9c7ab1
--- /dev/null
+++ b/src/test.h
@@ -0,0 +1,2 @@
+#include <iostream>
+void test_hello();

From 942caf0f15c2449a2b86b15995d2ed496cf1e228 Mon Sep 17 00:00:00 2001
From: Alex Selimov <alex@alexselimov.com>
Date: Tue, 15 Apr 2025 14:11:34 -0400
Subject: [PATCH 02/10] Update README

---
 README.md | 70 +------------------------------------------------------
 1 file changed, 1 insertion(+), 69 deletions(-)

diff --git a/README.md b/README.md
index 816511c..9236f1b 100644
--- a/README.md
+++ b/README.md
@@ -6,75 +6,7 @@ following components:
 
 - Directory Structure
 - Make Build (CMake)
+- CUDA integration
 - Unit Test Framework (Google Test)
 - API Documentation (Doxygen)
 
-Feel free to fork this repository and tailor it to suit you.
-
-## Procedure
-1. Download Bash script to create new C++ projects 
-    ```bash
-    curl -O https://raw.githubusercontent.com/TimothyHelton/cpp_project_template/master/new_cpp_project.sh
-    chmod u+x new_cpp_project.sh
-    ```
-1. Create new C++ project
-    ```bash
-    ./new_cpp_project.sh NewProjectName
-    ```
-1. In the project top level **CMakeLists.txt**:
-    1. Line 2: Change the variable **MyProject** to the name of your project.
-        ```cmake
-        project(NewProject)
-        ```
-        - This variable will be used in a couple of different places.
-            - MyProject_run: will be the main executable name
-            - MyProject_lib: will be the project library name
-    1. Line 4: Set the version of C++ to use.  For example, let's set up the
-    NewProject to use C++ 11.
-        ```cmake
-        set(CMAKE_CXX_STANDARD 11)
-        ```
-1. Update project name and description in the `Doxyfile` located in the `docs`
-directory.
-    1. Update line `PROJECT_NAME`
-        1. This name will appear on each documentation page.
-    1. Update line `PROJECT_NUMBER`
-        1. This is the version number of your project.
-    1. Update line `PROJECT_BRIEF`
-        1. Any text entered here will also appear on each documentation page.
-        Try not to make this one too long.
-1. Reload the top CMake file.
-
-## CLION IDE Specific Instructions
-I started using an IDE from [JET Brains](https://www.jetbrains.com/) tailored
-for Python called [PyCharm](https://www.jetbrains.com/pycharm/) and thought
-it helped me write better code.
-I'd been wanting to learn C++ and decided to give JET Brains C/C++ IDE called
-[CLion](https://www.jetbrains.com/clion/) a try.
-The code completion, interactive suggestions, debugger, introspection tools,
-and built-in test execution are very handy.
-There are a couple extra details to set when using this IDE.
-
-1. The IDE allows you to mark directories with their desired purpose.
-To mark a directory right click on the directory name in the `Project` window
-and select `Mark Directory as` from the drop-down menu.
-    1. Mark the `src` directory as `Project Sources and Headers`
-    1. Mark the `tests/lib/googletest` directory as  `Library Files`
-1. Setup the `Run/Debug Configuration` by selecting `Edit Configurations...`
-from the pull-down menu from the run button (green triangle) in the upper right
-corner.
-    1. Update Doxygen Build to execute the unit test suite.
-        1. Select Doxygen from the Application menu on the left.
-        1. Choose the **executable** for Doxygen to be `Unit_Tests_run`.
-    1. Create a `Google Test` configuration
-        1. In the upper left corner select the plus symbol.
-        1. Chose `Google Test` from the drop-down menu.
-        1. Set **Name** to `Unit Tests`.
-        1. Set **Target** to `Unit_Tests_run`.
-
-## Wrap Up
-That should be all it takes to start writing code.
-If you find any issues or bugs with this repository please file an issue on
-[GitHub](https://github.com/TimothyHelton/cpp_project_template/issues).
-
-Hope you find this template useful and enjoy learning C++!
\ No newline at end of file

From a65149a619558b10f1a2ffe54b175c40c4d8b933 Mon Sep 17 00:00:00 2001
From: Alex Selimov <alex@alexselimov.com>
Date: Tue, 15 Apr 2025 14:11:55 -0400
Subject: [PATCH 03/10] Remove unneeded shell files

---
 create_project.sh  | 33 ---------------------------------
 new_cpp_project.sh | 20 --------------------
 2 files changed, 53 deletions(-)
 delete mode 100644 create_project.sh
 delete mode 100644 new_cpp_project.sh

diff --git a/create_project.sh b/create_project.sh
deleted file mode 100644
index b92cf96..0000000
--- a/create_project.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env bash
-
-# Exit if name argument is not given
-if [ -z "$*" ]; then
-    echo "A project name argument must be provided."
-    exit 0
-fi
-
-NAME=$1
-
-
-################################################################################
-
-
-# Clone template repository
-git clone https://github.com/TimothyHelton/cpp_project_template
-
-# Create bare repository
-git --bare init ${NAME}
-
-# Push template master branch to bare repository
-cd cpp_project_template
-git push ../${NAME} +master:master
-
-# Convert bare repository into a normal repository
-cd ../${NAME}
-mkdir .git
-mv * .git
-git config --local --bool core.bare false
-git reset --hard
-
-# Clean Up
-rm -rf ../cpp_project_template ../create_project.sh
\ No newline at end of file
diff --git a/new_cpp_project.sh b/new_cpp_project.sh
deleted file mode 100644
index 31924f5..0000000
--- a/new_cpp_project.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env bash
-
-# Exit if name argument is not given
-if [ -z "$*" ]; then
-    echo "A project name argument must be provided."
-    exit 0
-fi
-
-NAME=$1
-
-
-################################################################################
-
-
-# Download latest version of the build file
-curl -O https://raw.githubusercontent.com/TimothyHelton/cpp_project_template/master/create_project.sh
-chmod u+x create_project.sh
-
-# Create Project
-./create_project.sh ${NAME}
\ No newline at end of file

From dfd6f43e9b5c280608957839b6a5a3ecba7c2866 Mon Sep 17 00:00:00 2001
From: Alex Selimov <alex@alexselimov.com>
Date: Wed, 16 Apr 2025 08:12:35 -0400
Subject: [PATCH 04/10] Properly include Vec3

---
 CMakeLists.txt | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f760070..5034e31 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,32 +1,46 @@
 cmake_minimum_required(VERSION 3.9)
-project(MyProject LANGUAGES CUDA CXX)
+project(cudaCAC LANGUAGES CUDA CXX)
 
+enable_testing()
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+# Default settings 
 add_compile_options(-Wall -Wextra -Wpedantic)
 
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CUDA_ARCHITECTURES 61)
 set(CUDA_SEPARABLE_COMPILATION ON)
 
+# Add Vec3  as a dependency
+add_subdirectory(tests)
+include(FetchContent)
+FetchContent_Declare(Vec3
+    GIT_REPOSITORY https://www.alexselimov.com/git/aselimov/Vec3.git
+)
 
+FetchContent_GetProperties(Vec3)
+if(NOT Vec3_POPULATED)
+    FetchContent_MakeAvailable(Vec3)
+    include_directories(${Vec3_SOURCE_DIR})
+endif()
 
 include_directories(src)
 include_directories(kernels)
 include_directories(/usr/local/cuda-12.8/include)
 
-
 add_subdirectory(src)
 add_subdirectory(kernels)
-add_subdirectory(tests)
-
-add_executable(${CMAKE_PROJECT_NAME}_run main.cpp)
 
+add_executable(${CMAKE_PROJECT_NAME} main.cpp)
+install(DIRECTORY src/ DESTINATION src/)
 
 
 target_link_libraries(
-    ${CMAKE_PROJECT_NAME}_run 
+    ${CMAKE_PROJECT_NAME} 
     PRIVATE
     ${CMAKE_PROJECT_NAME}_lib 
     ${CMAKE_PROJECT_NAME}_cuda_lib 
+    
     ${CUDA_LIBRARIES}
 )
 

From 6162b27a89b4f938c6a6848870e9b4852a70c352 Mon Sep 17 00:00:00 2001
From: Alex Selimov <alex@alexselimov.com>
Date: Wed, 16 Apr 2025 14:01:40 -0400
Subject: [PATCH 05/10] Add pair potential and tests

---
 src/CMakeLists.txt                  |  13 ++-
 src/box.hpp                         |  22 ++++
 src/pair_potentials.cpp             |  33 ++++++
 src/pair_potentials.hpp             |  41 +++++++
 src/particle.hpp                    |  18 +++
 src/precision.hpp                   |  15 +++
 src/simulation.hpp                  |  17 +++
 src/test.cpp                        |   4 -
 src/test.h                          |   2 -
 tests/unit_tests/CMakeLists.txt     |   5 +-
 tests/unit_tests/test_potential.cpp | 174 ++++++++++++++++++++++++++++
 11 files changed, 330 insertions(+), 14 deletions(-)
 create mode 100644 src/box.hpp
 create mode 100644 src/pair_potentials.cpp
 create mode 100644 src/pair_potentials.hpp
 create mode 100644 src/particle.hpp
 create mode 100644 src/precision.hpp
 create mode 100644 src/simulation.hpp
 delete mode 100644 src/test.cpp
 delete mode 100644 src/test.h
 create mode 100644 tests/unit_tests/test_potential.cpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 6c6cd8f..a0cc382 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,16 +1,17 @@
 project(${CMAKE_PROJECT_NAME}_lib CUDA CXX)
 
 set(HEADER_FILES
-    ./test.h
+    particle.hpp
+    simulation.hpp
+    box.hpp
+    pair_potentials.hpp
 )
 set(SOURCE_FILES
-    ./test.cpp
+    pair_potentials.cpp
 )
 
 # The library contains header and source files.
-add_library(${CMAKE_PROJECT_NAME}_lib
+add_library(${CMAKE_PROJECT_NAME}_lib 
+    ${HEADER_FILES} 
     ${SOURCE_FILES}
-    ${HEADER_FILES}
 )
-
-target_include_directories(${CMAKE_PROJECT_NAME}_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/src/box.hpp b/src/box.hpp
new file mode 100644
index 0000000..816b53e
--- /dev/null
+++ b/src/box.hpp
@@ -0,0 +1,22 @@
+#ifndef BOX_H
+#define BOX_H
+
+/**
+ * Struct representing the simulation box.
+ * Currently the simulation box is always assumed to be perfectly rectangular.
+ * Shearing the box is not supported; this functionality may be added
+ * later.
+ */
+template <typename T> struct Box {
+  T xlo;
+  T xhi;
+  T ylo;
+  T yhi;
+  T zlo;
+  T zhi;
+  bool x_is_periodic;
+  bool y_is_periodic;
+  bool z_is_periodic;
+};
+
+#endif
diff --git a/src/pair_potentials.cpp b/src/pair_potentials.cpp
new file mode 100644
index 0000000..fc5f6ba
--- /dev/null
+++ b/src/pair_potentials.cpp
@@ -0,0 +1,33 @@
+#include "potentials.hpp"
+#include <cmath>
+
+/**
+ * Calculate the Lennard-Jones energy and force for the current particle pair
+ * described by displacement vector r
+ */
+ForceAndEnergy LennardJones::calc_force_and_energy(Vec3<real> r) {
+  real rmagsq = r.squared_norm2();
+  if (rmagsq < this->m_rcutoffsq && rmagsq > 0.0) {
+    real inv_rmag = 1 / std::sqrt(rmagsq);
+
+    // Pre-Compute the terms (doing this saves on multiple devisions/pow
+    // function call)
+    real sigma_r = m_sigma / inv_rmag;
+    real sigma_r5 = sigma_r * sigma_r * sigma_r * sigma_r * sigma_r;
+    real sigma_r6 = sigma_r5 * sigma_r;
+    real sigma_r11 = sigma_r5 * sigma_r5 * sigma_r;
+    real sigma_r12 = sigma_r6 * sigma_r6;
+
+    // Get the energy
+    real energy = 4.0 * m_epsilon * (sigma_r12 - sigma_r6);
+
+    // Get the force vector
+    real force_mag = 4.0 * m_epsilon * (6.0 * sigma_r5 - 12.0 * sigma_r11);
+    Vec3<real> force = r.scale(force_mag * inv_rmag);
+
+    return {energy, force};
+
+  } else {
+    return ForceAndEnergy::zero();
+  }
+};
diff --git a/src/pair_potentials.hpp b/src/pair_potentials.hpp
new file mode 100644
index 0000000..13d98c7
--- /dev/null
+++ b/src/pair_potentials.hpp
@@ -0,0 +1,41 @@
+#ifndef POTENTIALS_H
+#define POTENTIALS_H
+
+#include "precision.hpp"
+#include "vec3.h"
+
+/**
+ * Result struct for the Pair Potential
+ */
+struct ForceAndEnergy {
+  real energy;
+  Vec3<real> force;
+
+  inline static ForceAndEnergy zero() { return {0.0, {0.0, 0.0, 0.0}}; };
+};
+
+/**
+ * Abstract implementation of a Pair Potential.
+ * Pair potentials are potentials which depend solely on the distance
+ * between two particles. These do not include multi-body potentials such as
+ * EAM
+ *
+ */
+struct PairPotential {
+  real m_rcutoffsq;
+
+  /**
+   * Calculate the force and energy for a specific atom pair based on a
+   * displacement vector r.
+   */
+  virtual ForceAndEnergy calc_force_and_energy(Vec3<real> r) = 0;
+};
+
+struct LennardJones : PairPotential {
+  real m_epsilon;
+  real m_sigma;
+
+  ForceAndEnergy calc_force_and_energy(Vec3<real> r);
+};
+
+#endif
diff --git a/src/particle.hpp b/src/particle.hpp
new file mode 100644
index 0000000..84fd9b8
--- /dev/null
+++ b/src/particle.hpp
@@ -0,0 +1,18 @@
+#ifndef PARTICLE_H
+#define PARTICLE_H
+
+#include "vec3.h"
+
+/**
+ * Class representing a single molecular dynamics particle.
+ * This class is only used on the host side of the code and is converted
+ * to the device arrays.
+ */
+template <typename T = float> struct Particle {
+  Vec3<T> pos;
+  Vec3<T> vel;
+  Vec3<T> force;
+  T mass;
+};
+
+#endif
diff --git a/src/precision.hpp b/src/precision.hpp
new file mode 100644
index 0000000..c132c09
--- /dev/null
+++ b/src/precision.hpp
@@ -0,0 +1,15 @@
+#ifndef PRECISION_H
+#define PRECISION_H
+
+#ifdef USE_FLOATS
+
+/*
+ * If macro USE_FLOATS is set then the default type will be single
+ * precision (float). Otherwise we use double precision by default.
+ */
+typedef float real;
+#else
+typedef double real;
+#endif
+
+#endif
diff --git a/src/simulation.hpp b/src/simulation.hpp
new file mode 100644
index 0000000..5b468b9
--- /dev/null
+++ b/src/simulation.hpp
@@ -0,0 +1,17 @@
+#ifndef SIMULATION_H
+#define SIMULATION_H
+
+#include "box.hpp"
+#include "particle.hpp"
+#include <vector>
+
+template <typename T> class Simulation {
+  // Simulation State variables
+  T timestep;
+  Box<T> box;
+
+  // Host Data
+  std::vector<Particle<T>> particles;
+};
+
+#endif
diff --git a/src/test.cpp b/src/test.cpp
deleted file mode 100644
index eec7934..0000000
--- a/src/test.cpp
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "test.h"
-#include <iostream>
-
-void test_hello() { std::cout << "Hello!"; }
diff --git a/src/test.h b/src/test.h
deleted file mode 100644
index b9c7ab1..0000000
--- a/src/test.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#include <iostream>
-void test_hello();
diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt
index e7273a3..f13ed79 100644
--- a/tests/unit_tests/CMakeLists.txt
+++ b/tests/unit_tests/CMakeLists.txt
@@ -1,8 +1,9 @@
 include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
 
 add_executable(Unit_Tests_run
-    test_example.cpp
+    test_potential.cpp
 )
 
 target_link_libraries(Unit_Tests_run gtest gtest_main)
-target_link_libraries(Unit_Tests_run ${CMAKE_PROJECT_NAME}_lib)
\ No newline at end of file
+target_link_libraries(Unit_Tests_run ${CMAKE_PROJECT_NAME}_lib)
+add_test(Name Tests COMMAND Unit_Tests_run)
diff --git a/tests/unit_tests/test_potential.cpp b/tests/unit_tests/test_potential.cpp
new file mode 100644
index 0000000..429e903
--- /dev/null
+++ b/tests/unit_tests/test_potential.cpp
@@ -0,0 +1,174 @@
+#include "potentials.hpp"
+#include "precision.hpp"
+#include "gtest/gtest.h"
+#include <cmath>
+
+class LennardJonesTest : public ::testing::Test {
+protected:
+  void SetUp() override {
+    // Default parameters
+    sigma = 1.0;
+    epsilon = 1.0;
+    rCutoff = 2.5;
+
+    // Create default LennardJones object
+    lj = new LennardJones(sigma, epsilon, rCutoff);
+  }
+
+  void TearDown() override { delete lj; }
+
+  real sigma;
+  real epsilon;
+  real rCutoff;
+  LennardJones *lj;
+
+  // Helper function to compare Vec3 values with tolerance
+  void expectVec3Near(const Vec3<real> &expected, const Vec3<real> &actual,
+                      real tolerance) {
+    EXPECT_NEAR(expected.x, actual.x, tolerance);
+    EXPECT_NEAR(expected.y, actual.y, tolerance);
+    EXPECT_NEAR(expected.z, actual.z, tolerance);
+  }
+};
+
+TEST_F(LennardJonesTest, ZeroDistance) {
+  // At zero distance, the calculation should return zero force and energy
+  Vec3<real> r(0.0, 0.0, 0.0);
+  auto result = lj->calc_force_and_energy(r);
+
+  EXPECT_EQ(0.0, result.energy);
+  expectVec3Near(Vec3<real>(0.0, 0.0, 0.0), result.force, 1e-10);
+}
+
+TEST_F(LennardJonesTest, BeyondCutoff) {
+  // Distance beyond cutoff should return zero force and energy
+  Vec3<real> r(3.0, 0.0, 0.0); // 3.0 > rCutoff (2.5)
+  auto result = lj->calc_force_and_energy(r);
+
+  EXPECT_EQ(0.0, result.energy);
+  expectVec3Near(Vec3<real>(0.0, 0.0, 0.0), result.force, 1e-10);
+}
+
+TEST_F(LennardJonesTest, AtMinimum) {
+  // The LJ potential has a minimum at r = 2^(1/6) * sigma
+  real min_dist = std::pow(2.0, 1.0 / 6.0) * sigma;
+  Vec3<real> r(min_dist, 0.0, 0.0);
+  auto result = lj->calc_force_and_energy(r);
+
+  // At minimum, force should be close to zero
+  EXPECT_NEAR(-epsilon, result.energy, 1e-10);
+  expectVec3Near(Vec3<real>(0.0, 0.0, 0.0), result.force, 1e-10);
+}
+
+TEST_F(LennardJonesTest, AtEquilibrium) {
+  // At r = sigma, the energy should be zero and force should be repulsive
+  Vec3<real> r(sigma, 0.0, 0.0);
+  auto result = lj->calc_force_and_energy(r);
+
+  EXPECT_NEAR(0.0, result.energy, 1e-10);
+  EXPECT_GT(result.force.x,
+            0.0); // Force should be repulsive (positive x-direction)
+  EXPECT_NEAR(0.0, result.force.y, 1e-10);
+  EXPECT_NEAR(0.0, result.force.z, 1e-10);
+}
+
+TEST_F(LennardJonesTest, RepulsiveRegion) {
+  // Test in the repulsive region (r < sigma)
+  Vec3<real> r(0.8 * sigma, 0.0, 0.0);
+  auto result = lj->calc_force_and_energy(r);
+
+  // Energy should be positive and force should be repulsive
+  EXPECT_GT(result.energy, 0.0);
+  EXPECT_GT(result.force.x, 0.0); // Force should be repulsive
+}
+
+TEST_F(LennardJonesTest, AttractiveRegion) {
+  // Test in the attractive region (sigma < r < r_min)
+  Vec3<real> r(1.5 * sigma, 0.0, 0.0);
+  auto result = lj->calc_force_and_energy(r);
+
+  // Energy should be negative and force should be attractive
+  EXPECT_LT(result.energy, 0.0);
+  EXPECT_LT(result.force.x,
+            0.0); // Force should be attractive (negative x-direction)
+}
+
+TEST_F(LennardJonesTest, ArbitraryDirection) {
+  // Test with a vector in an arbitrary direction
+  Vec3<real> r(1.0, 1.0, 1.0);
+  auto result = lj->calc_force_and_energy(r);
+
+  // The force should be in the same direction as r but opposite sign
+  // (attractive region)
+  real rmag = std::sqrt(r.squared_norm2());
+
+  // Calculate expected force direction (should be along -r)
+  Vec3<real> normalized_r = r.scale(1.0 / rmag);
+  real force_dot_r = result.force.x * normalized_r.x +
+                     result.force.y * normalized_r.y +
+                     result.force.z * normalized_r.z;
+
+  // In this case, we're at r = sqrt(3) * sigma which is in attractive region
+  EXPECT_LT(force_dot_r, 0.0); // Force should be attractive
+
+  // Force should be symmetric in all dimensions for this vector
+  EXPECT_NEAR(result.force.x, result.force.y, 1e-10);
+  EXPECT_NEAR(result.force.y, result.force.z, 1e-10);
+}
+
+TEST_F(LennardJonesTest, ParameterVariation) {
+  // Test with different parameter values
+  real new_sigma = 2.0;
+  real new_epsilon = 0.5;
+  real new_rCutoff = 5.0;
+
+  LennardJones lj2(new_sigma, new_epsilon, new_rCutoff);
+
+  Vec3<real> r(2.0, 0.0, 0.0);
+  auto result1 = lj->calc_force_and_energy(r);
+  auto result2 = lj2.calc_force_and_energy(r);
+
+  // Results should be different with different parameters
+  EXPECT_NE(result1.energy, result2.energy);
+  EXPECT_NE(result1.force.x, result2.force.x);
+}
+
+TEST_F(LennardJonesTest, ExactValueCheck) {
+  // Test with pre-calculated values for a specific case
+  LennardJones lj_exact(1.0, 1.0, 3.0);
+  Vec3<real> r(1.5, 0.0, 0.0);
+  auto result = lj_exact.calc_force_and_energy(r);
+
+  // Pre-calculated values (you may need to adjust these based on your specific
+  // implementation)
+  real expected_energy =
+      4.0 * (std::pow(1.0 / 1.5, 12) - std::pow(1.0 / 1.5, 6));
+  real expected_force =
+      24.0 * (std::pow(1.0 / 1.5, 6) - 2.0 * std::pow(1.0 / 1.5, 12)) / 1.5;
+
+  EXPECT_NEAR(expected_energy, result.energy, 1e-10);
+  EXPECT_NEAR(-expected_force, result.force.x,
+              1e-10); // Negative because force is attractive
+  EXPECT_NEAR(0.0, result.force.y, 1e-10);
+  EXPECT_NEAR(0.0, result.force.z, 1e-10);
+}
+
+TEST_F(LennardJonesTest, NearCutoff) {
+  // Test behavior just inside and just outside the cutoff
+  real inside_cutoff = rCutoff - 0.01;
+  real outside_cutoff = rCutoff + 0.01;
+
+  Vec3<real> r_inside(inside_cutoff, 0.0, 0.0);
+  Vec3<real> r_outside(outside_cutoff, 0.0, 0.0);
+
+  auto result_inside = lj->calc_force_and_energy(r_inside);
+  auto result_outside = lj->calc_force_and_energy(r_outside);
+
+  // Inside should have non-zero values
+  EXPECT_NE(0.0, result_inside.energy);
+  EXPECT_NE(0.0, result_inside.force.x);
+
+  // Outside should be zero
+  EXPECT_EQ(0.0, result_outside.energy);
+  expectVec3Near(Vec3<real>(0.0, 0.0, 0.0), result_outside.force, 1e-10);
+}

From f15eb0cf515af8d8c73893de435f44f8695b2d91 Mon Sep 17 00:00:00 2001
From: Alex Selimov <alex@alexselimov.com>
Date: Wed, 16 Apr 2025 23:06:50 -0400
Subject: [PATCH 06/10] Update CMakeFiles, add initial pair potential
 implementation and tests

---
 .gitignore                          |  5 +++
 CMakeLists.txt                      | 13 +++----
 kernels/CMakeLists.txt              |  8 ++---
 main.cpp                            | 10 +++---
 src/CMakeLists.txt                  |  4 +--
 src/pair_potentials.cpp             | 12 +++----
 src/pair_potentials.hpp             |  8 +++++
 tests/unit_tests/CMakeLists.txt     |  8 ++---
 tests/unit_tests/test_potential.cpp | 54 ++++++++++++++---------------
 9 files changed, 67 insertions(+), 55 deletions(-)

diff --git a/.gitignore b/.gitignore
index fd57df9..de19d1d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,7 @@
 # Builds
 build/
+Debug/
+Testing/
 
 # Google Tests
 tests/lib/
@@ -7,3 +9,6 @@ tests/lib/
 # Jet Brains
 .idea/
 cmake-build-debug/
+
+# Cache dir
+.cache
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5034e31..e177680 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,6 @@
 cmake_minimum_required(VERSION 3.9)
-project(cudaCAC LANGUAGES CUDA CXX)
+set(NAME "cudaCAC")
+project(${NAME} LANGUAGES CUDA CXX)
 
 enable_testing()
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -12,7 +13,6 @@ set(CMAKE_CUDA_ARCHITECTURES 61)
 set(CUDA_SEPARABLE_COMPILATION ON)
 
 # Add Vec3  as a dependency
-add_subdirectory(tests)
 include(FetchContent)
 FetchContent_Declare(Vec3
     GIT_REPOSITORY https://www.alexselimov.com/git/aselimov/Vec3.git
@@ -30,16 +30,17 @@ include_directories(/usr/local/cuda-12.8/include)
 
 add_subdirectory(src)
 add_subdirectory(kernels)
+add_subdirectory(tests)
 
-add_executable(${CMAKE_PROJECT_NAME} main.cpp)
+add_executable(${NAME} main.cpp)
 install(DIRECTORY src/ DESTINATION src/)
 
 
 target_link_libraries(
-    ${CMAKE_PROJECT_NAME} 
+    ${NAME} 
     PRIVATE
-    ${CMAKE_PROJECT_NAME}_lib 
-    ${CMAKE_PROJECT_NAME}_cuda_lib 
+    ${NAME}_lib 
+    ${NAME}_cuda_lib 
     
     ${CUDA_LIBRARIES}
 )
diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt
index f9a1174..be62649 100644
--- a/kernels/CMakeLists.txt
+++ b/kernels/CMakeLists.txt
@@ -1,4 +1,4 @@
-project(${CMAKE_PROJECT_NAME}_cuda_lib CUDA CXX)
+project(${NAME}_cuda_lib CUDA CXX)
 
 set(HEADER_FILES
     hello_world.h
@@ -8,11 +8,9 @@ set(SOURCE_FILES
 )
 
 # The library contains header and source files.
-add_library(${CMAKE_PROJECT_NAME}_cuda_lib STATIC
+add_library(${NAME}_cuda_lib STATIC
     ${SOURCE_FILES}
     ${HEADER_FILES}
 )
 
-if(CMAKE_COMPILER_IS_GNUCXX)
-  target_compile_options(${CMAKE_PROJECT_NAME}_cuda_lib PRIVATE -Wno-gnu-line-marker)
-endif()
+target_compile_options(${NAME}_cuda_lib PRIVATE -Wno-gnu-line-marker -Wno-pedantic)
diff --git a/main.cpp b/main.cpp
index e3c8734..26990c6 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,10 +1,10 @@
-#include "hello_world.h"
+#include "particle.hpp"
+#include "vec3.h"
 #include <iostream>
 
 int main() {
-  std::cout << "Starting CUDA example..." << std::endl; // Using endl to flush
-  check_cuda();
-  launch_hello_cuda();
-  std::cout << "Ending CUDA example" << std::endl; // Using endl to flush
+  Particle<float> test = {
+      {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, 10};
+  std::cout << test.pos.x << " " << test.pos.y << " " << test.pos.z;
   return 0;
 }
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a0cc382..b8ed52a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,4 +1,4 @@
-project(${CMAKE_PROJECT_NAME}_lib CUDA CXX)
+project(${NAME}_lib CUDA CXX)
 
 set(HEADER_FILES
     particle.hpp
@@ -11,7 +11,7 @@ set(SOURCE_FILES
 )
 
 # The library contains header and source files.
-add_library(${CMAKE_PROJECT_NAME}_lib 
+add_library(${NAME}_lib 
     ${HEADER_FILES} 
     ${SOURCE_FILES}
 )
diff --git a/src/pair_potentials.cpp b/src/pair_potentials.cpp
index fc5f6ba..19b7ef8 100644
--- a/src/pair_potentials.cpp
+++ b/src/pair_potentials.cpp
@@ -1,6 +1,7 @@
-#include "potentials.hpp"
+#include "pair_potentials.hpp"
 #include <cmath>
 
+PairPotential::~PairPotential() {};
 /**
  * Calculate the Lennard-Jones energy and force for the current particle pair
  * described by displacement vector r
@@ -12,17 +13,16 @@ ForceAndEnergy LennardJones::calc_force_and_energy(Vec3<real> r) {
 
     // Pre-Compute the terms (doing this saves on multiple devisions/pow
     // function call)
-    real sigma_r = m_sigma / inv_rmag;
-    real sigma_r5 = sigma_r * sigma_r * sigma_r * sigma_r * sigma_r;
-    real sigma_r6 = sigma_r5 * sigma_r;
-    real sigma_r11 = sigma_r5 * sigma_r5 * sigma_r;
+    real sigma_r = m_sigma * inv_rmag;
+    real sigma_r6 = sigma_r * sigma_r * sigma_r * sigma_r * sigma_r * sigma_r;
     real sigma_r12 = sigma_r6 * sigma_r6;
 
     // Get the energy
     real energy = 4.0 * m_epsilon * (sigma_r12 - sigma_r6);
 
     // Get the force vector
-    real force_mag = 4.0 * m_epsilon * (6.0 * sigma_r5 - 12.0 * sigma_r11);
+    real force_mag = 4.0 * m_epsilon *
+                     (12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag);
     Vec3<real> force = r.scale(force_mag * inv_rmag);
 
     return {energy, force};
diff --git a/src/pair_potentials.hpp b/src/pair_potentials.hpp
index 13d98c7..04f64d4 100644
--- a/src/pair_potentials.hpp
+++ b/src/pair_potentials.hpp
@@ -24,6 +24,9 @@ struct ForceAndEnergy {
 struct PairPotential {
   real m_rcutoffsq;
 
+  PairPotential(real rcutoff) : m_rcutoffsq(rcutoff * rcutoff) {};
+  virtual ~PairPotential() = 0;
+
   /**
    * Calculate the force and energy for a specific atom pair based on a
    * displacement vector r.
@@ -35,7 +38,12 @@ struct LennardJones : PairPotential {
   real m_epsilon;
   real m_sigma;
 
+  LennardJones(real sigma, real epsilon, real rcutoff)
+      : PairPotential(rcutoff), m_epsilon(epsilon), m_sigma(sigma) {};
+
   ForceAndEnergy calc_force_and_energy(Vec3<real> r);
+
+  ~LennardJones() {};
 };
 
 #endif
diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt
index f13ed79..75a4347 100644
--- a/tests/unit_tests/CMakeLists.txt
+++ b/tests/unit_tests/CMakeLists.txt
@@ -1,9 +1,9 @@
 include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
 
-add_executable(Unit_Tests_run
+add_executable(${NAME}_tests
     test_potential.cpp
 )
 
-target_link_libraries(Unit_Tests_run gtest gtest_main)
-target_link_libraries(Unit_Tests_run ${CMAKE_PROJECT_NAME}_lib)
-add_test(Name Tests COMMAND Unit_Tests_run)
+target_link_libraries(${NAME}_tests gtest gtest_main)
+target_link_libraries(${NAME}_tests ${CMAKE_PROJECT_NAME}_lib)
+add_test(NAME ${NAME}Tests COMMAND ${CMAKE_BINARY_DIR}/tests/unit_tests/${NAME}_tests)
diff --git a/tests/unit_tests/test_potential.cpp b/tests/unit_tests/test_potential.cpp
index 429e903..7bc0dcb 100644
--- a/tests/unit_tests/test_potential.cpp
+++ b/tests/unit_tests/test_potential.cpp
@@ -1,4 +1,4 @@
-#include "potentials.hpp"
+#include "pair_potentials.hpp"
 #include "precision.hpp"
 #include "gtest/gtest.h"
 #include <cmath>
@@ -9,22 +9,22 @@ protected:
     // Default parameters
     sigma = 1.0;
     epsilon = 1.0;
-    rCutoff = 2.5;
+    r_cutoff = 2.5;
 
     // Create default LennardJones object
-    lj = new LennardJones(sigma, epsilon, rCutoff);
+    lj = new LennardJones(sigma, epsilon, r_cutoff);
   }
 
   void TearDown() override { delete lj; }
 
   real sigma;
   real epsilon;
-  real rCutoff;
+  real r_cutoff;
   LennardJones *lj;
 
   // Helper function to compare Vec3 values with tolerance
-  void expectVec3Near(const Vec3<real> &expected, const Vec3<real> &actual,
-                      real tolerance) {
+  void expect_vec3_near(const Vec3<real> &expected, const Vec3<real> &actual,
+                        real tolerance) {
     EXPECT_NEAR(expected.x, actual.x, tolerance);
     EXPECT_NEAR(expected.y, actual.y, tolerance);
     EXPECT_NEAR(expected.z, actual.z, tolerance);
@@ -33,36 +33,36 @@ protected:
 
 TEST_F(LennardJonesTest, ZeroDistance) {
   // At zero distance, the calculation should return zero force and energy
-  Vec3<real> r(0.0, 0.0, 0.0);
+  Vec3<real> r = {0.0, 0.0, 0.0};
   auto result = lj->calc_force_and_energy(r);
 
   EXPECT_EQ(0.0, result.energy);
-  expectVec3Near(Vec3<real>(0.0, 0.0, 0.0), result.force, 1e-10);
+  expect_vec3_near({0.0, 0.0, 0.0}, result.force, 1e-10);
 }
 
 TEST_F(LennardJonesTest, BeyondCutoff) {
   // Distance beyond cutoff should return zero force and energy
-  Vec3<real> r(3.0, 0.0, 0.0); // 3.0 > rCutoff (2.5)
+  Vec3<real> r = {3.0, 0.0, 0.0}; // 3.0 > r_cutoff (2.5)
   auto result = lj->calc_force_and_energy(r);
 
   EXPECT_EQ(0.0, result.energy);
-  expectVec3Near(Vec3<real>(0.0, 0.0, 0.0), result.force, 1e-10);
+  expect_vec3_near({0.0, 0.0, 0.0}, result.force, 1e-10);
 }
 
 TEST_F(LennardJonesTest, AtMinimum) {
   // The LJ potential has a minimum at r = 2^(1/6) * sigma
   real min_dist = std::pow(2.0, 1.0 / 6.0) * sigma;
-  Vec3<real> r(min_dist, 0.0, 0.0);
+  Vec3<real> r = {min_dist, 0.0, 0.0};
   auto result = lj->calc_force_and_energy(r);
 
   // At minimum, force should be close to zero
   EXPECT_NEAR(-epsilon, result.energy, 1e-10);
-  expectVec3Near(Vec3<real>(0.0, 0.0, 0.0), result.force, 1e-10);
+  expect_vec3_near({0.0, 0.0, 0.0}, result.force, 1e-10);
 }
 
 TEST_F(LennardJonesTest, AtEquilibrium) {
   // At r = sigma, the energy should be zero and force should be repulsive
-  Vec3<real> r(sigma, 0.0, 0.0);
+  Vec3<real> r = {sigma, 0.0, 0.0};
   auto result = lj->calc_force_and_energy(r);
 
   EXPECT_NEAR(0.0, result.energy, 1e-10);
@@ -74,7 +74,7 @@ TEST_F(LennardJonesTest, AtEquilibrium) {
 
 TEST_F(LennardJonesTest, RepulsiveRegion) {
   // Test in the repulsive region (r < sigma)
-  Vec3<real> r(0.8 * sigma, 0.0, 0.0);
+  Vec3<real> r = {0.8 * sigma, 0.0, 0.0};
   auto result = lj->calc_force_and_energy(r);
 
   // Energy should be positive and force should be repulsive
@@ -84,7 +84,7 @@ TEST_F(LennardJonesTest, RepulsiveRegion) {
 
 TEST_F(LennardJonesTest, AttractiveRegion) {
   // Test in the attractive region (sigma < r < r_min)
-  Vec3<real> r(1.5 * sigma, 0.0, 0.0);
+  Vec3<real> r = {1.5 * sigma, 0.0, 0.0};
   auto result = lj->calc_force_and_energy(r);
 
   // Energy should be negative and force should be attractive
@@ -95,15 +95,15 @@ TEST_F(LennardJonesTest, AttractiveRegion) {
 
 TEST_F(LennardJonesTest, ArbitraryDirection) {
   // Test with a vector in an arbitrary direction
-  Vec3<real> r(1.0, 1.0, 1.0);
+  Vec3<real> r = {1.0, 1.0, 1.0};
   auto result = lj->calc_force_and_energy(r);
 
   // The force should be in the same direction as r but opposite sign
   // (attractive region)
-  real rmag = std::sqrt(r.squared_norm2());
+  real r_mag = std::sqrt(r.squared_norm2());
 
   // Calculate expected force direction (should be along -r)
-  Vec3<real> normalized_r = r.scale(1.0 / rmag);
+  Vec3<real> normalized_r = r.scale(1.0 / r_mag);
   real force_dot_r = result.force.x * normalized_r.x +
                      result.force.y * normalized_r.y +
                      result.force.z * normalized_r.z;
@@ -120,11 +120,11 @@ TEST_F(LennardJonesTest, ParameterVariation) {
   // Test with different parameter values
   real new_sigma = 2.0;
   real new_epsilon = 0.5;
-  real new_rCutoff = 5.0;
+  real new_r_cutoff = 5.0;
 
-  LennardJones lj2(new_sigma, new_epsilon, new_rCutoff);
+  LennardJones lj2(new_sigma, new_epsilon, new_r_cutoff);
 
-  Vec3<real> r(2.0, 0.0, 0.0);
+  Vec3<real> r = {2.0, 0.0, 0.0};
   auto result1 = lj->calc_force_and_energy(r);
   auto result2 = lj2.calc_force_and_energy(r);
 
@@ -136,7 +136,7 @@ TEST_F(LennardJonesTest, ParameterVariation) {
 TEST_F(LennardJonesTest, ExactValueCheck) {
   // Test with pre-calculated values for a specific case
   LennardJones lj_exact(1.0, 1.0, 3.0);
-  Vec3<real> r(1.5, 0.0, 0.0);
+  Vec3<real> r = {1.5, 0.0, 0.0};
   auto result = lj_exact.calc_force_and_energy(r);
 
   // Pre-calculated values (you may need to adjust these based on your specific
@@ -155,11 +155,11 @@ TEST_F(LennardJonesTest, ExactValueCheck) {
 
 TEST_F(LennardJonesTest, NearCutoff) {
   // Test behavior just inside and just outside the cutoff
-  real inside_cutoff = rCutoff - 0.01;
-  real outside_cutoff = rCutoff + 0.01;
+  real inside_cutoff = r_cutoff - 0.01;
+  real outside_cutoff = r_cutoff + 0.01;
 
-  Vec3<real> r_inside(inside_cutoff, 0.0, 0.0);
-  Vec3<real> r_outside(outside_cutoff, 0.0, 0.0);
+  Vec3<real> r_inside = {inside_cutoff, 0.0, 0.0};
+  Vec3<real> r_outside = {outside_cutoff, 0.0, 0.0};
 
   auto result_inside = lj->calc_force_and_energy(r_inside);
   auto result_outside = lj->calc_force_and_energy(r_outside);
@@ -170,5 +170,5 @@ TEST_F(LennardJonesTest, NearCutoff) {
 
   // Outside should be zero
   EXPECT_EQ(0.0, result_outside.energy);
-  expectVec3Near(Vec3<real>(0.0, 0.0, 0.0), result_outside.force, 1e-10);
+  expect_vec3_near({0.0, 0.0, 0.0}, result_outside.force, 1e-10);
 }

From 5155ec21aa40361dbad2766c40ef984f004ef909 Mon Sep 17 00:00:00 2001
From: Alex Selimov <alex@alexselimov.com>
Date: Thu, 17 Apr 2025 16:07:26 -0400
Subject: [PATCH 07/10] Add basic LJ potential*

- Add PairPotential Abstract class
- Add Lennard-Jones potential that should work with both CUDA and C++
  code
- Add tests on HOST side for LJ potential
---
 .gitignore                          |  2 +
 CMakeLists.txt                      | 22 ++++---
 kernels/CMakeLists.txt              |  6 +-
 kernels/hello_world.cu              | 46 ---------------
 kernels/hello_world.h               | 10 ----
 kernels/pair_potentials.cuh         | 91 +++++++++++++++++++++++++++++
 src/CMakeLists.txt                  |  5 +-
 src/pair_potentials.cpp             | 33 -----------
 src/pair_potentials.hpp             | 49 ----------------
 tests/unit_tests/CMakeLists.txt     |  2 +-
 tests/unit_tests/test_potential.cpp |  2 +-
 11 files changed, 114 insertions(+), 154 deletions(-)
 delete mode 100644 kernels/hello_world.cu
 delete mode 100644 kernels/hello_world.h
 create mode 100644 kernels/pair_potentials.cuh
 delete mode 100644 src/pair_potentials.cpp
 delete mode 100644 src/pair_potentials.hpp

diff --git a/.gitignore b/.gitignore
index de19d1d..7d0b8c3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 build/
 Debug/
 Testing/
+compile_commands.json
 
 # Google Tests
 tests/lib/
@@ -12,3 +13,4 @@ cmake-build-debug/
 
 # Cache dir
 .cache
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e177680..fb27a81 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,9 +8,19 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 # Default settings 
 add_compile_options(-Wall -Wextra -Wpedantic)
 
+add_compile_options($<$<COMPILE_LANGUAGE:CUDA>:-Wno-pedantic>)
+
+# Net effect: -Wpedantic applies to C++ sources only; the line above turns it off for CUDA.
+
 set(CMAKE_CXX_STANDARD 17)
+
+# Cuda Settings
 set(CMAKE_CUDA_ARCHITECTURES 61)
 set(CUDA_SEPARABLE_COMPILATION ON)
+# Cuda settings to get correct compile_commands.json
+set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_INCLUDES 0)
+set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_LIBRARIES 0)
+set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_OBJECTS 0)
 
 # Add Vec3  as a dependency
 include(FetchContent)
@@ -24,9 +34,9 @@ if(NOT Vec3_POPULATED)
     include_directories(${Vec3_SOURCE_DIR})
 endif()
 
+include_directories(/usr/local/cuda-12.8/include)
 include_directories(src)
 include_directories(kernels)
-include_directories(/usr/local/cuda-12.8/include)
 
 add_subdirectory(src)
 add_subdirectory(kernels)
@@ -35,21 +45,19 @@ add_subdirectory(tests)
 add_executable(${NAME} main.cpp)
 install(DIRECTORY src/ DESTINATION src/)
 
-
 target_link_libraries(
     ${NAME} 
     PRIVATE
     ${NAME}_lib 
     ${NAME}_cuda_lib 
-    
     ${CUDA_LIBRARIES}
 )
 
 # Doxygen Build
-option(BUILD_DOC "Build Documentation" ON)
+option(BUILD_DOC "Build Documentation" OFF)
 
 find_package(Doxygen)
-if(DOXYGEN_FOUND)
+if(DOXYGEN_FOUND AND BUILD_DOC)
     set(BUILD_DOC_DIR ${CMAKE_SOURCE_DIR}/build/docs)
     if(NOT EXISTS ${BUILD_DOC_DIR})
         file(MAKE_DIRECTORY ${BUILD_DOC_DIR})
@@ -65,6 +73,6 @@ if(DOXYGEN_FOUND)
             WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
             COMMENT "Generating API documentation with Doxygen"
             VERBATIM)
-else(DOXYGEN_FOUND)
+else(DOXYGEN_FOUND AND BUILD_DOC)
     message("Doxygen needs to be installed to generate the documentation.")
-endif(DOXYGEN_FOUND)
+endif(DOXYGEN_FOUND AND BUILD_DOC)
diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt
index be62649..baa8a60 100644
--- a/kernels/CMakeLists.txt
+++ b/kernels/CMakeLists.txt
@@ -1,16 +1,14 @@
 project(${NAME}_cuda_lib CUDA CXX)
 
 set(HEADER_FILES
-    hello_world.h
+    pair_potentials.cuh
 )
 set(SOURCE_FILES
-    hello_world.cu
 )
 
 # The library contains header and source files.
-add_library(${NAME}_cuda_lib STATIC
+add_library(${NAME}_cuda_lib INTERFACE
     ${SOURCE_FILES}
     ${HEADER_FILES}
 )
 
-target_compile_options(${CMAKE_PROJECT_NAME}_cuda_lib PRIVATE -Wno-gnu-line-marker -Wno-pedantic)
diff --git a/kernels/hello_world.cu b/kernels/hello_world.cu
deleted file mode 100644
index 7c65115..0000000
--- a/kernels/hello_world.cu
+++ /dev/null
@@ -1,46 +0,0 @@
-#include <cuda_runtime.h>
-#include <stdio.h>
-
-__global__ void hello_cuda() {
-  printf("Hello CUDA from thread %d\n", threadIdx.x);
-}
-
-extern "C" void launch_hello_cuda() {
-  // First check device properties
-  cudaDeviceProp prop;
-  cudaGetDeviceProperties(&prop, 1);
-  printf("Using device: %s with compute capability %d.%d\n", prop.name,
-         prop.major, prop.minor);
-
-  hello_cuda<<<1, 10>>>();
-  cudaDeviceSynchronize();
-  fflush(stdout);
-}
-
-extern "C" void check_cuda() {
-  int deviceCount = 0;
-  cudaError_t error = cudaGetDeviceCount(&deviceCount);
-
-  if (error != cudaSuccess) {
-    printf("CUDA error: %s\n", cudaGetErrorString(error));
-  }
-
-  printf("Found %d CUDA devices\n", deviceCount);
-
-  for (int i = 0; i < deviceCount; i++) {
-    cudaDeviceProp prop;
-    cudaGetDeviceProperties(&prop, i);
-
-    printf("Device %d: %s\n", i, prop.name);
-    printf("  Compute capability: %d.%d\n", prop.major, prop.minor);
-    printf("  Total global memory: %.2f GB\n",
-           static_cast<float>(prop.totalGlobalMem) / (1024 * 1024 * 1024));
-    printf("  Multiprocessors: %d\n", prop.multiProcessorCount);
-    printf("  Max threads per block: %d\n", prop.maxThreadsPerBlock);
-    printf("  Max threads dimensions: (%d, %d, %d)\n", prop.maxThreadsDim[0],
-           prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
-    printf("  Max grid dimensions: (%d, %d, %d)\n", prop.maxGridSize[0],
-           prop.maxGridSize[1], prop.maxGridSize[2]);
-    printf("\n");
-  }
-}
diff --git a/kernels/hello_world.h b/kernels/hello_world.h
deleted file mode 100644
index 4024e2e..0000000
--- a/kernels/hello_world.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef HELLO_WORLD_CU_H
-#define HELLO_WORLD_CU_H
-
-extern "C" {
-// Declaration of the CUDA function that will be called from C++
-void launch_hello_cuda();
-void check_cuda();
-}
-
-#endif // HELLO_WORLD_CU_H
diff --git a/kernels/pair_potentials.cuh b/kernels/pair_potentials.cuh
new file mode 100644
index 0000000..ab45648
--- /dev/null
+++ b/kernels/pair_potentials.cuh
@@ -0,0 +1,91 @@
+#ifndef POTENTIALS_H
+#define POTENTIALS_H
+
+#include "precision.hpp"
+#include "vec3.h"
+
+#ifdef __CUDACC__
+#define CUDA_CALLABLE __host__ __device__
+#else
+#define CUDA_CALLABLE
+#endif
+
+/**
+ * Result struct for the Pair Potential
+ */
+struct ForceAndEnergy {
+  real energy;
+  Vec3<real> force;
+
+  CUDA_CALLABLE inline static ForceAndEnergy zero() {
+    return {0.0, {0.0, 0.0, 0.0}};
+  };
+};
+
+/**
+ * Abstract implementation of a Pair Potential.
+ * Pair potentials are potentials which depend solely on the distance
+ * between two particles. These do not include multi-body potentials such as
+ * EAM.
+ * The constructor is CUDA_CALLABLE so device code can construct derived types.
+ */
+struct PairPotential {
+  real m_rcutoffsq; // squared cutoff radius (rcutoff * rcutoff)
+
+  CUDA_CALLABLE PairPotential(real rcutoff) : m_rcutoffsq(rcutoff * rcutoff) {};
+#ifdef __CUDACC__
+  CUDA_CALLABLE ~PairPotential();
+#else
+  virtual ~PairPotential() = 0;
+#endif
+
+  /**
+   * Calculate the force and energy for a specific atom pair based on a
+   * displacement vector r.
+   */
+  CUDA_CALLABLE virtual ForceAndEnergy calc_force_and_energy(Vec3<real> r) = 0;
+};
+
+/**
+ * Calculate the Lennard-Jones energy and force for the current particle pair
+ * described by displacement vector r
+ */
+struct LennardJones : PairPotential {
+  real m_epsilon;
+  real m_sigma;
+
+  CUDA_CALLABLE LennardJones(real sigma, real epsilon, real rcutoff)
+      : PairPotential(rcutoff), m_epsilon(epsilon), m_sigma(sigma) {};
+
+  CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3<real> r) {
+    real rmagsq = r.squared_norm2();
+    if (rmagsq < this->m_rcutoffsq && rmagsq > 0.0) {
+      real inv_rmag = 1 / std::sqrt(rmagsq);
+
+      // Pre-compute the terms (doing this saves on multiple divisions/pow
+      // function calls)
+      real sigma_r = m_sigma * inv_rmag;
+      real sigma_r6 = sigma_r * sigma_r * sigma_r * sigma_r * sigma_r * sigma_r;
+      real sigma_r12 = sigma_r6 * sigma_r6;
+
+      // Get the energy
+      real energy = 4.0 * m_epsilon * (sigma_r12 - sigma_r6);
+
+      // Get the force vector
+      real force_mag =
+          4.0 * m_epsilon *
+          (12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag);
+      Vec3<real> force = r.scale(force_mag * inv_rmag);
+
+      return {energy, force};
+
+    } else {
+      return ForceAndEnergy::zero();
+    }
+  };
+
+  ~LennardJones() {};
+};
+
+PairPotential::~PairPotential() {};
+#endif
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b8ed52a..48c2307 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -4,14 +4,13 @@ set(HEADER_FILES
     particle.hpp
     simulation.hpp
     box.hpp
-    pair_potentials.hpp
+
 )
 set(SOURCE_FILES
-    pair_potentials.cpp
 )
 
 # The library contains header and source files.
-add_library(${NAME}_lib 
+add_library(${NAME}_lib INTERFACE
     ${HEADER_FILES} 
     ${SOURCE_FILES}
 )
diff --git a/src/pair_potentials.cpp b/src/pair_potentials.cpp
deleted file mode 100644
index 19b7ef8..0000000
--- a/src/pair_potentials.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "pair_potentials.hpp"
-#include <cmath>
-
-PairPotential::~PairPotential() {};
-/**
- * Calculate the Lennard-Jones energy and force for the current particle pair
- * described by displacement vector r
- */
-ForceAndEnergy LennardJones::calc_force_and_energy(Vec3<real> r) {
-  real rmagsq = r.squared_norm2();
-  if (rmagsq < this->m_rcutoffsq && rmagsq > 0.0) {
-    real inv_rmag = 1 / std::sqrt(rmagsq);
-
-    // Pre-Compute the terms (doing this saves on multiple devisions/pow
-    // function call)
-    real sigma_r = m_sigma * inv_rmag;
-    real sigma_r6 = sigma_r * sigma_r * sigma_r * sigma_r * sigma_r * sigma_r;
-    real sigma_r12 = sigma_r6 * sigma_r6;
-
-    // Get the energy
-    real energy = 4.0 * m_epsilon * (sigma_r12 - sigma_r6);
-
-    // Get the force vector
-    real force_mag = 4.0 * m_epsilon *
-                     (12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag);
-    Vec3<real> force = r.scale(force_mag * inv_rmag);
-
-    return {energy, force};
-
-  } else {
-    return ForceAndEnergy::zero();
-  }
-};
diff --git a/src/pair_potentials.hpp b/src/pair_potentials.hpp
deleted file mode 100644
index 04f64d4..0000000
--- a/src/pair_potentials.hpp
+++ /dev/null
@@ -1,49 +0,0 @@
-#ifndef POTENTIALS_H
-#define POTENTIALS_H
-
-#include "precision.hpp"
-#include "vec3.h"
-
-/**
- * Result struct for the Pair Potential
- */
-struct ForceAndEnergy {
-  real energy;
-  Vec3<real> force;
-
-  inline static ForceAndEnergy zero() { return {0.0, {0.0, 0.0, 0.0}}; };
-};
-
-/**
- * Abstract implementation of a Pair Potential.
- * Pair potentials are potentials which depend solely on the distance
- * between two particles. These do not include multi-body potentials such as
- * EAM
- *
- */
-struct PairPotential {
-  real m_rcutoffsq;
-
-  PairPotential(real rcutoff) : m_rcutoffsq(rcutoff * rcutoff) {};
-  virtual ~PairPotential() = 0;
-
-  /**
-   * Calculate the force and energy for a specific atom pair based on a
-   * displacement vector r.
-   */
-  virtual ForceAndEnergy calc_force_and_energy(Vec3<real> r) = 0;
-};
-
-struct LennardJones : PairPotential {
-  real m_epsilon;
-  real m_sigma;
-
-  LennardJones(real sigma, real epsilon, real rcutoff)
-      : PairPotential(rcutoff), m_epsilon(epsilon), m_sigma(sigma) {};
-
-  ForceAndEnergy calc_force_and_energy(Vec3<real> r);
-
-  ~LennardJones() {};
-};
-
-#endif
diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt
index 75a4347..c396ab7 100644
--- a/tests/unit_tests/CMakeLists.txt
+++ b/tests/unit_tests/CMakeLists.txt
@@ -5,5 +5,5 @@ add_executable(${NAME}_tests
 )
 
 target_link_libraries(${NAME}_tests gtest gtest_main)
-target_link_libraries(${NAME}_tests ${CMAKE_PROJECT_NAME}_lib)
+target_link_libraries(${NAME}_tests ${CMAKE_PROJECT_NAME}_cuda_lib)
 add_test(NAME ${NAME}Tests COMMAND ${CMAKE_BINARY_DIR}/tests/unit_tests/${NAME}_tests)
diff --git a/tests/unit_tests/test_potential.cpp b/tests/unit_tests/test_potential.cpp
index 7bc0dcb..a94f022 100644
--- a/tests/unit_tests/test_potential.cpp
+++ b/tests/unit_tests/test_potential.cpp
@@ -1,4 +1,4 @@
-#include "pair_potentials.hpp"
+#include "pair_potentials.cuh"
 #include "precision.hpp"
 #include "gtest/gtest.h"
 #include <cmath>

From 4269333aa2fd2be56a2216763a7710a9d936cdac Mon Sep 17 00:00:00 2001
From: Alex Selimov <alex@alexselimov.com>
Date: Thu, 17 Apr 2025 16:21:59 -0400
Subject: [PATCH 08/10] Fix bug with CUDA impl and add CUDA tests

---
 kernels/pair_potentials.cuh             |   2 +-
 tests/CMakeLists.txt                    |   3 +-
 tests/cuda_unit_tests/CMakeLists.txt    |   9 +
 tests/cuda_unit_tests/test_potential.cu | 316 ++++++++++++++++++++++++
 4 files changed, 328 insertions(+), 2 deletions(-)
 create mode 100644 tests/cuda_unit_tests/CMakeLists.txt
 create mode 100644 tests/cuda_unit_tests/test_potential.cu

diff --git a/kernels/pair_potentials.cuh b/kernels/pair_potentials.cuh
index ab45648..052a079 100644
--- a/kernels/pair_potentials.cuh
+++ b/kernels/pair_potentials.cuh
@@ -84,7 +84,7 @@ struct LennardJones : PairPotential {
     }
   };
 
-  ~LennardJones() {};
+  CUDA_CALLABLE ~LennardJones(){};
 };
 
 PairPotential::~PairPotential() {};
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 85a8157..7f994a6 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -10,4 +10,5 @@ if(NOT EXISTS ${GOOGLETEST_DIR})
 endif()
 
 add_subdirectory(lib/googletest)
-add_subdirectory(unit_tests)
\ No newline at end of file
+add_subdirectory(unit_tests)
+add_subdirectory(cuda_unit_tests)
diff --git a/tests/cuda_unit_tests/CMakeLists.txt b/tests/cuda_unit_tests/CMakeLists.txt
new file mode 100644
index 0000000..27490a0
--- /dev/null
+++ b/tests/cuda_unit_tests/CMakeLists.txt
@@ -0,0 +1,9 @@
+include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
+# GoogleTest executable for the device-side (CUDA) unit tests.
+add_executable(${NAME}_cuda_tests
+    test_potential.cu
+)
+# Link GoogleTest and the kernels library, then register this binary with CTest.
+target_link_libraries(${NAME}_cuda_tests gtest gtest_main)
+target_link_libraries(${NAME}_cuda_tests ${NAME}_cuda_lib)
+add_test(NAME ${NAME}CudaTests COMMAND ${CMAKE_BINARY_DIR}/tests/cuda_unit_tests/${NAME}_cuda_tests)
diff --git a/tests/cuda_unit_tests/test_potential.cu b/tests/cuda_unit_tests/test_potential.cu
new file mode 100644
index 0000000..cab3216
--- /dev/null
+++ b/tests/cuda_unit_tests/test_potential.cu
@@ -0,0 +1,316 @@
+#include "pair_potentials.cuh"
+#include "precision.hpp"
+#include "gtest/gtest.h"
+#include <cmath>
+#include <cuda_runtime.h>
+
+// Structure to hold test results from device
+struct TestResults {
+  bool zero_distance_pass;
+  bool beyond_cutoff_pass;
+  bool at_minimum_pass;
+  bool at_equilibrium_pass;
+  bool repulsive_region_pass;
+  bool attractive_region_pass;
+  bool arbitrary_direction_pass;
+  bool parameter_variation_pass;
+  bool exact_value_check_pass;
+  bool near_cutoff_pass;
+
+  // Additional result data for exact checks
+  real energy_values[10];
+  Vec3<real> force_values[10];
+};
+
+// Check if two Vec3 values are close within tolerance
+__device__ bool vec3_near(const Vec3<real> &a, const Vec3<real> &b,
+                          real tolerance) {
+  return (fabs(a.x - b.x) < tolerance) && (fabs(a.y - b.y) < tolerance) &&
+         (fabs(a.z - b.z) < tolerance);
+}
+
+// Device kernel to run all tests
+__global__ void lennard_jones_test_kernel(TestResults *results) {
+  // Default parameters
+  real sigma = 1.0;
+  real epsilon = 1.0;
+  real r_cutoff = 2.5;
+  real tolerance = 1e-10;
+
+  // Create LennardJones object on device
+  LennardJones lj(sigma, epsilon, r_cutoff);
+
+  // Zero Distance Test
+  {
+    Vec3<real> r = {0.0, 0.0, 0.0};
+    auto result = lj.calc_force_and_energy(r);
+    results->energy_values[0] = result.energy;
+    results->force_values[0] = result.force;
+    results->zero_distance_pass =
+        (result.energy == 0.0) &&
+        vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance);
+  }
+
+  // Beyond Cutoff Test
+  {
+    Vec3<real> r = {3.0, 0.0, 0.0};
+    auto result = lj.calc_force_and_energy(r);
+    results->energy_values[1] = result.energy;
+    results->force_values[1] = result.force;
+    results->beyond_cutoff_pass =
+        (result.energy == 0.0) &&
+        vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance);
+  }
+
+  // At Minimum Test
+  {
+    real min_dist = pow(2.0, 1.0 / 6.0) * sigma;
+    Vec3<real> r = {min_dist, 0.0, 0.0};
+    auto result = lj.calc_force_and_energy(r);
+    results->energy_values[2] = result.energy;
+    results->force_values[2] = result.force;
+    results->at_minimum_pass =
+        (fabs(result.energy + epsilon) < tolerance) &&
+        vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance);
+  }
+
+  // At Equilibrium Test
+  {
+    Vec3<real> r = {sigma, 0.0, 0.0};
+    auto result = lj.calc_force_and_energy(r);
+    results->energy_values[3] = result.energy;
+    results->force_values[3] = result.force;
+    results->at_equilibrium_pass = (fabs(result.energy) < tolerance) &&
+                                   (result.force.x > 0.0) &&
+                                   (fabs(result.force.y) < tolerance) &&
+                                   (fabs(result.force.z) < tolerance);
+  }
+
+  // Repulsive Region Test
+  {
+    Vec3<real> r = {0.8 * sigma, 0.0, 0.0};
+    auto result = lj.calc_force_and_energy(r);
+    results->energy_values[4] = result.energy;
+    results->force_values[4] = result.force;
+    results->repulsive_region_pass =
+        (result.energy > 0.0) && (result.force.x > 0.0);
+  }
+
+  // Attractive Region Test
+  {
+    Vec3<real> r = {1.5 * sigma, 0.0, 0.0};
+    auto result = lj.calc_force_and_energy(r);
+    results->energy_values[5] = result.energy;
+    results->force_values[5] = result.force;
+    results->attractive_region_pass =
+        (result.energy < 0.0) && (result.force.x < 0.0);
+  }
+
+  // Arbitrary Direction Test
+  {
+    Vec3<real> r = {1.0, 1.0, 1.0};
+    auto result = lj.calc_force_and_energy(r);
+    results->energy_values[6] = result.energy;
+    results->force_values[6] = result.force;
+
+    real r_mag = sqrt(r.squared_norm2());
+    Vec3<real> normalized_r = r.scale(1.0 / r_mag);
+    real force_dot_r = result.force.x * normalized_r.x +
+                       result.force.y * normalized_r.y +
+                       result.force.z * normalized_r.z;
+
+    results->arbitrary_direction_pass =
+        (force_dot_r < 0.0) &&
+        (fabs(result.force.x - result.force.y) < tolerance) &&
+        (fabs(result.force.y - result.force.z) < tolerance);
+  }
+
+  // Parameter Variation Test
+  {
+    real new_sigma = 2.0;
+    real new_epsilon = 0.5;
+    real new_r_cutoff = 5.0;
+
+    LennardJones lj2(new_sigma, new_epsilon, new_r_cutoff);
+
+    Vec3<real> r = {2.0, 0.0, 0.0};
+    auto result1 = lj.calc_force_and_energy(r);
+    auto result2 = lj2.calc_force_and_energy(r);
+
+    results->energy_values[7] = result2.energy;
+    results->force_values[7] = result2.force;
+
+    results->parameter_variation_pass = (result1.energy != result2.energy) &&
+                                        (result1.force.x != result2.force.x);
+  }
+
+  // Exact Value Check Test
+  {
+    LennardJones lj_exact(1.0, 1.0, 3.0);
+    Vec3<real> r = {1.5, 0.0, 0.0};
+    auto result = lj_exact.calc_force_and_energy(r);
+
+    results->energy_values[8] = result.energy;
+    results->force_values[8] = result.force;
+
+    real expected_energy = 4.0 * (pow(1.0 / 1.5, 12) - pow(1.0 / 1.5, 6));
+    real expected_force =
+        24.0 * (pow(1.0 / 1.5, 6) - 2.0 * pow(1.0 / 1.5, 12)) / 1.5;
+
+    results->exact_value_check_pass =
+        (fabs(result.energy - expected_energy) < tolerance) &&
+        (fabs(result.force.x + expected_force) < tolerance) &&
+        (fabs(result.force.y) < tolerance) &&
+        (fabs(result.force.z) < tolerance);
+  }
+
+  // Near Cutoff Test
+  {
+    real inside_cutoff = r_cutoff - 0.01;
+    real outside_cutoff = r_cutoff + 0.01;
+
+    Vec3<real> r_inside = {inside_cutoff, 0.0, 0.0};
+    Vec3<real> r_outside = {outside_cutoff, 0.0, 0.0};
+
+    auto result_inside = lj.calc_force_and_energy(r_inside);
+    auto result_outside = lj.calc_force_and_energy(r_outside);
+
+    results->energy_values[9] = result_inside.energy;
+    results->force_values[9] = result_inside.force;
+
+    results->near_cutoff_pass =
+        (result_inside.energy != 0.0) && (result_inside.force.x != 0.0) &&
+        (result_outside.energy == 0.0) &&
+        vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result_outside.force, tolerance);
+  }
+}
+
+// Helper class for CUDA error checking
+class CudaErrorCheck {
+public:
+  static void checkAndThrow(cudaError_t err, const char *msg) {
+    if (err != cudaSuccess) {
+      std::string error_message =
+          std::string(msg) + ": " + cudaGetErrorString(err);
+      throw std::runtime_error(error_message);
+    }
+  }
+};
+
+// Google Test wrapper that runs the device tests
+class LennardJonesCudaTest : public ::testing::Test {
+protected:
+  void SetUp() override {
+    // Allocate device memory for results
+    CudaErrorCheck::checkAndThrow(
+        cudaMalloc(&d_results, sizeof(TestResults)),
+        "Failed to allocate device memory for test results");
+  }
+
+  void TearDown() override {
+    if (d_results) {
+      cudaFree(d_results);
+      d_results = nullptr;
+    }
+  }
+
+  // Helper function to run tests on device and get results
+  TestResults runDeviceTests() {
+    TestResults h_results;
+
+    // Clear device memory
+    CudaErrorCheck::checkAndThrow(cudaMemset(d_results, 0, sizeof(TestResults)),
+                                  "Failed to clear device memory");
+
+    // Run kernel with a single thread
+    lennard_jones_test_kernel<<<1, 1>>>(d_results);
+
+    // Check for kernel launch errors
+    CudaErrorCheck::checkAndThrow(cudaGetLastError(), "Kernel launch failed");
+
+    // Wait for kernel to complete
+    CudaErrorCheck::checkAndThrow(cudaDeviceSynchronize(),
+                                  "Kernel execution failed");
+
+    // Copy results back to host
+    CudaErrorCheck::checkAndThrow(cudaMemcpy(&h_results, d_results,
+                                             sizeof(TestResults),
+                                             cudaMemcpyDeviceToHost),
+                                  "Failed to copy results from device");
+
+    return h_results;
+  }
+
+  TestResults *d_results = nullptr;
+};
+
+// Define the actual test cases
+TEST_F(LennardJonesCudaTest, DeviceZeroDistance) {
+  auto results = runDeviceTests();
+  EXPECT_TRUE(results.zero_distance_pass)
+      << "Zero distance test failed on device. Energy: "
+      << results.energy_values[0] << ", Force: (" << results.force_values[0].x
+      << ", " << results.force_values[0].y << ", " << results.force_values[0].z
+      << ")";
+}
+
+TEST_F(LennardJonesCudaTest, DeviceBeyondCutoff) {
+  auto results = runDeviceTests();
+  EXPECT_TRUE(results.beyond_cutoff_pass)
+      << "Beyond cutoff test failed on device. Energy: "
+      << results.energy_values[1];
+}
+
+TEST_F(LennardJonesCudaTest, DeviceAtMinimum) {
+  auto results = runDeviceTests();
+  EXPECT_TRUE(results.at_minimum_pass)
+      << "At minimum test failed on device. Energy: "
+      << results.energy_values[2];
+}
+
+TEST_F(LennardJonesCudaTest, DeviceAtEquilibrium) {
+  auto results = runDeviceTests();
+  EXPECT_TRUE(results.at_equilibrium_pass)
+      << "At equilibrium test failed on device. Energy: "
+      << results.energy_values[3] << ", Force x: " << results.force_values[3].x;
+}
+
+TEST_F(LennardJonesCudaTest, DeviceRepulsiveRegion) {
+  auto results = runDeviceTests();
+  EXPECT_TRUE(results.repulsive_region_pass)
+      << "Repulsive region test failed on device. Energy: "
+      << results.energy_values[4] << ", Force x: " << results.force_values[4].x;
+}
+
+TEST_F(LennardJonesCudaTest, DeviceAttractiveRegion) {
+  auto results = runDeviceTests();
+  EXPECT_TRUE(results.attractive_region_pass)
+      << "Attractive region test failed on device. Energy: "
+      << results.energy_values[5] << ", Force x: " << results.force_values[5].x;
+}
+
+TEST_F(LennardJonesCudaTest, DeviceArbitraryDirection) {
+  auto results = runDeviceTests();
+  EXPECT_TRUE(results.arbitrary_direction_pass)
+      << "Arbitrary direction test failed on device.";
+}
+
+TEST_F(LennardJonesCudaTest, DeviceParameterVariation) {
+  auto results = runDeviceTests();
+  EXPECT_TRUE(results.parameter_variation_pass)
+      << "Parameter variation test failed on device.";
+}
+
+TEST_F(LennardJonesCudaTest, DeviceExactValueCheck) {
+  auto results = runDeviceTests();
+  EXPECT_TRUE(results.exact_value_check_pass)
+      << "Exact value check test failed on device. Energy: "
+      << results.energy_values[8] << ", Force x: " << results.force_values[8].x;
+}
+
+TEST_F(LennardJonesCudaTest, DeviceNearCutoff) {
+  auto results = runDeviceTests();
+  EXPECT_TRUE(results.near_cutoff_pass)
+      << "Near cutoff test failed on device. Inside energy: "
+      << results.energy_values[9];
+}

From 62e52940bce6df27a86fa85e369370828ecbfc16 Mon Sep 17 00:00:00 2001
From: Alex Selimov <alex@alexselimov.com>
Date: Sun, 27 Apr 2025 14:33:46 -0400
Subject: [PATCH 09/10] Update all code to use real type

---
 src/box.hpp        | 15 ++++++++-------
 src/particle.hpp   | 11 ++++++-----
 src/simulation.hpp |  9 +++++----
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/src/box.hpp b/src/box.hpp
index 816b53e..b588c49 100644
--- a/src/box.hpp
+++ b/src/box.hpp
@@ -1,19 +1,20 @@
 #ifndef BOX_H
 #define BOX_H
 
+#include "precision.hpp"
 /**
  * Struct representing the simulation box.
  * Currently the simulation box is always assumed to be perfectly rectangular.
  * This code does not support shearing the box. This functionality may be added
  * in later.
  */
-template <typename T> struct Box {
-  T xlo;
-  T xhi;
-  T ylo;
-  T yhi;
-  T zlo;
-  T zhi;
+struct Box {
+  real xlo;
+  real xhi;
+  real ylo;
+  real yhi;
+  real zlo;
+  real zhi;
   bool x_is_periodic;
   bool y_is_periodic;
   bool z_is_periodic;
diff --git a/src/particle.hpp b/src/particle.hpp
index 84fd9b8..b9e3464 100644
--- a/src/particle.hpp
+++ b/src/particle.hpp
@@ -1,6 +1,7 @@
 #ifndef PARTICLE_H
 #define PARTICLE_H
 
+#include "precision.hpp"
 #include "vec3.h"
 
 /**
@@ -8,11 +9,11 @@
  * This class is only used on the host side of the code and is converted
  * to the device arrays.
  */
-template <typename T = float> struct Particle {
-  Vec3<T> pos;
-  Vec3<T> vel;
-  Vec3<T> force;
-  T mass;
+struct Particle {
+  Vec3<real> pos;
+  Vec3<real> vel;
+  Vec3<real> force;
+  real mass;
 };
 
 #endif
diff --git a/src/simulation.hpp b/src/simulation.hpp
index 5b468b9..0d69d34 100644
--- a/src/simulation.hpp
+++ b/src/simulation.hpp
@@ -3,15 +3,16 @@
 
 #include "box.hpp"
 #include "particle.hpp"
+#include "precision.hpp"
 #include <vector>
 
-template <typename T> class Simulation {
+class Simulation {
   // Simulation State variables
-  T timestep;
-  Box<T> box;
+  real timestep;
+  Box box;
 
   // Host Data
-  std::vector<Particle<T>> particles;
+  std::vector<Particle> particles;
 };
 
 #endif

From 746face82943a833a1d5274d53173d2edb0de97c Mon Sep 17 00:00:00 2001
From: Alex Selimov <alex@alexselimov.com>
Date: Mon, 14 Jul 2025 10:37:35 -0400
Subject: [PATCH 10/10] Use non-template Particle in main.cpp and reformat

---
 main.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/main.cpp b/main.cpp
index 26990c6..1fcf9ba 100644
--- a/main.cpp
+++ b/main.cpp
@@ -3,8 +3,7 @@
 #include <iostream>
 
 int main() {
-  Particle<float> test = {
-      {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, 10};
+  Particle test = {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, 10};
   std::cout << test.pos.x << " " << test.pos.y << " " << test.pos.z;
   return 0;
 }