diff --git a/CMakeLists.txt b/CMakeLists.txt
index b7394b0..fb27a81 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,7 +25,7 @@ set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_OBJECTS 0)
 # Add Vec3  as a dependency
 include(FetchContent)
 FetchContent_Declare(Vec3
-    GIT_REPOSITORY https://forge.alexselimov.com/aselimov/Vec3.git
+    GIT_REPOSITORY https://www.alexselimov.com/git/aselimov/Vec3.git
 )
 
 FetchContent_GetProperties(Vec3)
diff --git a/README.md b/README.md
index d8435ec..9236f1b 100644
--- a/README.md
+++ b/README.md
@@ -1,35 +1,12 @@
-# ⚛️ CudaCAC
+# C++ Project Template
+Starting a new C++ project requires several configuration steps that must be
+completed before any actual code can be written.
+This repository is a C++ project template that already provides the
+following components:
 
-CudaCAC is a Cuda accelerated implementation of the Concurrent Atomistic-Continuum (CAC) method.
+- Directory Structure
+- Build System (CMake)
+- CUDA integration
+- Unit Test Framework (Google Test)
+- API Documentation (Doxygen)
 
-## Background
-
-### Molecular Dynamics
-
-Molecular dynamics (MD) is a computer simulation method for analyzing the physical movements of atoms and molecules. The atoms and molecules are allowed to interact for a fixed period of time, giving a view of the dynamic evolution of the system. In the most common version, the trajectories of atoms and molecules are determined by numerically solving Newton's equations of motion for a system of interacting particles, where forces between the particles and their potential energies are often calculated using interatomic potentials or molecular mechanics force fields.
-
-### Concurrent Atomistic-Continuum (CAC) Method
-
-The Concurrent Atomistic-Continuum (CAC) method is a multiscale modeling technique used for simulating materials at the nano and micro-scale. It partitions a simulation into a coarse-grained domain and an atomistic domain. This allows for the detailed, fully-resolved atomistic simulation of important regions, like those with lattice defects, while more efficiently modeling the rest of the material as a continuum. A key feature of the CAC method is its use of a unified set of governing equations and interatomic potentials across both the atomistic and continuum domains. This avoids the need for complex coupling procedures at the interface of the two regions.
-
-## Tech Stack
-
-This project leverages a high-performance computing stack for its simulations:
-
-*   **C++:** The core application logic is written in modern C++, providing a balance of performance and high-level abstractions.
-*   **CUDA:** NVIDIA's CUDA platform is used to accelerate the computationally intensive parts of the simulation on the GPU.
-*   **CMake:** A cross-platform build system used to manage the compilation and linking of the project.
-*   **Google Test:** A testing framework for writing C++ tests.
-*   **Doxygen:** A documentation generator for C++ code.
-
-## Roadmap
-
-- [ ] Complete basic molecular dynamics atomistic solver using Cuda using Lennard-Jones pair potential with order O(n^2) calculations
-- [ ] Implement CAC rhombohedral finite element solver
-- [ ] Adding neighbor lists with cutoff distances to reduce runtime complexity
-- [ ] Adding multi-body potential support
-- [ ] Adding support for overlaying multiple potentials
-
-## Contact
-
-For any questions or inquiries, please contact Alex Selimov at [alex@alexselimov.com](mailto:alex@alexselimov.com) or visit his website at [alexselimov.com](https://alexselimov.com).
diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt
index fac4474..baa8a60 100644
--- a/kernels/CMakeLists.txt
+++ b/kernels/CMakeLists.txt
@@ -2,14 +2,12 @@ project(${NAME}_cuda_lib CUDA CXX)
 
 set(HEADER_FILES
     pair_potentials.cuh
-    forces.cuh
 )
 set(SOURCE_FILES
-    forces.cu
 )
 
 # The library contains header and source files.
-add_library(${NAME}_cuda_lib STATIC
+add_library(${NAME}_cuda_lib INTERFACE
     ${SOURCE_FILES}
     ${HEADER_FILES}
 )
diff --git a/kernels/forces.cu b/kernels/forces.cu
deleted file mode 100644
index 2251bd5..0000000
--- a/kernels/forces.cu
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "forces.cuh"
-
-__global__ void CAC::calc_forces_and_energies(real *xs, real *forces,
-                                              real *energies, int n_particles,
-                                              real *box_len,
-                                              PairPotential &potential) {
-  int i = blockIdx.x * blockDim.x + threadIdx.x;
-  if (i < n_particles) {
-    real xi = xs[3 * i];
-    real yi = xs[3 * i + 1];
-    real zi = xs[3 * i + 2];
-
-    for (int j = 0; j < n_particles; j++) {
-      if (i != j) {
-        real xj = xs[3 * j];
-        real yj = xs[3 * j + 1];
-        real zj = xs[3 * j + 2];
-
-        real dx = xi - xj;
-        real dy = yi - yj;
-        real dz = zi - zj;
-
-        // Apply periodic boundary conditions
-        dx -= box_len[0] * round(dx / box_len[0]);
-        dy -= box_len[1] * round(dy / box_len[1]);
-        dz -= box_len[2] * round(dz / box_len[2]);
-
-        ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
-        forces[3 * i] += sol.force.x;
-        forces[3 * i + 1] += sol.force.y;
-        forces[3 * i + 2] += sol.force.z;
-        energies[i] = sol.energy;
-      }
-    }
-  }
-}
diff --git a/kernels/forces.cuh b/kernels/forces.cuh
deleted file mode 100644
index 87a610f..0000000
--- a/kernels/forces.cuh
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef FORCES_CUH
-#define FORCES_CUH
-
-#include "pair_potentials.cuh"
-#include "precision.hpp"
-namespace CAC {
-/**
- * Calculate forces and energies using CUDA for acceleration
- * This code currently only accepts a single PairPotential object and does an
- * n^2 force calculation. Future improvements will:
- *   - Allow for neighbor listing
- *   - Allow for overlaid force calculations
- */
-__global__ void calc_forces_and_energies(real *xs, real *forces, real *energies,
-                                         int n_particles, real *box_bd,
-                                         PairPotential &potential);
-} // namespace CAC
-
-#endif
diff --git a/kernels/pair_potentials.cuh b/kernels/pair_potentials.cuh
index d5d8566..052a079 100644
--- a/kernels/pair_potentials.cuh
+++ b/kernels/pair_potentials.cuh
@@ -1,5 +1,5 @@
-#ifndef POTENTIALS_CUH
-#define POTENTIALS_CUH
+#ifndef POTENTIALS_H
+#define POTENTIALS_H
 
 #include "precision.hpp"
 #include "vec3.h"
@@ -84,8 +84,8 @@ struct LennardJones : PairPotential {
     }
   };
 
-  CUDA_CALLABLE inline ~LennardJones(){};
+  CUDA_CALLABLE ~LennardJones(){};
 };
 
-inline PairPotential::~PairPotential() {};
+inline PairPotential::~PairPotential() {}; // inline: defined in a header, so it must not emit one strong definition per including TU
 #endif
diff --git a/tests/cuda_unit_tests/CMakeLists.txt b/tests/cuda_unit_tests/CMakeLists.txt
index 3419e5e..27490a0 100644
--- a/tests/cuda_unit_tests/CMakeLists.txt
+++ b/tests/cuda_unit_tests/CMakeLists.txt
@@ -2,16 +2,8 @@ include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
 
 add_executable(${NAME}_cuda_tests
     test_potential.cu
-    test_forces.cu
 )
 
 target_link_libraries(${NAME}_cuda_tests gtest gtest_main)
 target_link_libraries(${NAME}_cuda_tests ${CMAKE_PROJECT_NAME}_cuda_lib)
-add_test(NAME ${NAME}CudaTests COMMAND ${CMAKE_BINARY_DIR}/tests/cuda_unit_tests/${NAME}_cuda_tests)
-
-# Add environment variables for NVIDIA GPU selection. Useful for facilitating testing on multi gpu
-# systems
-set_property(TEST ${NAME}CudaTests PROPERTY ENVIRONMENT
-    "__NV_PRIME_RENDER_OFFLOAD=1"
-    "__GLX_VENDOR_LIBRARY_NAME=nvidia"
-)
+add_test(NAME ${NAME}CudaTests COMMAND ${NAME}_cuda_tests) # run the CUDA test executable defined above; CMake resolves the target name to its built location
diff --git a/tests/cuda_unit_tests/test_forces.cu b/tests/cuda_unit_tests/test_forces.cu
deleted file mode 100644
index ca84e55..0000000
--- a/tests/cuda_unit_tests/test_forces.cu
+++ /dev/null
@@ -1,277 +0,0 @@
-#include <cmath>
-#include <cuda_runtime.h>
-#include <gtest/gtest.h>
-#include <vector>
-
-// Include your header files
-#include "forces.cuh"
-#include "pair_potentials.cuh"
-#include "precision.hpp"
-
-class CudaKernelTest : public ::testing::Test {
-protected:
-  void SetUp() override {
-    // Set up CUDA device
-    cudaError_t err = cudaSetDevice(0);
-    ASSERT_EQ(err, cudaSuccess) << "Failed to set CUDA device";
-  }
-
-  void TearDown() override {
-    // Clean up any remaining GPU memory
-    cudaDeviceReset();
-  }
-
-  // Helper function to check CUDA errors
-  void checkCudaError(cudaError_t err, const std::string &operation) {
-    ASSERT_EQ(err, cudaSuccess)
-        << "CUDA error in " << operation << ": " << cudaGetErrorString(err);
-  }
-
-  // Helper function to allocate and copy data to GPU
-  template <typename T>
-  T *allocateAndCopyToGPU(const std::vector<T> &host_data) {
-    T *device_ptr;
-    size_t size = host_data.size() * sizeof(T);
-    checkCudaError(cudaMalloc(&device_ptr, size), "cudaMalloc");
-    checkCudaError(
-        cudaMemcpy(device_ptr, host_data.data(), size, cudaMemcpyHostToDevice),
-        "cudaMemcpy H2D");
-    return device_ptr;
-  }
-
-  // Helper function to copy data from GPU and free GPU memory
-  template <typename T>
-  std::vector<T> copyFromGPUAndFree(T *device_ptr, size_t count) {
-    std::vector<T> host_data(count);
-    size_t size = count * sizeof(T);
-    checkCudaError(
-        cudaMemcpy(host_data.data(), device_ptr, size, cudaMemcpyDeviceToHost),
-        "cudaMemcpy D2H");
-    checkCudaError(cudaFree(device_ptr), "cudaFree");
-    return host_data;
-  }
-};
-
-TEST_F(CudaKernelTest, BasicFunctionalityTest) {
-  const int n_particles = 4;
-  const real tolerance = 1e-5;
-
-  // Set up test data - simple 2x2 grid of particles
-  std::vector<real> positions = {
-      0.0, 0.0, 0.0, // particle 0
-      1.0, 0.0, 0.0, // particle 1
-      0.0, 1.0, 0.0, // particle 2
-      1.0, 1.0, 0.0  // particle 3
-  };
-
-  std::vector<real> forces(3 * n_particles, 0.0);
-  std::vector<real> energies(n_particles, 0.0);
-  std::vector<real> box_dimensions = {10.0, 10.0,
-                                      10.0}; // Large box to avoid PBC effects
-
-  // Allocate GPU memory and copy data
-  real *d_positions = allocateAndCopyToGPU(positions);
-  real *d_forces = allocateAndCopyToGPU(forces);
-  real *d_energies = allocateAndCopyToGPU(energies);
-  real *d_box_len = allocateAndCopyToGPU(box_dimensions);
-
-  // Create Lennard-Jones potential (sigma=1.0, epsilon=1.0, rcutoff=3.0)
-  LennardJones potential(1.0, 1.0, 3.0);
-
-  // Launch kernel
-  dim3 blockSize(256);
-  dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
-
-  CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
-      d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
-
-  checkCudaError(cudaGetLastError(), "kernel launch");
-  checkCudaError(cudaDeviceSynchronize(), "kernel execution");
-
-  // Copy results back to host
-  std::vector<real> result_forces =
-      copyFromGPUAndFree(d_forces, 3 * n_particles);
-  std::vector<real> result_energies =
-      copyFromGPUAndFree(d_energies, n_particles);
-
-  // Clean up remaining GPU memory
-  checkCudaError(cudaFree(d_positions), "cudaFree positions");
-  checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
-
-  // Verify results - forces should be non-zero and energies should be
-  // calculated
-  bool has_nonzero_force = false;
-  bool has_nonzero_energy = false;
-
-  for (int i = 0; i < 3 * n_particles; i++) {
-    if (std::abs(result_forces[i]) > tolerance) {
-      has_nonzero_force = true;
-      break;
-    }
-  }
-
-  for (int i = 0; i < n_particles; i++) {
-    if (std::abs(result_energies[i]) > tolerance) {
-      has_nonzero_energy = true;
-      break;
-    }
-  }
-
-  EXPECT_FALSE(has_nonzero_force)
-      << "Expected non-zero forces between particles";
-  EXPECT_TRUE(has_nonzero_energy) << "Expected non-zero energies for particles";
-}
-
-TEST_F(CudaKernelTest, PeriodicBoundaryConditionsTest) {
-  const int n_particles = 2;
-  const real tolerance = 1e-5;
-
-  // Place particles near opposite edges of a small box
-  std::vector<real> positions = {
-      0.1, 0.0, 0.0, // particle 0 near left edge
-      4.9, 0.0, 0.0  // particle 1 near right edge
-  };
-
-  std::vector<real> forces(3 * n_particles, 0.0);
-  std::vector<real> energies(n_particles, 0.0);
-  std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC
-
-  // Allocate GPU memory and copy data
-  real *d_positions = allocateAndCopyToGPU(positions);
-  real *d_forces = allocateAndCopyToGPU(forces);
-  real *d_energies = allocateAndCopyToGPU(energies);
-  real *d_box_len = allocateAndCopyToGPU(box_dimensions);
-
-  // Create Lennard-Jones potential with large cutoff to ensure interaction
-  LennardJones potential(1.0, 1.0, 3.0);
-
-  // Launch kernel
-  dim3 blockSize(256);
-  dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
-
-  CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
-      d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
-
-  checkCudaError(cudaGetLastError(), "kernel launch");
-  checkCudaError(cudaDeviceSynchronize(), "kernel execution");
-
-  // Copy results back to host
-  std::vector<real> result_forces =
-      copyFromGPUAndFree(d_forces, 3 * n_particles);
-  std::vector<real> result_energies =
-      copyFromGPUAndFree(d_energies, n_particles);
-
-  checkCudaError(cudaFree(d_positions), "cudaFree positions");
-  checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
-
-  // With PBC, particles should interact as if they're close (distance ~0.2)
-  // rather than far apart (distance ~4.8)
-  EXPECT_GT(std::abs(result_forces[0]), tolerance)
-      << "Expected significant force due to PBC";
-  EXPECT_GT(std::abs(result_energies[0]), tolerance)
-      << "Expected significant energy due to PBC";
-}
-
-TEST_F(CudaKernelTest, SingleParticleTest) {
-  const int n_particles = 1;
-
-  std::vector<real> positions = {0.0, 0.0, 0.0};
-  std::vector<real> forces(3 * n_particles, 0.0);
-  std::vector<real> energies(n_particles, 0.0);
-  std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
-
-  real *d_positions = allocateAndCopyToGPU(positions);
-  real *d_forces = allocateAndCopyToGPU(forces);
-  real *d_energies = allocateAndCopyToGPU(energies);
-  real *d_box_len = allocateAndCopyToGPU(box_dimensions);
-
-  LennardJones potential(1.0, 1.0, 3.0);
-
-  dim3 blockSize(256);
-  dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
-
-  CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
-      d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
-
-  checkCudaError(cudaGetLastError(), "kernel launch");
-  checkCudaError(cudaDeviceSynchronize(), "kernel execution");
-
-  std::vector<real> result_forces =
-      copyFromGPUAndFree(d_forces, 3 * n_particles);
-  std::vector<real> result_energies =
-      copyFromGPUAndFree(d_energies, n_particles);
-
-  checkCudaError(cudaFree(d_positions), "cudaFree positions");
-  checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
-
-  // Single particle should have zero force and energy
-  EXPECT_NEAR(result_forces[0], 0.0, 1e-10);
-  EXPECT_NEAR(result_forces[1], 0.0, 1e-10);
-  EXPECT_NEAR(result_forces[2], 0.0, 1e-10);
-  EXPECT_NEAR(result_energies[0], 0.0, 1e-10);
-}
-
-TEST_F(CudaKernelTest, ForceSymmetryTest) {
-  const int n_particles = 2;
-  const real tolerance = 1e-5;
-
-  std::vector<real> positions = {
-      0.0, 0.0, 0.0, // particle 0
-      1.5, 0.0, 0.0  // particle 1
-  };
-
-  std::vector<real> forces(3 * n_particles, 0.0);
-  std::vector<real> energies(n_particles, 0.0);
-  std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
-
-  real *d_positions = allocateAndCopyToGPU(positions);
-  real *d_forces = allocateAndCopyToGPU(forces);
-  real *d_energies = allocateAndCopyToGPU(energies);
-  real *d_box_len = allocateAndCopyToGPU(box_dimensions);
-
-  LennardJones potential(1.0, 1.0, 3.0);
-
-  dim3 blockSize(256);
-  dim3 gridSize((n_particles + blockSize.x - 1) / blockSize.x);
-
-  CAC::calc_forces_and_energies<<<gridSize, blockSize>>>(
-      d_positions, d_forces, d_energies, n_particles, d_box_len, potential);
-
-  checkCudaError(cudaGetLastError(), "kernel launch");
-  checkCudaError(cudaDeviceSynchronize(), "kernel execution");
-
-  std::vector<real> result_forces =
-      copyFromGPUAndFree(d_forces, 3 * n_particles);
-  std::vector<real> result_energies =
-      copyFromGPUAndFree(d_energies, n_particles);
-
-  checkCudaError(cudaFree(d_positions), "cudaFree positions");
-  checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
-
-  // Newton's third law: forces should be equal and opposite
-  EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance)
-      << "Force x-components should be opposite";
-  EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance)
-      << "Force y-components should be opposite";
-  EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance)
-      << "Force z-components should be opposite";
-
-  // Energies should be equal for symmetric particles
-  EXPECT_NEAR(result_energies[0], result_energies[1], tolerance)
-      << "Energies should be equal";
-}
-
-// Main function to run tests
-int main(int argc, char **argv) {
-  ::testing::InitGoogleTest(&argc, argv);
-
-  // Check if CUDA is available
-  int deviceCount;
-  cudaError_t err = cudaGetDeviceCount(&deviceCount);
-  if (err != cudaSuccess || deviceCount == 0) {
-    std::cout << "No CUDA devices available. Skipping CUDA tests." << std::endl;
-    return 0;
-  }
-
-  return RUN_ALL_TESTS();
-}