diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt
index abcaadf..f07f1a4 100644
--- a/kernels/CMakeLists.txt
+++ b/kernels/CMakeLists.txt
@@ -3,14 +3,12 @@ project(${NAME}_cuda_lib CUDA CXX)
 set(HEADER_FILES
     potentials/pair_potentials.cuh
     forces.cuh
-    kernel_config.cuh
 )
 set(SOURCE_FILES
-    kernel_config.cu
 )
 
 # The library contains header and source files.
-add_library(${NAME}_cuda_lib STATIC
+add_library(${NAME}_cuda_lib INTERFACE
     ${SOURCE_FILES}
     ${HEADER_FILES}
 )
diff --git a/kernels/forces.cuh b/kernels/forces.cuh
index e4f52f1..640da42 100644
--- a/kernels/forces.cuh
+++ b/kernels/forces.cuh
@@ -1,75 +1,97 @@
 #ifndef FORCES_CUH
 #define FORCES_CUH
-#include "kernel_config.cuh"
 #include "potentials/pair_potentials.cuh"
 #include "precision.hpp"
 #include <cstdio>
-#include <cuda_runtime.h>
+#include <type_traits>
+#include <variant>
 #include <vector>
 
 namespace CAC {
 
-inline void reset_forces_and_energies(int n_particles,
-                                      float4 *forces_energies) {
-  cudaMemset(forces_energies, 0, n_particles * sizeof(float4));
+// Zero the force (3 values per particle) and energy (1 value per particle)
+// buffers. The cudaMemset return codes are not checked here.
+inline void reset_forces_and_energies(int n_particles, real *forces,
+                                      real *energies) {
+  cudaMemset(forces, 0, n_particles * sizeof(real) * 3);
+  cudaMemset(energies, 0, n_particles * sizeof(real));
 }
 
+/**
+ * Add the pair forces and energies acting on particle i to forces/energies.
+ *
+ * The particle index is blockIdx.x * blockDim.x + threadIdx.x: only the x
+ * dimension of the launch is used, and particles with an index at or beyond
+ * gridDim.x * blockDim.x are not processed.
+ *
+ * xs and forces hold 3 consecutive values (x, y, z) per particle, energies one
+ * value per particle and box_len the 3 periodic box lengths.
+ */
 template <typename PotentialType>
-__global__ void calc_forces_and_energies(float4 *pos, float4 *force_energies,
+__global__ void calc_forces_and_energies(real *xs, real *forces, real *energies,
                                          int n_particles, real *box_len,
                                          PotentialType potential) {
-
-  int i = get_thread_id();
+  int i = blockIdx.x * blockDim.x + threadIdx.x;
 
   if (i < n_particles) {
-    float4 my_pos = pos[i]; // Loads 16 bytes in one transaction
-    real xi = my_pos.x;
-    real yi = my_pos.y;
-    real zi = my_pos.z;
-
-    real total_fx = 0, total_fy = 0, total_fz = 0, total_energy = 0;
+    real xi = xs[3 * i];
+    real yi = xs[3 * i + 1];
+    real zi = xs[3 * i + 2];
+
+    // Sum this particle's pair contributions in locals so the inner loop does
+    // not read-modify-write the output buffers once per neighbour.
+    real total_fx = 0, total_fy = 0, total_fz = 0, total_energy = 0;
 
     for (int j = 0; j < n_particles; j++) {
       if (i != j) {
-        float4 other_pos = pos[j];
-        real dx = xi - other_pos.x;
-        real dy = yi - other_pos.y;
-        real dz = zi - other_pos.z;
+        real xj = xs[3 * j];
+        real yj = xs[3 * j + 1];
+        real zj = xs[3 * j + 2];
+
+        real dx = xi - xj;
+        real dy = yi - yj;
+        real dz = zi - zj;
 
         // Apply periodic boundary conditions
         dx -= box_len[0] * round(dx / box_len[0]);
         dy -= box_len[1] * round(dy / box_len[1]);
         dz -= box_len[2] * round(dz / box_len[2]);
 
-        float4 sol = potential.calc_force_and_energy({dx, dy, dz});
-        total_fx += sol.x;
-        total_fy += sol.y;
-        total_fz += sol.z;
-        total_energy += sol.w;
+        ForceAndEnergy sol = potential.calc_force_and_energy({dx, dy, dz});
+        total_fx += sol.force.x;
+        total_fy += sol.force.y;
+        total_fz += sol.force.z;
+        total_energy += sol.energy;
       }
     }
-
-    force_energies[i] = make_float4(total_fx, total_fy, total_fz, total_energy);
+
+    // Add to (rather than overwrite) the stored totals: launch_force_kernels
+    // runs this kernel once per potential on the same buffers.
+    forces[3 * i] += total_fx;
+    forces[3 * i + 1] += total_fy;
+    forces[3 * i + 2] += total_fz;
+    energies[i] += total_energy;
   }
 }
-inline void launch_force_kernels(float4 *xs, float4 *force_energies,
+
+inline void launch_force_kernels(real *xs, real *forces, real *energies,
                                  int n_particles, real *box_len,
                                  std::vector<PairPotentials> potentials,
-                                 dim3 blocks, dim3 threads_per_block) {
+                                 int grid_size, int block_size) {
 
-  reset_forces_and_energies(n_particles, force_energies);
+  reset_forces_and_energies(n_particles, forces, energies);
 
   for (const auto &potential : potentials) {
     std::visit(
         [&](const auto &potential) {
           using PotentialType = std::decay_t<decltype(potential)>;
-          calc_forces_and_energies<PotentialType>
-              <<<blocks, threads_per_block>>>(xs, force_energies, n_particles,
-                                              box_len, potential);
+          calc_forces_and_energies<PotentialType><<<grid_size, block_size>>>(
+              xs, forces, energies, n_particles, box_len, potential);
         },
         potential);
     cudaDeviceSynchronize();
   }
 }
 } // namespace CAC
+
 #endif
diff --git a/kernels/kernel_config.cu b/kernels/kernel_config.cu
deleted file mode 100644
index 3c1644c..0000000
--- a/kernels/kernel_config.cu
+++ /dev/null
@@ -1,73 +0,0 @@
-#include "kernel_config.cuh"
-#include <algorithm>
-#include <cstdio>
-
-size_t KernelConfig::total_threads() const {
-  return (size_t)blocks.x * blocks.y * blocks.z * threads.x * threads.y *
-         threads.z;
-}
-
-void KernelConfig::print() const {
-  printf("Grid: (%u, %u, %u), Block: (%u, %u, %u), Total threads: %zu\n",
-         blocks.x, blocks.y, blocks.z, threads.x, threads.y, threads.z,
-         total_threads());
-}
-
-KernelConfig get_launch_config(size_t n_elements, int threads_per_block,
-                               int max_blocks_per_dim) {
-
-  // Ensure threads_per_block is valid
-  threads_per_block = std::min(threads_per_block, 1024);
-  threads_per_block = std::max(threads_per_block, 32);
-
-  // Calculate total blocks needed
-  size_t total_blocks =
-      (n_elements + threads_per_block - 1) / threads_per_block;
-
-  dim3 threads(threads_per_block);
-  dim3 blocks;
-
-  if (total_blocks <= max_blocks_per_dim) {
-    // Simple 1D grid
-    blocks = dim3(total_blocks);
-  } else {
-    // Use 2D grid
-    blocks.x = max_blocks_per_dim;
-    blocks.y = (total_blocks + max_blocks_per_dim - 1) / max_blocks_per_dim;
-
-    // If still too big, use 3D grid
-    if (blocks.y > max_blocks_per_dim) {
-      blocks.y = max_blocks_per_dim;
-      blocks.z =
-          (total_blocks + (size_t)max_blocks_per_dim * max_blocks_per_dim - 1) /
-          ((size_t)max_blocks_per_dim * max_blocks_per_dim);
-    }
-  }
-
-  return KernelConfig(blocks, threads);
-}
-
-int get_optimal_block_size(int device_id) {
-  cudaDeviceProp prop;
-  cudaGetDeviceProperties(&prop, device_id);
-
-  // Use a fraction of max threads per block for better occupancy
-  // Typically 256 or 512 work well for most kernels
-  if (prop.maxThreadsPerBlock >= 1024) {
-    return 256; // Good balance of occupancy and register usage
-  } else if (prop.maxThreadsPerBlock >= 512) {
-    return 256;
-  } else {
-    return prop.maxThreadsPerBlock / 2;
-  }
-}
-
-KernelConfig get_launch_config_advanced(size_t n_elements, int device_id) {
-  cudaDeviceProp prop;
-  cudaGetDeviceProperties(&prop, device_id);
-
-  int threads_per_block = get_optimal_block_size(device_id);
-  int max_blocks_per_dim = prop.maxGridSize[0];
-
-  return get_launch_config(n_elements, threads_per_block, max_blocks_per_dim);
-}
diff --git a/kernels/kernel_config.cuh b/kernels/kernel_config.cuh
deleted file mode 100644
index 66364fe..0000000
--- a/kernels/kernel_config.cuh
+++ /dev/null
@@ -1,83 +0,0 @@
-#ifndef KERNEL_CONFIG_CUH
-#define KERNEL_CONFIG_CUH
-#include <cstdio>
-#include <cuda_runtime.h>
-
-/**
- * Structure to hold grid launch configuration
- */
-struct KernelConfig {
-  dim3 blocks;
-  dim3 threads;
-
-  // Convenience constructor
-  KernelConfig(dim3 b, dim3 t) : blocks(b), threads(t) {}
-
-  // Total number of threads launched
-  size_t total_threads() const;
-
-  // Print configuration for debugging
-  void print() const;
-};
-
-/**
- * Calculate optimal CUDA launch configuration for 1D problem
- *
- * @param n_elements Number of elements to process
- * @param threads_per_block Desired threads per block (default: 256)
- * @param max_blocks_per_dim Maximum blocks per grid dimension (default: 65535)
- * @return LaunchConfig with optimal grid and block dimensions
- */
-KernelConfig get_launch_config(size_t n_elements, int threads_per_block = 256,
-                               int max_blocks_per_dim = 65535);
-
-/**
- * Calculate 1D thread index for kernels launched with get_launch_config()
- * Use this inside your CUDA kernels
- */
-__device__ inline size_t get_thread_id() {
-  return (size_t)blockIdx.z * gridDim.x * gridDim.y * blockDim.x +
-         (size_t)blockIdx.y * gridDim.x * blockDim.x +
-         (size_t)blockIdx.x * blockDim.x + threadIdx.x;
-}
-
-/**
- * Alternative version that takes grid dimensions as parameters
- * Useful if you need the index calculation in multiple places
- */
-__device__ inline size_t get_thread_id(dim3 gridDim, dim3 blockDim,
-                                       dim3 blockIdx, dim3 threadIdx) {
-  return (size_t)blockIdx.z * gridDim.x * gridDim.y * blockDim.x +
-         (size_t)blockIdx.y * gridDim.x * blockDim.x +
-         (size_t)blockIdx.x * blockDim.x + threadIdx.x;
-}
-
-/**
- * GPU device properties helper - gets optimal block size for current device
- */
-int get_optimal_block_size(int device_id = 0);
-
-/**
- * Advanced version that considers device properties
- */
-KernelConfig get_launch_config_advanced(size_t n_elements, int device_id = 0);
-
-// Example usage in your kernel:
-/*
-template <typename PotentialType>
-__global__ void calc_forces_and_energies(float4 *pos, float4 *force_energies,
-                                         size_t n_particles, real *box_len,
-                                         PotentialType potential) {
-
-    size_t i = get_thread_id();
-
-    if (i < n_particles) {
-        // Your existing force calculation code here...
-        float4 my_pos = pos[i];
-        // ... rest of kernel unchanged
-    }
-}
-
-*/
-
-#endif
diff --git a/kernels/potentials/pair_potentials.cuh b/kernels/potentials/pair_potentials.cuh
index 792405c..537b03c 100644
--- a/kernels/potentials/pair_potentials.cuh
+++ b/kernels/potentials/pair_potentials.cuh
@@ -5,7 +5,6 @@
 #include "vec3.h"
 #include <cmath>
 #include <cstdio>
-#include <cuda_runtime.h>
 #include <variant>
 
 #ifdef __CUDACC__
@@ -14,6 +13,18 @@
 #define CUDA_CALLABLE
 #endif
 
+/**
+ * Result struct for the Pair Potential
+ */
+struct ForceAndEnergy {
+  real energy;
+  Vec3<real> force;
+
+  CUDA_CALLABLE inline static ForceAndEnergy zero() {
+    return {0.0, {0.0, 0.0, 0.0}};
+  };
+};
+
 /**
  * Calculate the Lennard-Jones energy and force for the current particle
  * pair described by displacement vector r
@@ -29,7 +40,7 @@ struct LennardJones {
     m_rcutoffsq = rcutoff * rcutoff;
   };
 
-  CUDA_CALLABLE float4 calc_force_and_energy(Vec3<real> r) {
+  CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3<real> r) {
     real rmagsq = r.squared_norm2();
     if (rmagsq < m_rcutoffsq && rmagsq > 0.0) {
       real inv_rmag = 1 / sqrt(rmagsq);
@@ -49,10 +60,10 @@ struct LennardJones {
           (12.0 * sigma_r12 * inv_rmag - 6.0 * sigma_r6 * inv_rmag);
       Vec3<real> force = r.scale(force_mag * inv_rmag);
 
-      return make_float4(force.x, force.y, force.z, energy);
+      return {energy, force};
 
     } else {
-      return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+      return ForceAndEnergy::zero();
     }
   };
 };
@@ -74,7 +85,7 @@ struct Morse {
     m_rcutoffsq = rcutoff * rcutoff;
   };
 
-  CUDA_CALLABLE float4 calc_force_and_energy(Vec3<real> r) {
+  CUDA_CALLABLE ForceAndEnergy calc_force_and_energy(Vec3<real> r) {
     real rmagsq = r.squared_norm2();
     if (rmagsq < m_rcutoffsq && rmagsq > 0.0) {
       real rmag = sqrt(rmagsq);
@@ -93,10 +104,10 @@ struct Morse {
       // Direction: normalized vector
       Vec3<real> force = r.scale(force_mag / rmag);
 
-      return make_float4(force.x, force.y, force.z, energy);
+      return {energy, force};
 
     } else {
-      return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+      return ForceAndEnergy::zero();
     }
   };
 };
diff --git a/src/precision.hpp b/src/precision.hpp
index aabc471..c132c09 100644
--- a/src/precision.hpp
+++ b/src/precision.hpp
@@ -1,15 +1,15 @@
 #ifndef PRECISION_H
 #define PRECISION_H
 
-#ifdef USE_DOUBLE
+#ifdef USE_FLOATS
 
 /*
- * If macro USE_DOUBLE is set then the default type will be double
- * precision. Otherwise we use floats by default
+ * If macro USE_FLOATS is set then the default type will be single
+ * precision (float). Otherwise we use double precision by default
  */
-typedef double real;
-#else
 typedef float real;
+#else
+typedef double real;
 #endif
 
 #endif
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 8310b86..7f994a6 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -10,4 +10,5 @@ if(NOT EXISTS ${GOOGLETEST_DIR})
 endif()
 
 add_subdirectory(lib/googletest)
+add_subdirectory(unit_tests)
 add_subdirectory(cuda_unit_tests)
diff --git a/tests/cuda_unit_tests/test_forces.cu b/tests/cuda_unit_tests/test_forces.cu
index 923ebe5..3576dbb 100644
--- a/tests/cuda_unit_tests/test_forces.cu
+++ b/tests/cuda_unit_tests/test_forces.cu
@@ -5,12 +5,14 @@
 
 // Include your header files
 #include "forces.cuh"
-#include "kernel_config.cuh"
 #include "potentials/pair_potentials.cuh"
 #include "precision.hpp"
 
 class CudaForceKernelTest : public ::testing::Test {
 protected:
+  const int GRID_SIZE = 1;
+  const int BLOCK_SIZE = 4;
+
   void SetUp() override {
     // Set up CUDA device
     cudaError_t err = cudaSetDevice(0);
@@ -53,32 +55,33 @@ protected:
   }
 
   // Helper function to run the force calculation kernel
-  std::vector<float4>
-  run_force_calculation(int n_particles, const std::vector<float4> &positions,
+  std::pair<std::vector<real>, std::vector<real>>
+  run_force_calculation(int n_particles, const std::vector<real> &positions,
                         const std::vector<real> &box_dimensions) {
-    std::vector<float4> force_energies(n_particles,
-                                       make_float4(0.0, 0.0, 0.0, 0.0));
+    std::vector<real> forces(3 * n_particles, 0.0);
+    std::vector<real> energies(n_particles, 0.0);
 
-    KernelConfig kernel_config = get_launch_config(n_particles);
-    float4 *d_positions = allocateAndCopyToGPU(positions);
-    float4 *d_force_energies = allocateAndCopyToGPU(force_energies);
+    real *d_positions = allocateAndCopyToGPU(positions);
+    real *d_forces = allocateAndCopyToGPU(forces);
+    real *d_energies = allocateAndCopyToGPU(energies);
     real *d_box_len = allocateAndCopyToGPU(box_dimensions);
 
     std::vector<PairPotentials> potentials = {LennardJones(1.0, 1.0, 3.0)};
-    CAC::launch_force_kernels(d_positions, d_force_energies, n_particles,
-                              d_box_len, potentials, kernel_config.blocks,
-                              kernel_config.threads);
+    CAC::launch_force_kernels(d_positions, d_forces, d_energies, n_particles,
+                              d_box_len, potentials, GRID_SIZE, BLOCK_SIZE);
 
     checkCudaError(cudaGetLastError(), "kernel launch");
     checkCudaError(cudaDeviceSynchronize(), "kernel execution");
 
-    std::vector<float4> result_force_energies =
-        copyFromGPUAndFree(d_force_energies, n_particles);
+    std::vector<real> result_forces =
+        copyFromGPUAndFree(d_forces, 3 * n_particles);
+    std::vector<real> result_energies =
+        copyFromGPUAndFree(d_energies, n_particles);
 
     checkCudaError(cudaFree(d_positions), "cudaFree positions");
     checkCudaError(cudaFree(d_box_len), "cudaFree box_len");
 
-    return result_force_energies;
+    return {result_forces, result_energies};
   }
 };
 
@@ -87,14 +90,14 @@ TEST_F(CudaForceKernelTest, BasicFunctionalityTest) {
   const real tolerance = 1e-5;
 
   // Set up test data - simple 2x2 grid of particles
-  std::vector<float4> positions = {
-      make_float4(0.0, 0.0, 0.0, 0.0), // particle 0
-      make_float4(0.5, 0.0, 0.0, 0.0), // particle 1
+  std::vector<real> positions = {
+      0.0, 0.0, 0.0, // particle 0
+      0.5, 0.0, 0.0, // particle 1
   };
 
   std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
 
-  auto result_force_energies =
+  auto [result_forces, result_energies] =
       run_force_calculation(n_particles, positions, box_dimensions);
 
   // Verify results - forces should be non-zero and energies should be
@@ -102,14 +105,17 @@ TEST_F(CudaForceKernelTest, BasicFunctionalityTest) {
   bool has_nonzero_force = false;
   bool has_nonzero_energy = false;
 
-  for (int i = 0; i < n_particles; i++) {
-    if (std::abs(result_force_energies[i].x) > tolerance ||
-        std::abs(result_force_energies[i].y) > tolerance ||
-        std::abs(result_force_energies[i].z) > tolerance) {
+  for (int i = 0; i < 3 * n_particles; i++) {
+    if (std::abs(result_forces[i]) > tolerance) {
       has_nonzero_force = true;
+      break;
     }
-    if (std::abs(result_force_energies[i].w) > tolerance) {
+  }
+
+  for (int i = 0; i < n_particles; i++) {
+    if (std::abs(result_energies[i]) > tolerance) {
       has_nonzero_energy = true;
+      break;
     }
   }
 
@@ -124,61 +130,60 @@ TEST_F(CudaForceKernelTest, PeriodicBoundaryConditionsTest) {
   const real tolerance = 1e-5;
 
   // Place particles near opposite edges of a small box
-  std::vector<float4> positions = {
-      make_float4(0.1, 0.0, 0.0, 0.0), // particle 0 near left edge
-      make_float4(4.9, 0.0, 0.0, 0.0)  // particle 1 near right edge
+  std::vector<real> positions = {
+      0.1, 0.0, 0.0, // particle 0 near left edge
+      4.9, 0.0, 0.0  // particle 1 near right edge
   };
   std::vector<real> box_dimensions = {5.0, 5.0, 5.0}; // Small box to test PBC
 
-  auto result_force_energies =
+  auto [result_forces, result_energies] =
       run_force_calculation(n_particles, positions, box_dimensions);
 
   // With PBC, particles should interact as if they're close (distance ~0.2)
   // rather than far apart (distance ~4.8)
-  EXPECT_GT(std::abs(result_force_energies[0].x), tolerance)
+  EXPECT_GT(std::abs(result_forces[0]), tolerance)
       << "Expected significant force due to PBC";
+  EXPECT_GT(std::abs(result_energies[0]), tolerance)
+      << "Expected significant energy due to PBC";
 }
 
 TEST_F(CudaForceKernelTest, SingleParticleTest) {
   const int n_particles = 1;
 
-  std::vector<float4> positions = {make_float4(0.0, 0.0, 0.0, 0.0)};
+  std::vector<real> positions = {0.0, 0.0, 0.0};
   std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
 
-  auto result_force_energies =
+  auto [result_forces, result_energies] =
       run_force_calculation(n_particles, positions, box_dimensions);
   // Single particle should have zero force and energy
-  EXPECT_NEAR(result_force_energies[0].x, 0.0, 1e-10);
-  EXPECT_NEAR(result_force_energies[0].y, 0.0, 1e-10);
-  EXPECT_NEAR(result_force_energies[0].z, 0.0, 1e-10);
-  EXPECT_NEAR(result_force_energies[0].w, 0.0, 1e-10);
+  EXPECT_NEAR(result_forces[0], 0.0, 1e-10);
+  EXPECT_NEAR(result_forces[1], 0.0, 1e-10);
+  EXPECT_NEAR(result_forces[2], 0.0, 1e-10);
+  EXPECT_NEAR(result_energies[0], 0.0, 1e-10);
 }
 
 TEST_F(CudaForceKernelTest, ForceSymmetryTest) {
   const int n_particles = 2;
   const real tolerance = 1e-5;
 
-  std::vector<float4> positions = {
-      make_float4(0.0, 0.0, 0.0, 0.0), // particle 0
-      make_float4(1.5, 0.0, 0.0, 0.0)  // particle 1
+  std::vector<real> positions = {
+      0.0, 0.0, 0.0, // particle 0
+      1.5, 0.0, 0.0  // particle 1
   };
   std::vector<real> box_dimensions = {10.0, 10.0, 10.0};
 
-  auto result_force_energies =
+  auto [result_forces, result_energies] =
       run_force_calculation(n_particles, positions, box_dimensions);
 
   // Newton's third law: forces should be equal and opposite
-  EXPECT_NEAR(result_force_energies[0].x, -result_force_energies[1].x,
-              tolerance)
+  EXPECT_NEAR(result_forces[0], -result_forces[3], tolerance)
       << "Force x-components should be opposite";
-  EXPECT_NEAR(result_force_energies[0].y, -result_force_energies[1].y,
-              tolerance)
+  EXPECT_NEAR(result_forces[1], -result_forces[4], tolerance)
       << "Force y-components should be opposite";
-  EXPECT_NEAR(result_force_energies[0].z, -result_force_energies[1].z,
-              tolerance)
+  EXPECT_NEAR(result_forces[2], -result_forces[5], tolerance)
       << "Force z-components should be opposite";
 
   // Energies should be equal for symmetric particles
-  EXPECT_NEAR(result_force_energies[0].w, result_force_energies[1].w, tolerance)
+  EXPECT_NEAR(result_energies[0], result_energies[1], tolerance)
       << "Energies should be equal";
 }
diff --git a/tests/cuda_unit_tests/test_potential.cu b/tests/cuda_unit_tests/test_potential.cu
index 2541ada..9511ea5 100644
--- a/tests/cuda_unit_tests/test_potential.cu
+++ b/tests/cuda_unit_tests/test_potential.cu
@@ -2,7 +2,6 @@
 #include "precision.hpp"
 #include "gtest/gtest.h"
 #include <cmath>
-#include <cstdio>
 #include <cuda_runtime.h>
 
 // Structure to hold test results from device
@@ -19,7 +18,8 @@ struct TestResults {
   bool near_cutoff_pass;
 
   // Additional result data for exact checks
-  float4 force_energy_values[10];
+  real energy_values[10];
+  Vec3<real> force_values[10];
 };
 
 // Check if two Vec3 values are close within tolerance
@@ -35,7 +35,7 @@ __global__ void lennard_jones_test_kernel(TestResults *results) {
   real sigma = 1.0;
   real epsilon = 1.0;
   real r_cutoff = 2.5;
-  real tolerance = 1e-5;
+  real tolerance = (sizeof(real) == sizeof(double)) ? 1e-10 : 1e-5; // float when USE_FLOATS
 
   // Create LennardJones object on device
   LennardJones lj(sigma, epsilon, r_cutoff);
@@ -43,78 +43,87 @@ __global__ void lennard_jones_test_kernel(TestResults *results) {
   // Zero Distance Test
   {
     Vec3<real> r = {0.0, 0.0, 0.0};
-    float4 result = lj.calc_force_and_energy(r);
-    results->force_energy_values[0] = result;
+    auto result = lj.calc_force_and_energy(r);
+    results->energy_values[0] = result.energy;
+    results->force_values[0] = result.force;
     results->zero_distance_pass =
-        (result.w == 0.0) &&
-        vec3_near(Vec3<real>{0.0, 0.0, 0.0},
-                  Vec3<real>{result.x, result.y, result.z}, tolerance);
+        (result.energy == 0.0) &&
+        vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance);
   }
 
   // Beyond Cutoff Test
   {
     Vec3<real> r = {3.0, 0.0, 0.0};
-    float4 result = lj.calc_force_and_energy(r);
-    results->force_energy_values[1] = result;
+    auto result = lj.calc_force_and_energy(r);
+    results->energy_values[1] = result.energy;
+    results->force_values[1] = result.force;
     results->beyond_cutoff_pass =
-        (result.w == 0.0) &&
-        vec3_near(Vec3<real>{0.0, 0.0, 0.0},
-                  Vec3<real>{result.x, result.y, result.z}, tolerance);
+        (result.energy == 0.0) &&
+        vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance);
   }
 
   // At Minimum Test
   {
     real min_dist = pow(2.0, 1.0 / 6.0) * sigma;
     Vec3<real> r = {min_dist, 0.0, 0.0};
-    float4 result = lj.calc_force_and_energy(r);
-    results->force_energy_values[2] = result;
+    auto result = lj.calc_force_and_energy(r);
+    results->energy_values[2] = result.energy;
+    results->force_values[2] = result.force;
 
     results->at_minimum_pass =
-        (fabs(result.w + epsilon) < tolerance) &&
-        vec3_near(Vec3<real>{0.0, 0.0, 0.0},
-                  Vec3<real>{result.x, result.y, result.z}, tolerance);
+        (fabs(result.energy + epsilon) < tolerance) &&
+        vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result.force, tolerance);
   }
 
   // At Equilibrium Test
   {
     Vec3<real> r = {sigma, 0.0, 0.0};
-    float4 result = lj.calc_force_and_energy(r);
-    results->force_energy_values[3] = result;
-    results->at_equilibrium_pass =
-        (fabs(result.w) < tolerance) && (result.x > 0.0) &&
-        (fabs(result.y) < tolerance) && (fabs(result.z) < tolerance);
+    auto result = lj.calc_force_and_energy(r);
+    results->energy_values[3] = result.energy;
+    results->force_values[3] = result.force;
+    results->at_equilibrium_pass = (fabs(result.energy) < tolerance) &&
+                                   (result.force.x > 0.0) &&
+                                   (fabs(result.force.y) < tolerance) &&
+                                   (fabs(result.force.z) < tolerance);
   }
 
   // Repulsive Region Test
   {
-    Vec3<real> r = {0.8f * sigma, 0.0, 0.0};
-    float4 result = lj.calc_force_and_energy(r);
-    results->force_energy_values[4] = result;
-    results->repulsive_region_pass = (result.w > 0.0) && (result.x > 0.0);
+    Vec3<real> r = {0.8 * sigma, 0.0, 0.0};
+    auto result = lj.calc_force_and_energy(r);
+    results->energy_values[4] = result.energy;
+    results->force_values[4] = result.force;
+    results->repulsive_region_pass =
+        (result.energy > 0.0) && (result.force.x > 0.0);
   }
 
   // Attractive Region Test
   {
-    Vec3<real> r = {1.5f * sigma, 0.0, 0.0};
-    float4 result = lj.calc_force_and_energy(r);
-    results->force_energy_values[5] = result;
-    results->attractive_region_pass = (result.w < 0.0) && (result.x < 0.0);
+    Vec3<real> r = {1.5 * sigma, 0.0, 0.0};
+    auto result = lj.calc_force_and_energy(r);
+    results->energy_values[5] = result.energy;
+    results->force_values[5] = result.force;
+    results->attractive_region_pass =
+        (result.energy < 0.0) && (result.force.x < 0.0);
   }
 
   // Arbitrary Direction Test
   {
     Vec3<real> r = {1.0, 1.0, 1.0};
-    float4 result = lj.calc_force_and_energy(r);
-    results->force_energy_values[6] = result;
+    auto result = lj.calc_force_and_energy(r);
+    results->energy_values[6] = result.energy;
+    results->force_values[6] = result.force;
 
     real r_mag = sqrt(r.squared_norm2());
     Vec3<real> normalized_r = r.scale(1.0 / r_mag);
-    real force_dot_r = result.x * normalized_r.x + result.y * normalized_r.y +
-                       result.z * normalized_r.z;
+    real force_dot_r = result.force.x * normalized_r.x +
+                       result.force.y * normalized_r.y +
+                       result.force.z * normalized_r.z;
 
     results->arbitrary_direction_pass =
-        (force_dot_r < 0.0) && (fabs(result.x - result.y) < tolerance) &&
-        (fabs(result.y - result.z) < tolerance);
+        (force_dot_r < 0.0) &&
+        (fabs(result.force.x - result.force.y) < tolerance) &&
+        (fabs(result.force.y - result.force.z) < tolerance);
   }
 
   // Parameter Variation Test
@@ -126,31 +135,34 @@ __global__ void lennard_jones_test_kernel(TestResults *results) {
     LennardJones lj2(new_sigma, new_epsilon, new_r_cutoff);
 
     Vec3<real> r = {2.0, 0.0, 0.0};
-    float4 result1 = lj.calc_force_and_energy(r);
-    float4 result2 = lj2.calc_force_and_energy(r);
+    auto result1 = lj.calc_force_and_energy(r);
+    auto result2 = lj2.calc_force_and_energy(r);
 
-    results->force_energy_values[7] = result2;
+    results->energy_values[7] = result2.energy;
+    results->force_values[7] = result2.force;
 
-    results->parameter_variation_pass =
-        (result1.w != result2.w) && (result1.x != result2.x);
+    results->parameter_variation_pass = (result1.energy != result2.energy) &&
+                                        (result1.force.x != result2.force.x);
   }
 
   // Exact Value Check Test
   {
     LennardJones lj_exact(1.0, 1.0, 3.0);
     Vec3<real> r = {1.5, 0.0, 0.0};
-    float4 result = lj_exact.calc_force_and_energy(r);
+    auto result = lj_exact.calc_force_and_energy(r);
 
-    results->force_energy_values[8] = result;
+    results->energy_values[8] = result.energy;
+    results->force_values[8] = result.force;
 
     real expected_energy = 4.0 * (pow(1.0 / 1.5, 12) - pow(1.0 / 1.5, 6));
     real expected_force =
         24.0 * (pow(1.0 / 1.5, 6) - 2.0 * pow(1.0 / 1.5, 12)) / 1.5;
 
     results->exact_value_check_pass =
-        (fabs(result.w - expected_energy) < tolerance) &&
-        (fabs(result.x + expected_force) < tolerance) &&
-        (fabs(result.y) < tolerance) && (fabs(result.z) < tolerance);
+        (fabs(result.energy - expected_energy) < tolerance) &&
+        (fabs(result.force.x + expected_force) < tolerance) &&
+        (fabs(result.force.y) < tolerance) &&
+        (fabs(result.force.z) < tolerance);
   }
 
   // Near Cutoff Test
@@ -161,18 +173,16 @@ __global__ void lennard_jones_test_kernel(TestResults *results) {
     Vec3<real> r_inside = {inside_cutoff, 0.0, 0.0};
     Vec3<real> r_outside = {outside_cutoff, 0.0, 0.0};
 
-    float4 result_inside = lj.calc_force_and_energy(r_inside);
-    float4 result_outside = lj.calc_force_and_energy(r_outside);
+    auto result_inside = lj.calc_force_and_energy(r_inside);
+    auto result_outside = lj.calc_force_and_energy(r_outside);
 
-    results->force_energy_values[9] = result_inside;
+    results->energy_values[9] = result_inside.energy;
+    results->force_values[9] = result_inside.force;
 
     results->near_cutoff_pass =
-        (result_inside.w != 0.0) && (result_inside.x != 0.0) &&
-        (result_outside.w == 0.0) &&
-        vec3_near(
-            Vec3<real>{0.0, 0.0, 0.0},
-            Vec3<real>{result_outside.x, result_outside.y, result_outside.z},
-            tolerance);
+        (result_inside.energy != 0.0) && (result_inside.force.x != 0.0) &&
+        (result_outside.energy == 0.0) &&
+        vec3_near(Vec3<real>{0.0, 0.0, 0.0}, result_outside.force, tolerance);
   }
 }
 
@@ -240,48 +250,44 @@ TEST_F(LennardJonesCudaTest, DeviceZeroDistance) {
   auto results = runDeviceTests();
   EXPECT_TRUE(results.zero_distance_pass)
       << "Zero distance test failed on device. Energy: "
-      << results.force_energy_values[0].w << ", Force: ("
-      << results.force_energy_values[0].x << ", "
-      << results.force_energy_values[0].y << ", "
-      << results.force_energy_values[0].z << ")";
+      << results.energy_values[0] << ", Force: (" << results.force_values[0].x
+      << ", " << results.force_values[0].y << ", " << results.force_values[0].z
+      << ")";
 }
 
 TEST_F(LennardJonesCudaTest, DeviceBeyondCutoff) {
   auto results = runDeviceTests();
   EXPECT_TRUE(results.beyond_cutoff_pass)
       << "Beyond cutoff test failed on device. Energy: "
-      << results.force_energy_values[1].w;
+      << results.energy_values[1];
 }
 
 TEST_F(LennardJonesCudaTest, DeviceAtMinimum) {
   auto results = runDeviceTests();
   EXPECT_TRUE(results.at_minimum_pass)
       << "At minimum test failed on device. Energy: "
-      << results.force_energy_values[2].w;
+      << results.energy_values[2];
 }
 
 TEST_F(LennardJonesCudaTest, DeviceAtEquilibrium) {
   auto results = runDeviceTests();
   EXPECT_TRUE(results.at_equilibrium_pass)
       << "At equilibrium test failed on device. Energy: "
-      << results.force_energy_values[3].w
-      << ", Force x: " << results.force_energy_values[3].x;
+      << results.energy_values[3] << ", Force x: " << results.force_values[3].x;
 }
 
 TEST_F(LennardJonesCudaTest, DeviceRepulsiveRegion) {
   auto results = runDeviceTests();
   EXPECT_TRUE(results.repulsive_region_pass)
       << "Repulsive region test failed on device. Energy: "
-      << results.force_energy_values[4].w
-      << ", Force x: " << results.force_energy_values[4].x;
+      << results.energy_values[4] << ", Force x: " << results.force_values[4].x;
 }
 
 TEST_F(LennardJonesCudaTest, DeviceAttractiveRegion) {
   auto results = runDeviceTests();
   EXPECT_TRUE(results.attractive_region_pass)
       << "Attractive region test failed on device. Energy: "
-      << results.force_energy_values[5].w
-      << ", Force x: " << results.force_energy_values[5].x;
+      << results.energy_values[5] << ", Force x: " << results.force_values[5].x;
 }
 
 TEST_F(LennardJonesCudaTest, DeviceArbitraryDirection) {
@@ -300,13 +306,12 @@ TEST_F(LennardJonesCudaTest, DeviceExactValueCheck) {
   auto results = runDeviceTests();
   EXPECT_TRUE(results.exact_value_check_pass)
       << "Exact value check test failed on device. Energy: "
-      << results.force_energy_values[8].w
-      << ", Force x: " << results.force_energy_values[8].x;
+      << results.energy_values[8] << ", Force x: " << results.force_values[8].x;
 }
 
 TEST_F(LennardJonesCudaTest, DeviceNearCutoff) {
   auto results = runDeviceTests();
   EXPECT_TRUE(results.near_cutoff_pass)
       << "Near cutoff test failed on device. Inside energy: "
-      << results.force_energy_values[9].w;
+      << results.energy_values[9];
 }
diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt
new file mode 100644
index 0000000..c396ab7
--- /dev/null
+++ b/tests/unit_tests/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Unit-test executable for the pair-potential code, built against GoogleTest.
+include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
+
+# NOTE(review): only test_potential.cpp is compiled here; confirm whether
+# test_example.cpp in this directory is meant to be part of the target too.
+add_executable(${NAME}_tests
+    test_potential.cpp
+)
+
+target_link_libraries(${NAME}_tests gtest gtest_main)
+# Link the kernels library under the name it is created with
+# (add_library(${NAME}_cuda_lib ...) in kernels/CMakeLists.txt).
+target_link_libraries(${NAME}_tests ${NAME}_cuda_lib)
+# Passing the target name lets CMake substitute the built executable's location.
+add_test(NAME ${NAME}Tests COMMAND ${NAME}_tests)
diff --git a/tests/unit_tests/test_example.cpp b/tests/unit_tests/test_example.cpp
new file mode 100644
index 0000000..bde73e6
--- /dev/null
+++ b/tests/unit_tests/test_example.cpp
@@ -0,0 +1,5 @@
+#include "gtest/gtest.h"
+
+TEST(Example, Equals) {
+  EXPECT_EQ(1, 1);
+}
\ No newline at end of file
diff --git a/tests/unit_tests/test_potential.cpp b/tests/unit_tests/test_potential.cpp
new file mode 100644
index 0000000..d6bf23b
--- /dev/null
+++ b/tests/unit_tests/test_potential.cpp
@@ -0,0 +1,174 @@
+#include "potentials/pair_potentials.cuh"
+#include "precision.hpp"
+#include "gtest/gtest.h"
+#include <cmath>
+
+class LennardJonesTest : public ::testing::Test {
+protected:
+  void SetUp() override {
+    // Default parameters
+    sigma = 1.0;
+    epsilon = 1.0;
+    r_cutoff = 2.5;
+
+    // Create default LennardJones object
+    lj = new LennardJones(sigma, epsilon, r_cutoff);
+  }
+
+  void TearDown() override { delete lj; }
+
+  real sigma;
+  real epsilon;
+  real r_cutoff;
+  LennardJones *lj;
+
+  // Helper function to compare Vec3 values with tolerance
+  void expect_vec3_near(const Vec3<real> &expected, const Vec3<real> &actual,
+                        real tolerance) {
+    EXPECT_NEAR(expected.x, actual.x, tolerance);
+    EXPECT_NEAR(expected.y, actual.y, tolerance);
+    EXPECT_NEAR(expected.z, actual.z, tolerance);
+  }
+};
+
+TEST_F(LennardJonesTest, ZeroDistance) {
+  // At zero distance, the calculation should return zero force and energy
+  Vec3<real> r = {0.0, 0.0, 0.0};
+  auto result = lj->calc_force_and_energy(r);
+
+  EXPECT_EQ(0.0, result.energy);
+  expect_vec3_near({0.0, 0.0, 0.0}, result.force, 1e-10);
+}
+
+TEST_F(LennardJonesTest, BeyondCutoff) {
+  // Distance beyond cutoff should return zero force and energy
+  Vec3<real> r = {3.0, 0.0, 0.0}; // 3.0 > r_cutoff (2.5)
+  auto result = lj->calc_force_and_energy(r);
+
+  EXPECT_EQ(0.0, result.energy);
+  expect_vec3_near({0.0, 0.0, 0.0}, result.force, 1e-10);
+}
+
+TEST_F(LennardJonesTest, AtMinimum) {
+  // The LJ potential has a minimum at r = 2^(1/6) * sigma
+  real min_dist = std::pow(2.0, 1.0 / 6.0) * sigma;
+  Vec3<real> r = {min_dist, 0.0, 0.0};
+  auto result = lj->calc_force_and_energy(r);
+
+  // At the minimum the energy is -epsilon and the force is close to zero
+  EXPECT_NEAR(-epsilon, result.energy, 1e-10);
+  expect_vec3_near({0.0, 0.0, 0.0}, result.force, 1e-10);
+}
+
+TEST_F(LennardJonesTest, AtEquilibrium) {
+  // At r = sigma, the energy should be zero and force should be repulsive
+  Vec3<real> r = {sigma, 0.0, 0.0};
+  auto result = lj->calc_force_and_energy(r);
+
+  EXPECT_NEAR(0.0, result.energy, 1e-10);
+  EXPECT_GT(result.force.x,
+            0.0); // Force should be repulsive (positive x-direction)
+  EXPECT_NEAR(0.0, result.force.y, 1e-10);
+  EXPECT_NEAR(0.0, result.force.z, 1e-10);
+}
+
+TEST_F(LennardJonesTest, RepulsiveRegion) {
+  // Test in the repulsive region (r < sigma)
+  Vec3<real> r = {0.8 * sigma, 0.0, 0.0};
+  auto result = lj->calc_force_and_energy(r);
+
+  // Energy should be positive and force should be repulsive
+  EXPECT_GT(result.energy, 0.0);
+  EXPECT_GT(result.force.x, 0.0); // Force should be repulsive
+}
+
+TEST_F(LennardJonesTest, AttractiveRegion) {
+  // Test in the attractive region (r_min < r < r_cutoff)
+  Vec3<real> r = {1.5 * sigma, 0.0, 0.0};
+  auto result = lj->calc_force_and_energy(r);
+
+  // Energy should be negative and force should be attractive
+  EXPECT_LT(result.energy, 0.0);
+  EXPECT_LT(result.force.x,
+            0.0); // Force should be attractive (negative x-direction)
+}
+
+TEST_F(LennardJonesTest, ArbitraryDirection) {
+  // Test with a vector in an arbitrary direction
+  Vec3<real> r = {1.0, 1.0, 1.0};
+  auto result = lj->calc_force_and_energy(r);
+
+  // The force should be antiparallel to r: same line of action, opposite sign
+  // (attractive region)
+  real r_mag = std::sqrt(r.squared_norm2());
+
+  // Project the force onto the unit vector along r (should be negative)
+  Vec3<real> normalized_r = r.scale(1.0 / r_mag);
+  real force_dot_r = result.force.x * normalized_r.x +
+                     result.force.y * normalized_r.y +
+                     result.force.z * normalized_r.z;
+
+  // Here r = sqrt(3) * sigma, which lies in the attractive region
+  EXPECT_LT(force_dot_r, 0.0); // Force should be attractive
+
+  // Force should be symmetric in all dimensions for this vector
+  EXPECT_NEAR(result.force.x, result.force.y, 1e-10);
+  EXPECT_NEAR(result.force.y, result.force.z, 1e-10);
+}
+
+TEST_F(LennardJonesTest, ParameterVariation) {
+  // Test with different parameter values
+  real new_sigma = 2.0;
+  real new_epsilon = 0.5;
+  real new_r_cutoff = 5.0;
+
+  LennardJones lj2(new_sigma, new_epsilon, new_r_cutoff);
+
+  Vec3<real> r = {2.0, 0.0, 0.0};
+  auto result1 = lj->calc_force_and_energy(r);
+  auto result2 = lj2.calc_force_and_energy(r);
+
+  // Results should be different with different parameters
+  EXPECT_NE(result1.energy, result2.energy);
+  EXPECT_NE(result1.force.x, result2.force.x);
+}
+
+TEST_F(LennardJonesTest, ExactValueCheck) {
+  // Test with pre-calculated values for a specific case
+  LennardJones lj_exact(1.0, 1.0, 3.0);
+  Vec3<real> r = {1.5, 0.0, 0.0};
+  auto result = lj_exact.calc_force_and_energy(r);
+
+  // Reference values from the analytic LJ expressions (sigma = epsilon = 1):
+  // U = 4 (r^-12 - r^-6); expected_force holds dU/dr, so F_x = -expected_force
+  real expected_energy =
+      4.0 * (std::pow(1.0 / 1.5, 12) - std::pow(1.0 / 1.5, 6));
+  real expected_force =
+      24.0 * (std::pow(1.0 / 1.5, 6) - 2.0 * std::pow(1.0 / 1.5, 12)) / 1.5;
+
+  EXPECT_NEAR(expected_energy, result.energy, 1e-10);
+  EXPECT_NEAR(-expected_force, result.force.x,
+              1e-10); // Negative because force is attractive
+  EXPECT_NEAR(0.0, result.force.y, 1e-10);
+  EXPECT_NEAR(0.0, result.force.z, 1e-10);
+}
+
+TEST_F(LennardJonesTest, NearCutoff) {
+  // Test behavior just inside and just outside the cutoff
+  real inside_cutoff = r_cutoff - 0.01;
+  real outside_cutoff = r_cutoff + 0.01;
+
+  Vec3<real> r_inside = {inside_cutoff, 0.0, 0.0};
+  Vec3<real> r_outside = {outside_cutoff, 0.0, 0.0};
+
+  auto result_inside = lj->calc_force_and_energy(r_inside);
+  auto result_outside = lj->calc_force_and_energy(r_outside);
+
+  // Inside should have non-zero values
+  EXPECT_NE(0.0, result_inside.energy);
+  EXPECT_NE(0.0, result_inside.force.x);
+
+  // Outside should be zero
+  EXPECT_EQ(0.0, result_outside.energy);
+  expect_vec3_near({0.0, 0.0, 0.0}, result_outside.force, 1e-10);
+}