cudaCAC/kernels/forces.cuh
Alex Selimov c4757df1be
Some checks failed
Build and Test / build-and-test (push) Failing after 5m3s
Add newton's third law
2025-09-13 20:29:05 -04:00

83 lines
2.6 KiB
Text

#ifndef FORCES_CUH
#define FORCES_CUH
#include "kernel_config.cuh"
#include "potentials/pair_potentials.cuh"
#include "precision.hpp"
#include <cstdio>
#include <cuda_runtime.h>
#include <vector>
namespace CAC {
inline void reset_forces_and_energies(int n_particles,
float4 *forces_energies) {
cudaMemset(forces_energies, 0, n_particles * sizeof(float4));
}
template <typename PotentialType>
__global__ void calc_forces_and_energies(float4 *pos, float4 *force_energies,
int n_particles, real *box_len,
PotentialType potential) {
int i = get_thread_id();
if (i < n_particles) {
float4 my_pos = pos[i]; // Loads 16 bytes in one transaction
real xi = my_pos.x;
real yi = my_pos.y;
real zi = my_pos.z;
real total_fx = 0, total_fy = 0, total_fz = 0, total_energy = 0;
for (int j = 0; j < n_particles; j++) {
if (i < j) {
float4 other_pos = pos[j];
real dx = xi - other_pos.x;
real dy = yi - other_pos.y;
real dz = zi - other_pos.z;
// Apply periodic boundary conditions
dx -= box_len[0] * round(dx / box_len[0]);
dy -= box_len[1] * round(dy / box_len[1]);
dz -= box_len[2] * round(dz / box_len[2]);
float4 sol = potential.calc_force_and_energy({dx, dy, dz});
total_fx += sol.x;
total_fy += sol.y;
total_fz += sol.z;
total_energy += sol.w;
atomicAdd(&force_energies[j].x, -sol.x);
atomicAdd(&force_energies[j].y, -sol.y);
atomicAdd(&force_energies[j].z, -sol.z);
atomicAdd(&force_energies[j].w, sol.w);
}
}
atomicAdd(&force_energies[i].x, total_fx);
atomicAdd(&force_energies[i].y, total_fy);
atomicAdd(&force_energies[i].z, total_fz);
atomicAdd(&force_energies[i].w, total_energy);
}
}
inline void launch_force_kernels(float4 *xs, float4 *force_energies,
int n_particles, real *box_len,
std::vector<PairPotentials> potentials,
dim3 blocks, dim3 threads_per_block) {
reset_forces_and_energies(n_particles, force_energies);
for (const auto &potential : potentials) {
std::visit(
[&](const auto &potential) {
using PotentialType = std::decay_t<decltype(potential)>;
calc_forces_and_energies<PotentialType>
<<<blocks, threads_per_block>>>(xs, force_energies, n_particles,
box_len, potential);
},
potential);
cudaDeviceSynchronize();
}
}
} // namespace CAC
#endif