2025-08-27 22:07:47 -04:00
|
|
|
#ifndef FORCES_CUH
|
|
|
|
#define FORCES_CUH
|
2025-09-10 22:47:54 -04:00
|
|
|
#include "potentials/pair_potentials.cuh"
|
2025-08-27 22:07:47 -04:00
|
|
|
#include "precision.hpp"
|
2025-09-10 22:47:54 -04:00
|
|
|
#include <cstdio>
|
2025-09-12 21:44:41 -04:00
|
|
|
#include <cuda_runtime.h>
|
2025-09-10 22:47:54 -04:00
|
|
|
#include <vector>
|
|
|
|
|
2025-08-27 22:07:47 -04:00
|
|
|
namespace CAC {
|
2025-09-10 22:47:54 -04:00
|
|
|
|
2025-09-12 21:44:41 -04:00
|
|
|
/**
 * Zero the per-particle force/energy buffer with cudaMemset.
 *
 * All-zero bytes are the IEEE-754 representation of 0.0f, so a byte-wise
 * memset leaves every float4 component equal to zero.
 *
 * @param n_particles      Number of float4 entries to clear. Nothing is done
 *                         when this is <= 0.
 * @param forces_energies  Buffer handed to cudaMemset; must hold at least
 *                         n_particles float4 values. Nothing is done when null.
 *
 * A cudaMemset failure is reported on stderr; the function still returns
 * normally because its signature has no way to carry an error code.
 */
inline void reset_forces_and_energies(int n_particles,
                                      float4 *forces_energies) {
  // A negative count would wrap to an enormous byte count once converted to
  // size_t, so reject empty/invalid requests up front.
  if (n_particles <= 0 || forces_energies == nullptr) {
    return;
  }

  const std::size_t n_bytes =
      static_cast<std::size_t>(n_particles) * sizeof(float4);
  const cudaError_t status = cudaMemset(forces_energies, 0, n_bytes);
  if (status != cudaSuccess) {
    std::fprintf(stderr, "reset_forces_and_energies: cudaMemset failed: %s\n",
                 cudaGetErrorString(status));
  }
}
|
|
|
|
|
|
|
|
/**
 * All-pairs force/energy kernel for a single pair potential.
 *
 * For every particle i the kernel loops over all other particles j, forms the
 * minimum-image separation (xi - xj, wrapped by the box lengths), evaluates
 * `potential.calc_force_and_energy` on it and sums the returned float4
 * (x, y, z = force components, w = energy). The per-particle sum is ADDED to
 * force_energies[i], so contributions from several potentials launched one
 * after another accumulate instead of overwriting each other.
 *
 * Launch layout: 1D grid of 1D blocks (only the .x dimensions are used). A
 * grid-stride loop is used, so any grid_size/block_size combination covers
 * all n_particles.
 *
 * Preconditions:
 *  - force_energies must be initialised (e.g. zeroed) before the first launch,
 *    because the kernel reads the previous value.
 *  - Launches that update the same force_energies buffer must not run
 *    concurrently; each thread does an unsynchronised read-modify-write of
 *    its own entry.
 *  - box_len must point to three values readable from device code.
 *
 * @param pos             Particle positions; x/y/z are used, w is ignored here.
 * @param force_energies  In/out accumulator, one float4 per particle.
 * @param n_particles     Number of particles.
 * @param box_len         Periodic box lengths along x, y, z.
 * @param potential       Potential object, passed by value to the device.
 */
template <typename PotentialType>
__global__ void calc_forces_and_energies(float4 *pos, float4 *force_energies,
                                         int n_particles, real *box_len,
                                         PotentialType potential) {
  // 64-bit index arithmetic so blockIdx.x * blockDim.x cannot wrap.
  const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
  const long long first =
      static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;

  for (long long idx = first; idx < n_particles; idx += stride) {
    const int i = static_cast<int>(idx);

    const float4 my_pos = pos[i]; // Loads 16 bytes in one transaction
    const real xi = my_pos.x;
    const real yi = my_pos.y;
    const real zi = my_pos.z;

    // The box lengths do not change inside the pair loop; read them once.
    const real box_x = box_len[0];
    const real box_y = box_len[1];
    const real box_z = box_len[2];

    real total_fx = 0, total_fy = 0, total_fz = 0, total_energy = 0;

    for (int j = 0; j < n_particles; j++) {
      if (i == j) {
        continue; // no self-interaction
      }

      const float4 other_pos = pos[j];
      real dx = xi - other_pos.x;
      real dy = yi - other_pos.y;
      real dz = zi - other_pos.z;

      // Apply periodic boundary conditions
      dx -= box_x * round(dx / box_x);
      dy -= box_y * round(dy / box_y);
      dz -= box_z * round(dz / box_z);

      const float4 sol = potential.calc_force_and_energy({dx, dy, dz});
      total_fx += sol.x;
      total_fy += sol.y;
      total_fz += sol.z;
      total_energy += sol.w;
    }

    // Accumulate rather than overwrite: the buffer may already hold the
    // contribution of a previously launched potential.
    float4 acc = force_energies[i];
    acc.x += static_cast<float>(total_fx);
    acc.y += static_cast<float>(total_fy);
    acc.z += static_cast<float>(total_fz);
    acc.w += static_cast<float>(total_energy);
    force_energies[i] = acc;
  }
}
|
2025-09-12 21:44:41 -04:00
|
|
|
/**
 * Clear the force/energy buffer and launch calc_forces_and_energies once for
 * every entry of `potentials`.
 *
 * Each entry is dispatched with std::visit so the kernel is instantiated for
 * the concrete potential type it holds. All launches go to the default stream,
 * which executes them in issue order; the function waits for the device once
 * after the last launch, so the buffer is ready for use when it returns.
 *
 * Launch-configuration errors (checked after every launch) and execution
 * errors (checked at the final synchronisation) are reported on stderr. The
 * function returns normally in either case because it has no error channel.
 *
 * @param xs              Particle positions passed to the kernel.
 * @param force_energies  Per-particle force/energy buffer (n_particles float4).
 * @param n_particles     Number of particles; nothing is done when <= 0.
 * @param box_len         Box lengths passed to the kernel.
 * @param potentials      Potentials to evaluate, visited in order.
 * @param grid_size       Number of blocks for each launch.
 * @param block_size      Threads per block for each launch.
 */
inline void launch_force_kernels(float4 *xs, float4 *force_energies,
                                 int n_particles, real *box_len,
                                 std::vector<PairPotentials> potentials,
                                 int grid_size, int block_size) {
  // Nothing to clear or compute; also keeps a negative count away from the
  // byte-size computation in the reset.
  if (n_particles <= 0) {
    return;
  }

  reset_forces_and_energies(n_particles, force_energies);

  for (const auto &entry : potentials) {
    std::visit(
        [&](const auto &concrete) {
          using PotentialType = std::decay_t<decltype(concrete)>;
          calc_forces_and_energies<PotentialType><<<grid_size, block_size>>>(
              xs, force_energies, n_particles, box_len, concrete);
        },
        entry);

    // A kernel launch returns nothing; a bad configuration only shows up
    // through cudaGetLastError().
    const cudaError_t launch_status = cudaGetLastError();
    if (launch_status != cudaSuccess) {
      std::fprintf(stderr, "launch_force_kernels: kernel launch failed: %s\n",
                   cudaGetErrorString(launch_status));
    }
  }

  // Same-stream launches already run in order, so a single wait at the end is
  // enough and avoids stalling the host between potentials.
  const cudaError_t sync_status = cudaDeviceSynchronize();
  if (sync_status != cudaSuccess) {
    std::fprintf(stderr, "launch_force_kernels: kernel execution failed: %s\n",
                 cudaGetErrorString(sync_status));
  }
}
|
2025-08-27 22:07:47 -04:00
|
|
|
} // namespace CAC
|
|
|
|
#endif
|