diff --git a/kernels/forces.cuh b/kernels/forces.cuh index e4f52f1..c2a475d 100644 --- a/kernels/forces.cuh +++ b/kernels/forces.cuh @@ -30,7 +30,7 @@ __global__ void calc_forces_and_energies(float4 *pos, float4 *force_energies, real total_fx = 0, total_fy = 0, total_fz = 0, total_energy = 0; for (int j = 0; j < n_particles; j++) { - if (i != j) { + if (i < j) { float4 other_pos = pos[j]; real dx = xi - other_pos.x; real dy = yi - other_pos.y; @@ -46,10 +46,18 @@ __global__ void calc_forces_and_energies(float4 *pos, float4 *force_energies, total_fy += sol.y; total_fz += sol.z; total_energy += sol.w; + + atomicAdd(&force_energies[j].x, -sol.x); + atomicAdd(&force_energies[j].y, -sol.y); + atomicAdd(&force_energies[j].z, -sol.z); + atomicAdd(&force_energies[j].w, sol.w); } } - force_energies[i] = make_float4(total_fx, total_fy, total_fz, total_energy); + atomicAdd(&force_energies[i].x, total_fx); + atomicAdd(&force_energies[i].y, total_fy); + atomicAdd(&force_energies[i].z, total_fz); + atomicAdd(&force_energies[i].w, total_energy); } } inline void launch_force_kernels(float4 *xs, float4 *force_energies,