Add a kernel_config to calculate blocks and threads for launching kernels
Some checks failed
Build and Test / build-and-test (push) Failing after 5m3s

This commit is contained in:
Alex Selimov 2025-09-12 22:47:21 -04:00
parent 130b613a7c
commit 8ba5714648
Signed by: aselimov
GPG key ID: 3DDB9C3E023F1F31
5 changed files with 169 additions and 9 deletions

View file

@@ -1,5 +1,6 @@
#ifndef FORCES_CUH
#define FORCES_CUH
+#include "kernel_config.cuh"
#include "potentials/pair_potentials.cuh"
#include "precision.hpp"
#include <cstdio>
@@ -18,7 +19,7 @@ __global__ void calc_forces_and_energies(float4 *pos, float4 *force_energies,
int n_particles, real *box_len,
PotentialType potential) {
-int i = blockIdx.x * blockDim.x + threadIdx.x;
+int i = get_thread_id();
if (i < n_particles) {
float4 my_pos = pos[i]; // Loads 16 bytes in one transaction
@@ -54,7 +55,7 @@ __global__ void calc_forces_and_energies(float4 *pos, float4 *force_energies,
inline void launch_force_kernels(float4 *xs, float4 *force_energies,
int n_particles, real *box_len,
std::vector<PairPotentials> potentials,
-int grid_size, int block_size) {
+dim3 blocks, dim3 threads_per_block) {
reset_forces_and_energies(n_particles, force_energies);
@@ -62,8 +63,9 @@ inline void launch_force_kernels(float4 *xs, float4 *force_energies,
std::visit(
[&](const auto &potential) {
using PotentialType = std::decay_t<decltype(potential)>;
-calc_forces_and_energies<PotentialType><<<grid_size, block_size>>>(
-xs, force_energies, n_particles, box_len, potential);
+calc_forces_and_energies<PotentialType>
+<<<blocks, threads_per_block>>>(xs, force_energies, n_particles,
+box_len, potential);
},
potential);
cudaDeviceSynchronize();