Update params for KernelConfig and add basic tests for getThreadId
Some checks are pending
Build and Test / build-and-test (push) Waiting to run

This commit is contained in:
Alex Selimov 2025-09-18 23:47:40 -04:00
parent 9825c0d14d
commit 8dec472929
Signed by: aselimov
GPG key ID: 3DDB9C3E023F1F31
5 changed files with 63 additions and 12 deletions

View file

@@ -13,12 +13,12 @@ void KernelConfig::print() const {
total_threads());
}
KernelConfig get_launch_config(size_t n_elements, int threads_per_block,
int max_blocks_per_dim) {
KernelConfig get_launch_config(size_t n_elements, size_t threads_per_block,
size_t max_blocks_per_dim) {
// Ensure threads_per_block is valid
threads_per_block = std::min(threads_per_block, 1024);
threads_per_block = std::max(threads_per_block, 32);
threads_per_block = std::min(threads_per_block, (size_t)1024);
threads_per_block = std::max(threads_per_block, (size_t)32);
// Calculate total blocks needed
size_t total_blocks =
@@ -66,8 +66,8 @@ KernelConfig get_launch_config_advanced(size_t n_elements, int device_id) {
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, device_id);
int threads_per_block = get_optimal_block_size(device_id);
int max_blocks_per_dim = prop.maxGridSize[0];
size_t threads_per_block = get_optimal_block_size(device_id);
size_t max_blocks_per_dim = prop.maxGridSize[0];
return get_launch_config(n_elements, threads_per_block, max_blocks_per_dim);
}