From 6927026f87c80ef23e801925f4ca10c0de6037fe Mon Sep 17 00:00:00 2001 From: Alex Selimov Date: Fri, 26 Sep 2025 19:30:12 -0400 Subject: [PATCH] Finalize cell list code and add tests --- CMakeLists.txt | 6 +- kernels/neighbor_list.cuh | 45 ++++++++------ tests/cuda_unit_tests/test_neighbor_list.cu | 68 ++++++++++++++++++++- 3 files changed, 97 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 618c891..0c411db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,11 +16,11 @@ set(CMAKE_CXX_STANDARD 17) # Add debug configuration if(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0") + set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0 --extended-lambda") elseif(CMAKE_BUILD_TYPE STREQUAL "Release") - set(CMAKE_CUDA_FLAGS_RELEASE "-O3 --use_fast_math") + set(CMAKE_CUDA_FLAGS_RELEASE "-O3 --use_fast_math --extended-lambda") elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") - set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O2") + set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O2 --extended-lambda") endif() # Set C++ debug/release flags globally diff --git a/kernels/neighbor_list.cuh b/kernels/neighbor_list.cuh index 35f4bb8..9708348 100644 --- a/kernels/neighbor_list.cuh +++ b/kernels/neighbor_list.cuh @@ -53,16 +53,6 @@ struct CellList { cudaFree(particle_cells); } - // Get cell index from 3D coordinates - // TODO; Maybe update this to use Morton Encodings in the future to improve - // locality of particle indices. Unclear how much of a benefit this will add, - // but would be cool to do - __host__ __device__ int - get_cell_index_from_cell_coords(int3 cell_coords) const { - return cell_coords.z * grid_size.x * grid_size.y + - cell_coords.y * grid_size.x + cell_coords.x; - } - std::pair calc_grid_and_cell_size(Box &box, float r_cutoff) const { int3 grid_size = {utils::max((int)(box.xhi - box.xlo) / r_cutoff, 1), @@ -78,17 +68,31 @@ struct CellList { return std::make_pair(grid_size, cell_size); } - __host__ __device__ int3 get_cell_coords_from_position(float3 pos) const { + // Get cell index from 3D coordinates + // TODO; Maybe update this to use Morton Encodings in the future to improve + // locality of particle indices. Unclear how much of a benefit this will add, + // but would be cool to do + __host__ __device__ static int + get_cell_index_from_cell_coords(int3 cell_coords, int3 grid_size) { + return cell_coords.z * grid_size.x * grid_size.y + + cell_coords.y * grid_size.x + cell_coords.x; + } + + __host__ __device__ static int3 + get_cell_coords_from_position(float3 pos, float3 box_min, float3 cell_size) { return make_int3((int)((pos.x - box_min.x) / cell_size.x), (int)((pos.y - box_min.y) / cell_size.y), (int)((pos.z - box_min.z) / cell_size.z)); } - __host__ __device__ int get_cell_index_from_position(float3 pos) const { - return get_cell_index_from_cell_coords(get_cell_coords_from_position(pos)); + __host__ __device__ static int + get_cell_index_from_position(float3 pos, int3 grid_size, float3 box_min, + float3 cell_size) { + return get_cell_index_from_cell_coords( + get_cell_coords_from_position(pos, box_min, cell_size), grid_size); } - void assign_particles_to_cells(float3 *positions) { + void assign_particles_to_cells(float3 *d_positions) { thrust::device_ptr particle_cells_ptr(particle_cells); thrust::device_ptr sorted_particles_ptr(sorted_particles); thrust::device_ptr cell_starts_ptr(cell_starts); @@ -96,9 +100,16 @@ struct CellList { thrust::sequence(sorted_particles_ptr, sorted_particles_ptr + n_particles); - for (size_t i = 0; i < n_particles; i++) { - particle_cells[i] = get_cell_index_from_position(positions[i]); - } + int3 grid_size = this->grid_size; + float3 box_min = this->box_min; + float3 cell_size = this->cell_size; + + thrust::transform(d_positions, d_positions + n_particles, + particle_cells_ptr, + [grid_size, box_min, cell_size] __device__(auto pos) { + return CellList::get_cell_index_from_position( + pos, grid_size, box_min, cell_size); + }); thrust::sort_by_key(particle_cells_ptr, particle_cells_ptr + n_particles, sorted_particles_ptr); diff --git a/tests/cuda_unit_tests/test_neighbor_list.cu b/tests/cuda_unit_tests/test_neighbor_list.cu index ad44eab..3534697 100644 --- a/tests/cuda_unit_tests/test_neighbor_list.cu +++ b/tests/cuda_unit_tests/test_neighbor_list.cu @@ -2,6 +2,8 @@ #include "box.hpp" #include "neighbor_list.cuh" #include "gtest/gtest.h" +#include +#include TEST(CellListTest, Constructor) { // Test case 1: Simple case @@ -27,6 +29,68 @@ TEST(CellListTest, GetCellIndex) { int expected_index = z * cell_list.grid_size.x * cell_list.grid_size.y + y * cell_list.grid_size.x + x; - EXPECT_EQ(cell_list.get_cell_index_from_cell_coords({x, y, z}), - expected_index); + EXPECT_EQ( + CellList::get_cell_index_from_cell_coords({x, y, z}, cell_list.grid_size), + expected_index); +} + +TEST(CellListTest, AssignParticlesToCells) { + Box box(0, 2, 0, 1, 0, 1); // Create a 2x1x1 box + float cutoff = 1.0; + CellList cell_list(4, box, cutoff); // 4 particles + + // Particle positions + thrust::host_vector h_positions(4); + h_positions[0] = make_float3(0.1, 0.1, 0.1); // Cell 0 + h_positions[1] = make_float3(1.1, 0.1, 0.1); // Cell 1 + h_positions[2] = make_float3(0.2, 0.1, 0.1); // Cell 0 + h_positions[3] = make_float3(1.2, 0.1, 0.1); // Cell 1 + + thrust::device_vector d_positions = h_positions; + + cell_list.assign_particles_to_cells( + thrust::raw_pointer_cast(d_positions.data())); + + // Copy results back to host + thrust::host_vector h_particle_cells(cell_list.n_particles); + thrust::host_vector h_sorted_particles(cell_list.n_particles); + thrust::host_vector h_cell_starts(cell_list.total_cells); + thrust::host_vector h_cell_count(cell_list.total_cells); + + cudaMemcpy(h_particle_cells.data(), cell_list.particle_cells, + cell_list.n_particles * sizeof(int), cudaMemcpyDeviceToHost); + cudaMemcpy(h_sorted_particles.data(), cell_list.sorted_particles, + cell_list.n_particles * sizeof(int), cudaMemcpyDeviceToHost); + cudaMemcpy(h_cell_starts.data(), cell_list.cell_starts, + cell_list.total_cells * sizeof(int), cudaMemcpyDeviceToHost); + cudaMemcpy(h_cell_count.data(), cell_list.cell_count, + cell_list.total_cells * sizeof(int), cudaMemcpyDeviceToHost); + + // Expected values + // Particles 0 and 2 are in cell 0, particles 1 and 3 are in cell 1 + // Sorted by cell: (0, 2, 1, 3) + // Cell 0: particles 0, 2 + // Cell 1: particles 1, 3 + + // Verify particle_cells (after sorting by key, this will be sorted) + // Original particle_cells: [0, 1, 0, 1] + // After sort_by_key, particle_cells will be sorted: [0, 0, 1, 1] + thrust::host_vector expected_particle_cells = {0, 0, 1, 1}; + EXPECT_EQ(h_particle_cells, expected_particle_cells); + + // Verify sorted_particles (original indices sorted by cell) + thrust::host_vector expected_sorted_particles = {0, 2, 1, 3}; + EXPECT_EQ(h_sorted_particles, expected_sorted_particles); + + // Verify cell_starts + // Cell 0 starts at index 0 in sorted_particles + // Cell 1 starts at index 2 in sorted_particles + thrust::host_vector expected_cell_starts = {0, 2}; + EXPECT_EQ(h_cell_starts, expected_cell_starts); + + // Verify cell_count + // Cell 0 has 2 particles + // Cell 1 has 2 particles + thrust::host_vector expected_cell_count = {2, 2}; + EXPECT_EQ(h_cell_count, expected_cell_count); }