#include "neighbor_list.cuh" /** * Step 1: Assign particles to cells */ __global__ void assign_particles_to_cells_kernel(const float4 *positions, int *particle_cells, const CellList cell_list, size_t n_particles) { size_t i = get_thread_id(); if (i >= n_particles) return; float4 pos = positions[i]; float3 pos3 = make_float3(pos.x, pos.y, pos.z); int3 cell_coords = cell_list.get_cell_coords(pos3); // Clamp to valid range (handle edge cases) cell_coords.x = max(0, min(cell_coords.x, cell_list.grid_size.x - 1)); cell_coords.y = max(0, min(cell_coords.y, cell_list.grid_size.y - 1)); cell_coords.z = max(0, min(cell_coords.z, cell_list.grid_size.z - 1)); particle_cells[i] = cell_list.get_cell_index(cell_coords.x, cell_coords.y, cell_coords.z); } /** * Step 2: Find cell boundaries after sorting */ __global__ void find_cell_boundaries_kernel(const int *sorted_particle_cells, int *cell_starts, int *cell_ends, size_t n_particles, size_t total_cells) { size_t i = get_thread_id(); if (i >= n_particles) return; int cell = sorted_particle_cells[i]; // Check if this is the start of a new cell if (i == 0 || sorted_particle_cells[i - 1] != cell) { cell_starts[cell] = i; } // Check if this is the end of a cell if (i == n_particles - 1 || sorted_particle_cells[i + 1] != cell) { cell_ends[cell] = i + 1; } } /** * Step 3: Build actual neighbor lists using cell lists */ __global__ void build_neighbor_lists_kernel(const float4 *positions, const CellList cell_list, NeighborList neighbor_list, float cutoff_squared, const float3 *box_lengths, size_t n_particles) { size_t i = get_thread_id(); if (i >= n_particles) return; float4 pos_i = positions[i]; float3 my_pos = make_float3(pos_i.x, pos_i.y, pos_i.z); int3 my_cell = cell_list.get_cell_coords(my_pos); int neighbor_count = 0; int max_neighbors = neighbor_list.max_neighbors; int *my_neighbors = neighbor_list.get_neighbors(i); // Search neighboring cells (3x3x3 = 27 cells including self) for (int dz = -1; dz <= 1; dz++) { for (int dy = -1; dy <= 1; dy++) { for (int dx = -1; dx <= 1; dx++) { int3 neighbor_cell = make_int3(my_cell.x + dx, my_cell.y + dy, my_cell.z + dz); // Check bounds if (neighbor_cell.x < 0 || neighbor_cell.x >= cell_list.grid_size.x || neighbor_cell.y < 0 || neighbor_cell.y >= cell_list.grid_size.y || neighbor_cell.z < 0 || neighbor_cell.z >= cell_list.grid_size.z) { continue; } int cell_idx = cell_list.get_cell_index( neighbor_cell.x, neighbor_cell.y, neighbor_cell.z); int start = cell_list.cell_starts[cell_idx]; int end = cell_list.cell_ends[cell_idx]; // Check all particles in this cell for (int idx = start; idx < end; idx++) { int j = cell_list.sorted_particles[idx]; if (i >= j) continue; // Avoid double counting and self-interaction float4 pos_j = positions[j]; float3 other_pos = make_float3(pos_j.x, pos_j.y, pos_j.z); // Calculate distance with periodic boundary conditions float dx = my_pos.x - other_pos.x; float dy = my_pos.y - other_pos.y; float dz = my_pos.z - other_pos.z; // Apply PBC dx -= box_lengths->x * roundf(dx / box_lengths->x); dy -= box_lengths->y * roundf(dy / box_lengths->y); dz -= box_lengths->z * roundf(dz / box_lengths->z); float r_squared = dx * dx + dy * dy + dz * dz; if (r_squared <= cutoff_squared && neighbor_count < max_neighbors) { my_neighbors[neighbor_count] = j; neighbor_count++; } } } } } neighbor_list.neighbor_counts[i] = neighbor_count; } // ============================================================================= // HOST FUNCTIONS // ============================================================================= void build_cell_list(const float4 *positions, CellList &cell_list, const float3 *box_lengths) { // Step 1: Assign particles to cells auto config = get_launch_config(cell_list.n_particles); assign_particles_to_cells_kernel<<>>( positions, cell_list.particle_cells, cell_list, cell_list.n_particles); // Step 2: Sort particles by cell index using Thrust thrust::device_ptr particle_cells_ptr(cell_list.particle_cells); thrust::device_ptr sorted_particles_ptr(cell_list.sorted_particles); // Initialize particle indices 0, 1, 2, ... thrust::sequence(sorted_particles_ptr, sorted_particles_ptr + cell_list.n_particles); // Sort particle indices by their cell assignments thrust::sort_by_key(particle_cells_ptr, particle_cells_ptr + cell_list.n_particles, sorted_particles_ptr); // Step 3: Initialize cell boundaries to -1 cudaMemset(cell_list.cell_starts, -1, cell_list.total_cells * sizeof(int)); cudaMemset(cell_list.cell_ends, -1, cell_list.total_cells * sizeof(int)); // Step 4: Find cell boundaries config = get_launch_config(cell_list.n_particles); find_cell_boundaries_kernel<<>>( cell_list.particle_cells, cell_list.cell_starts, cell_list.cell_ends, cell_list.n_particles, cell_list.total_cells); } void build_neighbor_list(const float4 *positions, const CellList &cell_list, NeighborList &neighbor_list, float cutoff_squared, const float3 *box_lengths) { // Initialize neighbor counts to 0 cudaMemset(neighbor_list.neighbor_counts, 0, neighbor_list.n_particles * sizeof(int)); auto config = get_launch_config(neighbor_list.n_particles); build_neighbor_lists_kernel<<>>( positions, cell_list, neighbor_list, cutoff_squared, box_lengths, neighbor_list.n_particles); } void build_neighbor_list_optimized(const float4 *positions, size_t n_particles, const float3 *box_lengths, float cutoff, NeighborList &neighbor_list) { // Get box size for cell list construction float3 box_size; cudaMemcpy(&box_size, box_lengths, sizeof(float3), cudaMemcpyDeviceToHost); // Create cell list CellList cell_list(n_particles, box_size, cutoff); // Build cell list build_cell_list(positions, cell_list, box_lengths); // Build neighbor list using cell list build_neighbor_list(positions, cell_list, neighbor_list, cutoff * cutoff, box_lengths); }