#ifndef NEIGHBOR_LIST_CUH #define NEIGHBOR_LIST_CUH #include "kernel_config.cuh" // From previous artifact #include #include #include #include /** * Simple kernel to initialize neighbor offsets */ __global__ void init_neighbor_offsets(int *offsets, size_t n_particles, int max_neighbors) { size_t i = get_thread_id(); if (i <= n_particles) { // Note: <= because we need n_particles + 1 elements offsets[i] = i * max_neighbors; } } /** * Neighbor list data structure * Uses a compact format similar to CSR sparse matrices */ struct NeighborList { int *neighbor_offsets; // Size: n_particles + 1, offset into neighbor_indices int *neighbor_indices; // Size: total_neighbors, actual neighbor particle IDs int *neighbor_counts; // Size: n_particles, number of neighbors per particle size_t max_neighbors; // Maximum neighbors allocated per particle size_t n_particles; // Constructor NeighborList(size_t n_particles, size_t max_neighbors_per_particle) : max_neighbors(max_neighbors_per_particle), n_particles(n_particles) { cudaMalloc(&neighbor_offsets, (n_particles + 1) * sizeof(int)); cudaMalloc(&neighbor_indices, n_particles * max_neighbors * sizeof(int)); cudaMalloc(&neighbor_counts, n_particles * sizeof(int)); // Initialize offsets auto kernel_config = get_launch_config(n_particles); init_neighbor_offsets<<>>( neighbor_offsets, n_particles, max_neighbors); } ~NeighborList() { cudaFree(neighbor_offsets); cudaFree(neighbor_indices); cudaFree(neighbor_counts); } // Get neighbors for particle i (device function) __device__ int *get_neighbors(int i) const { return &neighbor_indices[neighbor_offsets[i]]; } __device__ int get_neighbor_count(int i) const { return neighbor_counts[i]; } }; /** * Cell list structure for spatial hashing */ struct CellList { int *cell_starts; // Size: total_cells, start index in sorted_particles int *cell_ends; // Size: total_cells, end index in sorted_particles int *sorted_particles; // Size: n_particles, particle IDs sorted by cell int *particle_cells; // Size: n_particles, which cell each particle belongs to int3 grid_size; // Number of cells in each dimension float3 cell_size; // Size of each cell float3 box_min; // Minimum corner of simulation box size_t total_cells; size_t n_particles; CellList(size_t n_particles, float3 box_size, float cutoff) : n_particles(n_particles) { // Calculate grid dimensions (cell size slightly larger than cutoff) float cell_margin = 1.001f; // Small safety margin cell_size = make_float3(cutoff * cell_margin, cutoff * cell_margin, cutoff * cell_margin); grid_size.x = (int)ceilf(box_size.x / cell_size.x); grid_size.y = (int)ceilf(box_size.y / cell_size.y); grid_size.z = (int)ceilf(box_size.z / cell_size.z); total_cells = (size_t)grid_size.x * grid_size.y * grid_size.z; box_min = make_float3(0.0f, 0.0f, 0.0f); // Assume box starts at origin // Allocate memory cudaMalloc(&cell_starts, total_cells * sizeof(int)); cudaMalloc(&cell_ends, total_cells * sizeof(int)); cudaMalloc(&sorted_particles, n_particles * sizeof(int)); cudaMalloc(&particle_cells, n_particles * sizeof(int)); } ~CellList() { cudaFree(cell_starts); cudaFree(cell_ends); cudaFree(sorted_particles); cudaFree(particle_cells); } // Get cell index from 3D coordinates __device__ int get_cell_index(int x, int y, int z) const { return z * grid_size.x * grid_size.y + y * grid_size.x + x; } // Get cell coordinates from position __device__ int3 get_cell_coords(float3 pos) const { return make_int3((int)((pos.x - box_min.x) / cell_size.x), (int)((pos.y - box_min.y) / cell_size.y), (int)((pos.z - box_min.z) / cell_size.z)); } }; // Forward declarations void build_cell_list(const float4 *positions, CellList &cell_list, const float3 *box_lengths); void build_neighbor_list(const float4 *positions, const CellList &cell_list, NeighborList &neighbor_list, float cutoff_squared, const float3 *box_lengths); /** * High-level interface - builds complete neighbor list */ void build_neighbor_list_optimized(const float4 *positions, size_t n_particles, const float3 *box_lengths, float cutoff, NeighborList &neighbor_list); #endif