Add untested neighborlisting code
Some checks failed
Build and Test / build-and-test (push) Failing after 5m3s
Some checks failed
Build and Test / build-and-test (push) Failing after 5m3s
This commit is contained in:
parent
c4757df1be
commit
68992c77f5
3 changed files with 328 additions and 0 deletions
132
kernels/neighbor_list.cuh
Normal file
132
kernels/neighbor_list.cuh
Normal file
|
@ -0,0 +1,132 @@
|
|||
#ifndef NEIGHBOR_LIST_CUH
|
||||
#define NEIGHBOR_LIST_CUH
|
||||
|
||||
#include "kernel_config.cuh" // From previous artifact
|
||||
#include <cuda_runtime.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/sequence.h>
|
||||
#include <thrust/sort.h>
|
||||
|
||||
/**
|
||||
* Simple kernel to initialize neighbor offsets
|
||||
*/
|
||||
__global__ void init_neighbor_offsets(int *offsets, size_t n_particles,
|
||||
int max_neighbors) {
|
||||
size_t i = get_thread_id();
|
||||
if (i <= n_particles) { // Note: <= because we need n_particles + 1 elements
|
||||
offsets[i] = i * max_neighbors;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Neighbor list data structure
|
||||
* Uses a compact format similar to CSR sparse matrices
|
||||
*/
|
||||
struct NeighborList {
|
||||
int *neighbor_offsets; // Size: n_particles + 1, offset into neighbor_indices
|
||||
int *neighbor_indices; // Size: total_neighbors, actual neighbor particle IDs
|
||||
int *neighbor_counts; // Size: n_particles, number of neighbors per particle
|
||||
size_t max_neighbors; // Maximum neighbors allocated per particle
|
||||
size_t n_particles;
|
||||
|
||||
// Constructor
|
||||
NeighborList(size_t n_particles, size_t max_neighbors_per_particle)
|
||||
: max_neighbors(max_neighbors_per_particle), n_particles(n_particles) {
|
||||
|
||||
cudaMalloc(&neighbor_offsets, (n_particles + 1) * sizeof(int));
|
||||
cudaMalloc(&neighbor_indices, n_particles * max_neighbors * sizeof(int));
|
||||
cudaMalloc(&neighbor_counts, n_particles * sizeof(int));
|
||||
|
||||
// Initialize offsets
|
||||
auto kernel_config = get_launch_config(n_particles);
|
||||
init_neighbor_offsets<<<kernel_config.blocks, kernel_config.threads>>>(
|
||||
neighbor_offsets, n_particles, max_neighbors);
|
||||
}
|
||||
|
||||
~NeighborList() {
|
||||
cudaFree(neighbor_offsets);
|
||||
cudaFree(neighbor_indices);
|
||||
cudaFree(neighbor_counts);
|
||||
}
|
||||
|
||||
// Get neighbors for particle i (device function)
|
||||
__device__ int *get_neighbors(int i) const {
|
||||
return &neighbor_indices[neighbor_offsets[i]];
|
||||
}
|
||||
|
||||
__device__ int get_neighbor_count(int i) const { return neighbor_counts[i]; }
|
||||
};
|
||||
|
||||
/**
|
||||
* Cell list structure for spatial hashing
|
||||
*/
|
||||
struct CellList {
|
||||
int *cell_starts; // Size: total_cells, start index in sorted_particles
|
||||
int *cell_ends; // Size: total_cells, end index in sorted_particles
|
||||
int *sorted_particles; // Size: n_particles, particle IDs sorted by cell
|
||||
int *particle_cells; // Size: n_particles, which cell each particle belongs to
|
||||
|
||||
int3 grid_size; // Number of cells in each dimension
|
||||
float3 cell_size; // Size of each cell
|
||||
float3 box_min; // Minimum corner of simulation box
|
||||
size_t total_cells;
|
||||
size_t n_particles;
|
||||
|
||||
CellList(size_t n_particles, float3 box_size, float cutoff)
|
||||
: n_particles(n_particles) {
|
||||
|
||||
// Calculate grid dimensions (cell size slightly larger than cutoff)
|
||||
float cell_margin = 1.001f; // Small safety margin
|
||||
cell_size = make_float3(cutoff * cell_margin, cutoff * cell_margin,
|
||||
cutoff * cell_margin);
|
||||
|
||||
grid_size.x = (int)ceilf(box_size.x / cell_size.x);
|
||||
grid_size.y = (int)ceilf(box_size.y / cell_size.y);
|
||||
grid_size.z = (int)ceilf(box_size.z / cell_size.z);
|
||||
|
||||
total_cells = (size_t)grid_size.x * grid_size.y * grid_size.z;
|
||||
box_min = make_float3(0.0f, 0.0f, 0.0f); // Assume box starts at origin
|
||||
|
||||
// Allocate memory
|
||||
cudaMalloc(&cell_starts, total_cells * sizeof(int));
|
||||
cudaMalloc(&cell_ends, total_cells * sizeof(int));
|
||||
cudaMalloc(&sorted_particles, n_particles * sizeof(int));
|
||||
cudaMalloc(&particle_cells, n_particles * sizeof(int));
|
||||
}
|
||||
|
||||
~CellList() {
|
||||
cudaFree(cell_starts);
|
||||
cudaFree(cell_ends);
|
||||
cudaFree(sorted_particles);
|
||||
cudaFree(particle_cells);
|
||||
}
|
||||
|
||||
// Get cell index from 3D coordinates
|
||||
__device__ int get_cell_index(int x, int y, int z) const {
|
||||
return z * grid_size.x * grid_size.y + y * grid_size.x + x;
|
||||
}
|
||||
|
||||
// Get cell coordinates from position
|
||||
__device__ int3 get_cell_coords(float3 pos) const {
|
||||
return make_int3((int)((pos.x - box_min.x) / cell_size.x),
|
||||
(int)((pos.y - box_min.y) / cell_size.y),
|
||||
(int)((pos.z - box_min.z) / cell_size.z));
|
||||
}
|
||||
};
|
||||
|
||||
// Forward declarations
|
||||
void build_cell_list(const float4 *positions, CellList &cell_list,
|
||||
const float3 *box_lengths);
|
||||
|
||||
void build_neighbor_list(const float4 *positions, const CellList &cell_list,
|
||||
NeighborList &neighbor_list, float cutoff_squared,
|
||||
const float3 *box_lengths);
|
||||
|
||||
/**
|
||||
* High-level interface - builds complete neighbor list
|
||||
*/
|
||||
void build_neighbor_list_optimized(const float4 *positions, size_t n_particles,
|
||||
const float3 *box_lengths, float cutoff,
|
||||
NeighborList &neighbor_list);
|
||||
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue