Add CUDA support

This commit is contained in:
Alex Selimov 2025-04-16 18:00:55 -04:00
parent fbc34a0bdd
commit 80d3b6276e
7 changed files with 223 additions and 10 deletions

View file

@ -10,4 +10,16 @@ if(NOT EXISTS ${GOOGLETEST_DIR})
endif()
add_subdirectory(lib/googletest)
add_subdirectory(unit_tests)
add_subdirectory(unit_tests)
# Only run Cuda tests if cuda is available
if (CMAKE_CUDA_COMPILER)
set(CMAKE_CUDA_ARCHITECTURES 61)
set(CUDA_SEPARABLE_COMPILATION ON)
add_subdirectory(cuda_unit_tests)
message(STATUS "CUDA found. CUDA tests will be build")
else()
message(STATUS "CUDA not found. Skipping CUDA tests")
endif()

View file

@ -0,0 +1,10 @@
include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
add_executable(${NAME}_cuda_tests
vec3_test.cu
)
target_link_libraries(${NAME}_cuda_tests gtest gtest_main)
target_link_libraries(${NAME}_cuda_tests ${NAME})
add_test(NAME Vec3CudaTests COMMAND ${CMAKE_BINARY_DIR}/tests/cuda_unit_tests/${NAME}_cuda_tests)

View file

@ -0,0 +1,177 @@
#include "vec3.h"
#include <cuda_runtime.h>
#include <gtest/gtest.h>
// Define kernel function to test Vec3 operations
template <typename T>
__global__ void testVec3Operations(Vec3<T> *results, Vec3<T> a, Vec3<T> b,
T scalar) {
int idx = threadIdx.x;
// Test different operations based on thread index
switch (idx) {
case 0: // Addition
results[idx] = a + b;
break;
case 1: // Subtraction
results[idx] = a - b;
break;
case 2: // Scale
results[idx] = a.scale(scalar);
break;
case 3: // Dot product - store in x component
results[idx].x = a.dot(b);
results[idx].y = 0;
results[idx].z = 0;
break;
case 4: // Cross product
results[idx] = a.cross(b);
break;
case 5: // Squared norm - store in x component
results[idx].x = a.squared_norm2();
results[idx].y = 0;
results[idx].z = 0;
break;
case 6: // Norm - store in x component
results[idx].x = a.norm2();
results[idx].y = 0;
results[idx].z = 0;
break;
case 7: // Normalized
results[idx] = a.normalized();
break;
}
}
// Test fixture for Vec3 CUDA tests
class Vec3CudaTest : public ::testing::Test {
protected:
void SetUp() override {
// Allocate device memory for results
cudaMalloc(&d_results, NUM_TESTS * sizeof(Vec3<float>));
}
void TearDown() override {
// Free device memory
cudaFree(d_results);
}
// Number of operations to test
static const int NUM_TESTS = 8;
// Pointer to device memory for results
Vec3<float> *d_results;
// Host memory for results
Vec3<float> h_results[NUM_TESTS];
// Test with a reasonable epsilon for floating point comparisons
float epsilon = 1e-5f;
};
TEST_F(Vec3CudaTest, BasicOperations) {
// Define test vectors
Vec3<float> a{1.0f, 2.0f, 3.0f};
Vec3<float> b{4.0f, 5.0f, 6.0f};
float scalar = 2.0f;
// Launch kernel with 8 threads to test different operations
testVec3Operations<<<1, NUM_TESTS>>>(d_results, a, b, scalar);
// Check for kernel execution errors
cudaError_t cudaStatus = cudaGetLastError();
ASSERT_EQ(cudaStatus, cudaSuccess)
<< "Kernel launch failed: " << cudaGetErrorString(cudaStatus);
// Copy results back to host
cudaStatus = cudaMemcpy(h_results, d_results, NUM_TESTS * sizeof(Vec3<float>),
cudaMemcpyDeviceToHost);
ASSERT_EQ(cudaStatus, cudaSuccess)
<< "cudaMemcpy failed: " << cudaGetErrorString(cudaStatus);
// Wait for GPU to finish
cudaStatus = cudaDeviceSynchronize();
ASSERT_EQ(cudaStatus, cudaSuccess)
<< "cudaDeviceSynchronize failed: " << cudaGetErrorString(cudaStatus);
// Test addition
EXPECT_NEAR(h_results[0].x, 5.0f, epsilon);
EXPECT_NEAR(h_results[0].y, 7.0f, epsilon);
EXPECT_NEAR(h_results[0].z, 9.0f, epsilon);
// Test subtraction
EXPECT_NEAR(h_results[1].x, -3.0f, epsilon);
EXPECT_NEAR(h_results[1].y, -3.0f, epsilon);
EXPECT_NEAR(h_results[1].z, -3.0f, epsilon);
// Test scale
EXPECT_NEAR(h_results[2].x, 2.0f, epsilon);
EXPECT_NEAR(h_results[2].y, 4.0f, epsilon);
EXPECT_NEAR(h_results[2].z, 6.0f, epsilon);
// Test dot product
EXPECT_NEAR(h_results[3].x, 32.0f, epsilon);
// Test cross product
EXPECT_NEAR(h_results[4].x, -3.0f, epsilon);
EXPECT_NEAR(h_results[4].y, 6.0f, epsilon);
EXPECT_NEAR(h_results[4].z, -3.0f, epsilon);
// Test squared norm
EXPECT_NEAR(h_results[5].x, 14.0f, epsilon);
// Test norm
EXPECT_NEAR(h_results[6].x, std::sqrt(14.0f), epsilon);
// Test normalized
float norm = std::sqrt(14.0f);
EXPECT_NEAR(h_results[7].x, 1.0f / norm, epsilon);
EXPECT_NEAR(h_results[7].y, 2.0f / norm, epsilon);
EXPECT_NEAR(h_results[7].z, 3.0f / norm, epsilon);
}
TEST_F(Vec3CudaTest, EdgeCases) {
// Test with zero vector
Vec3<float> zero{0.0f, 0.0f, 0.0f};
Vec3<float> nonZero{1.0f, 2.0f, 3.0f};
float scalar = 5.0f;
// Launch kernel with 8 threads to test different operations
testVec3Operations<<<1, NUM_TESTS>>>(d_results, zero, nonZero, scalar);
// Check for kernel execution errors
cudaError_t cudaStatus = cudaGetLastError();
ASSERT_EQ(cudaStatus, cudaSuccess)
<< "Kernel launch failed: " << cudaGetErrorString(cudaStatus);
// Copy results back to host
cudaStatus = cudaMemcpy(h_results, d_results, NUM_TESTS * sizeof(Vec3<float>),
cudaMemcpyDeviceToHost);
ASSERT_EQ(cudaStatus, cudaSuccess)
<< "cudaMemcpy failed: " << cudaGetErrorString(cudaStatus);
// Wait for GPU to finish
cudaStatus = cudaDeviceSynchronize();
ASSERT_EQ(cudaStatus, cudaSuccess)
<< "cudaDeviceSynchronize failed: " << cudaGetErrorString(cudaStatus);
// Test normalized with zero vector (should handle epsilon)
// Normalized of zero vector should be very small but not NaN
EXPECT_FALSE(isnan(h_results[7].x));
EXPECT_FALSE(isnan(h_results[7].y));
EXPECT_FALSE(isnan(h_results[7].z));
// Test dot product with zero vector (should be zero)
EXPECT_NEAR(h_results[3].x, 0.0f, epsilon);
// Test cross product with zero vector (should be zero)
EXPECT_NEAR(h_results[4].x, 0.0f, epsilon);
EXPECT_NEAR(h_results[4].y, 0.0f, epsilon);
EXPECT_NEAR(h_results[4].z, 0.0f, epsilon);
}
// Main function to run all tests
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View file

@ -5,6 +5,6 @@ add_executable(${NAME}_tests
)
target_link_libraries(${NAME}_tests gtest gtest_main)
target_link_libraries(${NAME_tests} ${NAME})
target_link_libraries(${NAME}_tests ${NAME})
add_test(NAME Vec3Tests COMMAND ${CMAKE_BINARY_DIR}/tests/unit_tests/${NAME}_tests)