Skip to content

Commit 21e0f0e

Browse files
Add cuda 12.8 support (#605)
CUDA 12.8 introduces sm_120, which requires a reduced number of threads per SM. We also need to pass -static-global-template-stub=false when building with 12.8, as we violate CUDA ODR kernel rules. Authors: Robert Maynard (https://github.com/robertmaynard). Approvers: Corey J. Nolet (https://github.com/cjnolet), https://github.com/jakirkham. URL: #605
1 parent 44dafe6 commit 21e0f0e

File tree

2 files changed

+9
-3
lines changed

2 files changed

+9
-3
lines changed

cpp/cmake/modules/ConfigureCUDA.cmake

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# =============================================================================
2-
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
2+
# Copyright (c) 2018-2025, NVIDIA CORPORATION.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
55
# in compliance with the License. You may obtain a copy of the License at
@@ -35,6 +35,11 @@ if(CMAKE_COMPILER_IS_GNUCXX)
3535
endif()
3636
endif()
3737

38+
# Allow invalid CUDA kernels in the short term
39+
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8.0)
40+
list(APPEND CUVS_CUDA_FLAGS -static-global-template-stub=false)
41+
endif()
42+
3843
if(CUDA_LOG_COMPILE_TIME)
3944
list(APPEND CUVS_CUDA_FLAGS "--time=nvcc_compile_log.csv")
4045
endif()

cpp/src/neighbors/detail/nn_descent.cuh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -706,7 +706,8 @@ __device__ __forceinline__ void remove_duplicates(
706706
template <typename Index_t, typename ID_t = InternalID_t<Index_t>>
707707
RAFT_KERNEL
708708
#ifdef __CUDA_ARCH__
709-
#if (__CUDA_ARCH__) == 750 || ((__CUDA_ARCH__) >= 860 && (__CUDA_ARCH__) <= 890)
709+
#if (__CUDA_ARCH__) == 750 || ((__CUDA_ARCH__) >= 860 && (__CUDA_ARCH__) <= 890) || \
710+
(__CUDA_ARCH__) == 1200
710711
__launch_bounds__(BLOCK_SIZE)
711712
#else
712713
__launch_bounds__(BLOCK_SIZE, 4)

0 commit comments

Comments
 (0)