|
1 | 1 | # |
2 | | -# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
| 2 | +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
3 | 3 | # SPDX-License-Identifier: Apache-2.0 |
4 | 4 | # |
5 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
@@ -176,43 +176,29 @@ set(CUDA_LIBRARIES ${CUDART_LIB}) |
176 | 176 | if (DEFINED GPU_ARCHS) |
177 | 177 | message(STATUS "GPU_ARCHS defined as ${GPU_ARCHS}. Generating CUDA code for SM ${GPU_ARCHS}") |
178 | 178 | separate_arguments(GPU_ARCHS) |
| 179 | + foreach(SM IN LISTS GPU_ARCHS) # copy each requested SM from GPU_ARCHS into CMAKE_CUDA_ARCHITECTURES |
| 180 | + list(APPEND CMAKE_CUDA_ARCHITECTURES ${SM}) # expand the loop variable; a bare SM would append the literal string "SM" |
| 181 | + endforeach() |
179 | 182 | else() |
180 | | - list(APPEND GPU_ARCHS |
181 | | - 75 |
182 | | - ) |
183 | | - |
184 | | - find_file(IS_L4T_NATIVE nv_tegra_release PATHS /env/) |
185 | | - set (IS_L4T_CROSS "False") |
186 | | - if (DEFINED ENV{IS_L4T_CROSS}) |
187 | | - set(IS_L4T_CROSS $ENV{IS_L4T_CROSS}) |
| 183 | + list(APPEND CMAKE_CUDA_ARCHITECTURES 72 75 80 86 87 89 90) |
| 184 | + |
| 185 | + if(CUDA_VERSION VERSION_GREATER_EQUAL 12.8) |
| 186 | + list(APPEND CMAKE_CUDA_ARCHITECTURES 100 120) |
188 | 187 | endif() |
189 | 188 |
|
190 | | - if (IS_L4T_NATIVE OR ${IS_L4T_CROSS} STREQUAL "True") |
191 | | - # Only Orin (SM87) supported |
192 | | - list(APPEND GPU_ARCHS 87) |
193 | | - endif() |
194 | | - |
195 | | - if (CUDA_VERSION VERSION_GREATER_EQUAL 11.0) |
196 | | - # Ampere GPU (SM80) support is only available in CUDA versions > 11.0 |
197 | | - list(APPEND GPU_ARCHS 80) |
198 | | - endif() |
199 | | - if (CUDA_VERSION VERSION_GREATER_EQUAL 11.1) |
200 | | - list(APPEND GPU_ARCHS 86) |
201 | | - endif() |
202 | | - |
203 | | - message(STATUS "GPU_ARCHS is not defined. Generating CUDA code for default SMs: ${GPU_ARCHS}") |
| 189 | + message(STATUS "GPU_ARCHS is not defined. Generating CUDA code for default SMs: ${CMAKE_CUDA_ARCHITECTURES}") |
204 | 190 | endif() |
205 | 191 | set(BERT_GENCODES) |
206 | 192 | # Generate SASS for each architecture |
207 | | -foreach(arch ${GPU_ARCHS}) |
| 193 | +foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) |
208 | 194 | if (${arch} GREATER_EQUAL 75) |
209 | 195 | set(BERT_GENCODES "${BERT_GENCODES} -gencode arch=compute_${arch},code=sm_${arch}") |
210 | 196 | endif() |
211 | 197 | set(GENCODES "${GENCODES} -gencode arch=compute_${arch},code=sm_${arch}") |
212 | 198 | endforeach() |
213 | 199 |
|
214 | 200 | # Generate PTX for the last architecture in the list. |
215 | | -list(GET GPU_ARCHS -1 LATEST_SM) |
| 201 | +list(GET CMAKE_CUDA_ARCHITECTURES -1 LATEST_SM) |
216 | 202 | set(GENCODES "${GENCODES} -gencode arch=compute_${LATEST_SM},code=compute_${LATEST_SM}") |
217 | 203 | if (${LATEST_SM} GREATER_EQUAL 75) |
218 | 204 | set(BERT_GENCODES "${BERT_GENCODES} -gencode arch=compute_${LATEST_SM},code=compute_${LATEST_SM}") |
|
0 commit comments