fix cuda11.6 build (#1101) · pytorch/FBGEMM@b3e6411

Commit

fix cuda11.6 build (#1101)

Summary:
CUB 1.14.0 introduced a breaking change (NVIDIA/cub#350): when the CUB_NS_[PRE|POST]FIX macros are set, CUB_NS_QUALIFIER must also be defined to the fully qualified CUB namespace (e.g. #define CUB_NS_QUALIFIER ::foo::cub).

Without this fix, fbgemm_gpu fails to build on CUDA 11.6 with the following errors:

>                  from /usr/local/cuda/include/cub/device/device_radix_sort.cuh:40,
>                      from ../../../src/split_embeddings_cache_cuda.cu:10:
>     /usr/local/cuda-11.6/targets/x86_64-linux/include/cub/util_namespace.cuh:46:2: error: #error CUB requires a definition of CUB_NS_QUALIFIER when CUB_NS_PREFIX/POSTFIX are defined.
>        46 | #error CUB requires a definition of CUB_NS_QUALIFIER when CUB_NS_PREFIX/POSTFIX are defined.
>           |  ^~~~~
>     [3/169] Building CUDA object CMakeFiles/fbgemm_gpu_py.dir/src/layout_transform_ops.cu.o
>     FAILED: CMakeFiles/fbgemm_gpu_py.dir/src/layout_transform_ops.cu.o
>     /usr/local/cuda/bin/nvcc  -DFBGEMM_CUB_USE_NAMESPACE -DUSE_C10D_GLOO -DUSE_C10D_MPI -DUSE_C10D_NCCL -DUSE_DISTRIBUTED -DUSE_RPC -DUSE_TENSORPIPE -Dfbgemm_gpu_py_EXPORTS -I/code/FBGEMM/fbgemm_gpu -I/code/FBGEMM/fbgemm_gpu/include -I/code/FBGEMM/include -I/code/FBGEMM/third_party/asmjit/src -I/code/FBGEMM/third_party/cpuinfo/include  -isystem=/usr/local/lib/python3.8/dist-packages/torch/include -isystem=/usr/local/lib/python3.8/dist-packages/torch/include/torch/csrc/api/include -isystem=/usr/local/cuda/include -DONNX_NAMESPACE=onnx_c2 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80 -Xcudafe --diag_suppress=cc_clobber_ignored,--diag_suppress=integer_sign_change,--diag_suppress=useless_using_declaration,--diag_suppress=set_but_not_used,--diag_suppress=field_without_dll_interface,--diag_suppress=base_class_has_different_dll_interface,--diag_suppress=dll_interface_conflict_none_assumed,--diag_suppress=dll_interface_conflict_dllexport_assumed,--diag_suppress=implicit_return_from_non_void_function,--diag_suppress=unsigned_compare_with_zero,--diag_suppress=declared_but_not_referenced,--diag_suppress=bad_friend_decl --expt-relaxed-constexpr --expt-extended-lambda -O3 -DNDEBUG -Xcompiler=-fPIC   -D_GLIBCXX_USE_CXX11_ABI=1 --expt-relaxed-constexpr -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ -x cu -c ../../../src/layout_transform_ops.cu -o CMakeFiles/fbgemm_gpu_py.dir/src/layout_transform_ops.cu.o && /usr/local/cuda/bin/nvcc  -DFBGEMM_CUB_USE_NAMESPACE -DUSE_C10D_GLOO -DUSE_C10D_MPI -DUSE_C10D_NCCL -DUSE_DISTRIBUTED -DUSE_RPC -DUSE_TENSORPIPE -Dfbgemm_gpu_py_EXPORTS -I/code/FBGEMM/fbgemm_gpu -I/code/FBGEMM/fbgemm_gpu/include -I/code/FBGEMM/include -I/code/FBGEMM/third_party/asmjit/src -I/code/FBGEMM/third_party/cpuinfo/include  -isystem=/usr/local/lib/python3.8/dist-packages/torch/include -isystem=/usr/local/lib/python3.8/dist-packages/torch/include/torch/csrc/api/include 
-isystem=/usr/local/cuda/include -DONNX_NAMESPACE=onnx_c2 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80 -Xcudafe --diag_suppress=cc_clobber_ignored,--diag_suppress=integer_sign_change,--diag_suppress=useless_using_declaration,--diag_suppress=set_but_not_used,--diag_suppress=field_without_dll_interface,--diag_suppress=base_class_has_different_dll_interface,--diag_suppress=dll_interface_conflict_none_assumed,--diag_suppress=dll_interface_conflict_dllexport_assumed,--diag_suppress=implicit_return_from_non_void_function,--diag_suppress=unsigned_compare_with_zero,--diag_suppress=declared_but_not_referenced,--diag_suppress=bad_friend_decl --expt-relaxed-constexpr --expt-extended-lambda -O3 -DNDEBUG -Xcompiler=-fPIC   -D_GLIBCXX_USE_CXX11_ABI=1 --expt-relaxed-constexpr -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ -x cu -M ../../../src/layout_transform_ops.cu -MT CMakeFiles/fbgemm_gpu_py.dir/src/layout_transform_ops.cu.o -o CMakeFiles/fbgemm_gpu_py.dir/src/layout_transform_ops.cu.o.d
>     In file included from /usr/local/cuda/include/cub/device/../util_arch.cuh:37,
>                      from /usr/local/cuda/include/cub/device/../config.cuh:35,
>                      from /usr/local/cuda/include/cub/device/device_scan.cuh:40,
>                      from ../../../src/layout_transform_ops.cu:10:
>     /usr/local/cuda/include/cub/device/../util_namespace.cuh:46:2: error: #error CUB requires a definition of CUB_NS_QUALIFIER when CUB_NS_PREFIX/POSTFIX are defined.
>        46 | #error CUB requires a definition of CUB_NS_QUALIFIER when CUB_NS_PREFIX/POSTFIX are defined.
>           |  ^~~~~
>     [4/169] Building CXX object CMakeFiles/fbgemm_gpu_py.dir/src/cpu_utils.cpp.o

https://github.com/NVIDIA/cub/releases/tag/1.14.0

Pull Request resolved: #1101

Reviewed By: brad-mengchi

Differential Revision: D36189683

Pulled By: jianyuh

fbshipit-source-id: 11286fe3923972fd5f5f332649716e7f2d6e206a

Loading branch information

pengwa authored and facebook-github-bot committed May 6, 2022

1 parent 06c6369 commit b3e6411

fbgemm_gpu/include/fbgemm_gpu/cub_namespace_postfix.cuh

-Original file line number
+Diff line change
@@ Expand Up / @@ -12,6 +12,20 @@ @@
     #undef CUB_NS_PREFIX
     #undef CUB_NS_POSTFIX
+    #include <cuda.h> // for CUDA_VERSION
+    #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
+    #include <cub/version.cuh>
+    #else
+    #define CUB_VERSION 0
+    #endif
+    // PR https://github.com/NVIDIA/cub/pull/350 introduced a breaking change.
+    // When the CUB_NS_[PRE|POST]FIX macros are set,
+    // CUB_NS_QUALIFIER must also be defined to the fully qualified CUB namespace
+    #if CUB_VERSION >= 101400
+    #undef CUB_NS_QUALIFIER
+    #endif
     #define FBGEMM_GPU_CUB_NS_PREFIX fbgemm_gpu::
     #else
@@ Expand Down @@

fbgemm_gpu/include/fbgemm_gpu/cub_namespace_prefix.cuh

-Original file line number
+Diff line change
@@ Expand Up / @@ -10,7 +10,28 @@ @@
     #undef CUB_NS_PREFIX
     #undef CUB_NS_POSTFIX
+    #include <cuda.h> // for CUDA_VERSION
+    #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
+    #include <cub/version.cuh>
+    #else
+    #define CUB_VERSION 0
+    #endif
+    // PR https://github.com/NVIDIA/cub/pull/350 introduced a breaking change.
+    // When the CUB_NS_[PRE|POST]FIX macros are set,
+    // CUB_NS_QUALIFIER must also be defined to the fully qualified CUB namespace
+    #if CUB_VERSION >= 101400
+    #undef CUB_NS_QUALIFIER
+    #endif
     #define CUB_NS_PREFIX namespace fbgemm_gpu {
     #define CUB_NS_POSTFIX } // namespace fbgemm_gpu
+    // PR https://github.com/NVIDIA/cub/pull/350 introduced a breaking change.
+    // When the CUB_NS_[PRE|POST]FIX macros are set,
+    // CUB_NS_QUALIFIER must also be defined to the fully qualified CUB namespace
+    #if CUB_VERSION >= 101400
+    #define CUB_NS_QUALIFIER ::fbgemm_gpu::cub
+    #endif
     #endif

0 comments on commit `b3e6411`

Please sign in to comment.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Commit

There are no files selected for viewing

0 comments on commit `b3e6411`

Commit

There are no files selected for viewing

0 comments on commit b3e6411

0 comments on commit `b3e6411`