Skip to content

Commit

Permalink
fix cuda11.6 build (#1101)
Browse files Browse the repository at this point in the history
Summary:
CUB 1.14.0 introduced a breaking change (NVIDIA/cub#350): when the CUB_NS_[PRE|POST]FIX macros are set, CUB_NS_QUALIFIER must also be defined to the fully qualified CUB namespace (e.g. #define CUB_NS_QUALIFIER ::foo::cub).

Without this fix, fbgemm_gpu fails to build on CUDA 11.6 with the following errors:

>                  from /usr/local/cuda/include/cub/device/device_radix_sort.cuh:40,
>                      from ../../../src/split_embeddings_cache_cuda.cu:10:
>     /usr/local/cuda-11.6/targets/x86_64-linux/include/cub/util_namespace.cuh:46:2: error: #error CUB requires a definition of CUB_NS_QUALIFIER when CUB_NS_PREFIX/POSTFIX are defined.
>        46 | #error CUB requires a definition of CUB_NS_QUALIFIER when CUB_NS_PREFIX/POSTFIX are defined.
>           |  ^~~~~
>     [3/169] Building CUDA object CMakeFiles/fbgemm_gpu_py.dir/src/layout_transform_ops.cu.o
>     FAILED: CMakeFiles/fbgemm_gpu_py.dir/src/layout_transform_ops.cu.o
>     /usr/local/cuda/bin/nvcc  -DFBGEMM_CUB_USE_NAMESPACE -DUSE_C10D_GLOO -DUSE_C10D_MPI -DUSE_C10D_NCCL -DUSE_DISTRIBUTED -DUSE_RPC -DUSE_TENSORPIPE -Dfbgemm_gpu_py_EXPORTS -I/code/FBGEMM/fbgemm_gpu -I/code/FBGEMM/fbgemm_gpu/include -I/code/FBGEMM/include -I/code/FBGEMM/third_party/asmjit/src -I/code/FBGEMM/third_party/cpuinfo/include  -isystem=/usr/local/lib/python3.8/dist-packages/torch/include -isystem=/usr/local/lib/python3.8/dist-packages/torch/include/torch/csrc/api/include -isystem=/usr/local/cuda/include -DONNX_NAMESPACE=onnx_c2 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80 -Xcudafe --diag_suppress=cc_clobber_ignored,--diag_suppress=integer_sign_change,--diag_suppress=useless_using_declaration,--diag_suppress=set_but_not_used,--diag_suppress=field_without_dll_interface,--diag_suppress=base_class_has_different_dll_interface,--diag_suppress=dll_interface_conflict_none_assumed,--diag_suppress=dll_interface_conflict_dllexport_assumed,--diag_suppress=implicit_return_from_non_void_function,--diag_suppress=unsigned_compare_with_zero,--diag_suppress=declared_but_not_referenced,--diag_suppress=bad_friend_decl --expt-relaxed-constexpr --expt-extended-lambda -O3 -DNDEBUG -Xcompiler=-fPIC   -D_GLIBCXX_USE_CXX11_ABI=1 --expt-relaxed-constexpr -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ -x cu -c ../../../src/layout_transform_ops.cu -o CMakeFiles/fbgemm_gpu_py.dir/src/layout_transform_ops.cu.o && /usr/local/cuda/bin/nvcc  -DFBGEMM_CUB_USE_NAMESPACE -DUSE_C10D_GLOO -DUSE_C10D_MPI -DUSE_C10D_NCCL -DUSE_DISTRIBUTED -DUSE_RPC -DUSE_TENSORPIPE -Dfbgemm_gpu_py_EXPORTS -I/code/FBGEMM/fbgemm_gpu -I/code/FBGEMM/fbgemm_gpu/include -I/code/FBGEMM/include -I/code/FBGEMM/third_party/asmjit/src -I/code/FBGEMM/third_party/cpuinfo/include  -isystem=/usr/local/lib/python3.8/dist-packages/torch/include -isystem=/usr/local/lib/python3.8/dist-packages/torch/include/torch/csrc/api/include 
-isystem=/usr/local/cuda/include -DONNX_NAMESPACE=onnx_c2 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80 -Xcudafe --diag_suppress=cc_clobber_ignored,--diag_suppress=integer_sign_change,--diag_suppress=useless_using_declaration,--diag_suppress=set_but_not_used,--diag_suppress=field_without_dll_interface,--diag_suppress=base_class_has_different_dll_interface,--diag_suppress=dll_interface_conflict_none_assumed,--diag_suppress=dll_interface_conflict_dllexport_assumed,--diag_suppress=implicit_return_from_non_void_function,--diag_suppress=unsigned_compare_with_zero,--diag_suppress=declared_but_not_referenced,--diag_suppress=bad_friend_decl --expt-relaxed-constexpr --expt-extended-lambda -O3 -DNDEBUG -Xcompiler=-fPIC   -D_GLIBCXX_USE_CXX11_ABI=1 --expt-relaxed-constexpr -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ -x cu -M ../../../src/layout_transform_ops.cu -MT CMakeFiles/fbgemm_gpu_py.dir/src/layout_transform_ops.cu.o -o CMakeFiles/fbgemm_gpu_py.dir/src/layout_transform_ops.cu.o.d
>     In file included from /usr/local/cuda/include/cub/device/../util_arch.cuh:37,
>                      from /usr/local/cuda/include/cub/device/../config.cuh:35,
>                      from /usr/local/cuda/include/cub/device/device_scan.cuh:40,
>                      from ../../../src/layout_transform_ops.cu:10:
>     /usr/local/cuda/include/cub/device/../util_namespace.cuh:46:2: error: #error CUB requires a definition of CUB_NS_QUALIFIER when CUB_NS_PREFIX/POSTFIX are defined.
>        46 | #error CUB requires a definition of CUB_NS_QUALIFIER when CUB_NS_PREFIX/POSTFIX are defined.
>           |  ^~~~~
>     [4/169] Building CXX object CMakeFiles/fbgemm_gpu_py.dir/src/cpu_utils.cpp.o

https://github.com/NVIDIA/cub/releases/tag/1.14.0

Pull Request resolved: #1101

Reviewed By: brad-mengchi

Differential Revision: D36189683

Pulled By: jianyuh

fbshipit-source-id: 11286fe3923972fd5f5f332649716e7f2d6e206a
  • Loading branch information
pengwa authored and facebook-github-bot committed May 6, 2022
1 parent 06c6369 commit b3e6411
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 0 deletions.
14 changes: 14 additions & 0 deletions fbgemm_gpu/include/fbgemm_gpu/cub_namespace_postfix.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,20 @@
#undef CUB_NS_PREFIX
#undef CUB_NS_POSTFIX

// Make CUB_VERSION available for the version check that follows.
// <cub/version.cuh> is only included when CUDA_VERSION is defined and is
// at least 11000; in every other case CUB_VERSION falls back to 0.
// NOTE(review): assumes <cub/version.cuh> is what provides CUB_VERSION and
// that no other CUB_VERSION definition is visible on the fallback path.
#include <cuda.h> // for CUDA_VERSION
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
#include <cub/version.cuh>
#else
#define CUB_VERSION 0
#endif

// PR https://github.com/NVIDIA/cub/pull/350 introduced a breaking change:
// when the CUB_NS_[PRE|POST]FIX macros are set, CUB_NS_QUALIFIER must also
// be defined to the fully qualified CUB namespace.
// This header undefines CUB_NS_PREFIX/CUB_NS_POSTFIX above, so for
// CUB_VERSION >= 101400 the matching CUB_NS_QUALIFIER is undefined as well.
#if CUB_VERSION >= 101400
#undef CUB_NS_QUALIFIER
#endif

#define FBGEMM_GPU_CUB_NS_PREFIX fbgemm_gpu::

#else
Expand Down
21 changes: 21 additions & 0 deletions fbgemm_gpu/include/fbgemm_gpu/cub_namespace_prefix.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,28 @@
#undef CUB_NS_PREFIX
#undef CUB_NS_POSTFIX

// Make CUB_VERSION available for the version checks that follow.
// <cub/version.cuh> is only included when CUDA_VERSION is defined and is
// at least 11000; in every other case CUB_VERSION falls back to 0.
// NOTE(review): assumes <cub/version.cuh> is what provides CUB_VERSION and
// that no other CUB_VERSION definition is visible on the fallback path.
#include <cuda.h> // for CUDA_VERSION
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
#include <cub/version.cuh>
#else
#define CUB_VERSION 0
#endif

// PR https://github.com/NVIDIA/cub/pull/350 introduced a breaking change:
// when the CUB_NS_[PRE|POST]FIX macros are set, CUB_NS_QUALIFIER must also
// be defined to the fully qualified CUB namespace.
// For CUB_VERSION >= 101400, drop any existing CUB_NS_QUALIFIER here, in the
// same way CUB_NS_PREFIX/CUB_NS_POSTFIX are undefined above, so that the
// macro can be defined afresh further down without a redefinition conflict.
#if CUB_VERSION >= 101400
#undef CUB_NS_QUALIFIER
#endif

// CUB_NS_PREFIX opens `namespace fbgemm_gpu {` and CUB_NS_POSTFIX closes it.
// NOTE(review): presumably CUB headers included after this point expand these
// macros around their own `cub` namespace, yielding fbgemm_gpu::cub — confirm
// against the CUB version in use.
#define CUB_NS_PREFIX namespace fbgemm_gpu {
#define CUB_NS_POSTFIX } // namespace fbgemm_gpu

// PR https://github.com/NVIDIA/cub/pull/350 introduced a breaking change:
// when the CUB_NS_[PRE|POST]FIX macros are set, CUB_NS_QUALIFIER must also
// be defined to the fully qualified CUB namespace.
// For CUB_VERSION >= 101400, point the qualifier at ::fbgemm_gpu::cub, which
// matches the fbgemm_gpu namespace opened by CUB_NS_PREFIX.
#if CUB_VERSION >= 101400
#define CUB_NS_QUALIFIER ::fbgemm_gpu::cub
#endif

#endif

0 comments on commit b3e6411

Please sign in to comment.