Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions ci/matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,7 @@ workflows:
- {jobs: ['test'], project: 'nvbench_helper', ctk: '13.0', cxx: ['gcc', 'clang'], gpu: 'rtx2080'}
- {jobs: ['test'], project: 'nvbench_helper', ctk: '13.X', cxx: ['gcc', 'clang'], gpu: 'rtx2080'}
# NVHPC build
# - {jobs: ['build'], cxx: 'nvhpc', ctk: 'nvhpc', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']}
- {jobs: ['build'], cxx: 'nvhpc', ctk: 'nvhpc', std: 'all', project: ['libcudacxx', 'cub', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']}
- {jobs: ['build'], cxx: 'nvhpc', ctk: 'nvhpc', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']}
# clang-cuda
- {jobs: ['build'], cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', std: 'all', sm: '75;80;90;100'}
# libc++
Expand All @@ -105,24 +104,24 @@ workflows:
# libcudacxx - Specialized, testing default SM
- {project: 'libcudacxx', jobs: ['test'], std: 'max', cxx: ['gcc', 'msvc'], gpu: 'rtx2080', sm: 'gpu'}
- {project: 'libcudacxx', jobs: ['build'], std: 'max', cxx: 'clang'}
# - {project: 'libcudacxx', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'}
- {project: 'libcudacxx', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'}
- {project: 'libcudacxx', jobs: ['build'], std: 'max', cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', sm: '70;80;90;100'}
- {project: 'libcudacxx', jobs: ['nvrtc'], std: 'max', gpu: 'rtx2080', sm: 'gpu'}
- {project: 'libcudacxx', jobs: ['verify_codegen']}
# CUB - Specialized, testing default SM
- {project: 'cub', jobs: ['test_nolid', 'test_lid0'], std: 'max', cxx: ['gcc', 'msvc'], gpu: 'rtxa6000', sm: 'gpu'}
- {project: 'cub', jobs: ['build_nolid', 'build_lid0'], std: 'max', cxx: 'clang'}
# - {project: 'cub', jobs: ['build_nolid', 'build_lid0'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'}
- {project: 'cub', jobs: ['build_nolid', 'build_lid0'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'}
- {project: 'cub', jobs: ['build_nolid', 'build_lid0'], std: 'max', cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', sm: '75;80;90;100'}
# Thrust - Keep number of sm small. Kernel coverage is in CUB. This just tests dispatch / glue in lite mode:
- {project: 'thrust', jobs: ['test'], std: 'max', cxx: ['gcc', 'msvc'], gpu: 'rtx4090', sm: 'gpu'}
- {project: 'thrust', jobs: ['build'], std: 'max', cxx: 'clang', sm: '75;120'}
# - {project: 'thrust', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc', sm: '75;120'}
- {project: 'thrust', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc', sm: '75;120'}
- {project: 'thrust', jobs: ['build'], std: 'max', cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', sm: '75;100'}
# cudax
- {project: 'cudax', jobs: ['test'], std: 'max', cxx: ['gcc', 'msvc'], gpu: 'rtx2080', sm: 'gpu'}
- {project: 'cudax', jobs: ['build'], std: 'max', cxx: 'clang', sm: '75;120'}
# - {project: 'cudax', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc', sm: '75;120'}
- {project: 'cudax', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc', sm: '75;120'}
# stdpar
- {project: 'stdpar', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'}
# Python + support
Expand Down
18 changes: 14 additions & 4 deletions thrust/testing/tbb_nvcc_preinclude.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,21 @@
# error "This file must be included before <immintrin.h>"
#endif // _IMMINTRIN_H_INCLUDED

#if defined(__NVCC__) && defined(__CUDACC__)
// Forward declare builtins used by gcc 12. Clang and nvc++ define __GNUC__, too, so we need to explicitly leave them
// out.
#if defined(__NVCC__) && defined(__CUDACC__) && defined(__GNUC__) && !defined(__clang__) && !defined(__NVCOMPILER)
# if __CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ == 0 && __GNUC__ == 12
# if defined(__GNUC__) && !defined(__clang__) && !defined(__NVCOMPILER)
# if __CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ == 0 && __GNUC__ == 12
void __builtin_ia32_ldtilecfg(const void*);
void __builtin_ia32_sttilecfg(void*);
# endif // __CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ == 0 && __GNUC__ == 12
#endif // __NVCC__ && __CUDACC__ && __GNUC__ && !__clang__ && !__NVCOMPILER
# endif // __CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ == 0 && __GNUC__ == 12
# endif // __GNUC__ && !__clang__ && !__NVCOMPILER

// cudafe++ has problems with many builtins used in <avx512fp16intrin.h> and <avx512vlfp16intrin.h> when compiling with
// nvc++ as the host compiler. Since those headers are not used by thrust or tbb, we can prevent their inclusion by
// defining their include guard macros.
# if defined(__NVCOMPILER)
# define __AVX512FP16INTRIN_H
# define __AVX512VLFP16INTRIN_H
# endif // __NVCOMPILER
#endif // __NVCC__ && __CUDACC__
Loading