diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 0802dc8a091..638ffdcfbd6 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -82,8 +82,7 @@ workflows: - {jobs: ['test'], project: 'nvbench_helper', ctk: '13.0', cxx: ['gcc', 'clang'], gpu: 'rtx2080'} - {jobs: ['test'], project: 'nvbench_helper', ctk: '13.X', cxx: ['gcc', 'clang'], gpu: 'rtx2080'} # NVHPC build - # - {jobs: ['build'], cxx: 'nvhpc', ctk: 'nvhpc', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']} - - {jobs: ['build'], cxx: 'nvhpc', ctk: 'nvhpc', std: 'all', project: ['libcudacxx', 'cub', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']} + - {jobs: ['build'], cxx: 'nvhpc', ctk: 'nvhpc', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']} # clang-cuda - {jobs: ['build'], cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', std: 'all', sm: '75;80;90;100'} # libc++ @@ -105,24 +104,24 @@ workflows: # libcudacxx - Specialized, testing default SM - {project: 'libcudacxx', jobs: ['test'], std: 'max', cxx: ['gcc', 'msvc'], gpu: 'rtx2080', sm: 'gpu'} - {project: 'libcudacxx', jobs: ['build'], std: 'max', cxx: 'clang'} - # - {project: 'libcudacxx', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'} + - {project: 'libcudacxx', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'} - {project: 'libcudacxx', jobs: ['build'], std: 'max', cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', sm: '70;80;90;100'} - {project: 'libcudacxx', jobs: ['nvrtc'], std: 'max', gpu: 'rtx2080', sm: 'gpu'} - {project: 'libcudacxx', jobs: ['verify_codegen']} # CUB - Specialized, testing default SM - {project: 'cub', jobs: ['test_nolid', 'test_lid0'], std: 'max', cxx: ['gcc', 'msvc'], gpu: 'rtxa6000', sm: 'gpu'} - {project: 'cub', jobs: ['build_nolid', 'build_lid0'], std: 'max', cxx: 'clang'} - # - {project: 'cub', jobs: ['build_nolid', 'build_lid0'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'} + - {project: 'cub', jobs: ['build_nolid', 
'build_lid0'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'} - {project: 'cub', jobs: ['build_nolid', 'build_lid0'], std: 'max', cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', sm: '75;80;90;100'} # Thrust - Keep number of sm small. Kernel coverage is in CUB. This just tests dispatch / glue in lite mode: - {project: 'thrust', jobs: ['test'], std: 'max', cxx: ['gcc', 'msvc'], gpu: 'rtx4090', sm: 'gpu'} - {project: 'thrust', jobs: ['build'], std: 'max', cxx: 'clang', sm: '75;120'} - # - {project: 'thrust', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc', sm: '75;120'} + - {project: 'thrust', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc', sm: '75;120'} - {project: 'thrust', jobs: ['build'], std: 'max', cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', sm: '75;100'} # cudax - {project: 'cudax', jobs: ['test'], std: 'max', cxx: ['gcc', 'msvc'], gpu: 'rtx2080', sm: 'gpu'} - {project: 'cudax', jobs: ['build'], std: 'max', cxx: 'clang', sm: '75;120'} - # - {project: 'cudax', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc', sm: '75;120'} + - {project: 'cudax', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc', sm: '75;120'} # stdpar - {project: 'stdpar', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'} # Python + support diff --git a/thrust/testing/tbb_nvcc_preinclude.h b/thrust/testing/tbb_nvcc_preinclude.h index a6fb9b59ed8..8b0dc3deba6 100644 --- a/thrust/testing/tbb_nvcc_preinclude.h +++ b/thrust/testing/tbb_nvcc_preinclude.h @@ -5,11 +5,21 @@ # error "This file must be included before " #endif // _IMMINTRIN_H_INCLUDED +#if defined(__NVCC__) && defined(__CUDACC__) // Forward declare builtins used by gcc 12. Clang and nvc++ define __GNUC__, too, so we need to explicitly leave them // out. 
-#if defined(__NVCC__) && defined(__CUDACC__) && defined(__GNUC__) && !defined(__clang__) && !defined(__NVCOMPILER) -# if __CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ == 0 && __GNUC__ == 12 +# if defined(__GNUC__) && !defined(__clang__) && !defined(__NVCOMPILER) +# if __CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ == 0 && __GNUC__ == 12 void __builtin_ia32_ldtilecfg(const void*); void __builtin_ia32_sttilecfg(void*); -# endif // __CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ == 0 && __GNUC__ == 12 -#endif // __NVCC__ && __CUDACC__ && __GNUC__ && !__clang__ && !__NVCOMPILER +# endif // __CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ == 0 && __GNUC__ == 12 +# endif // __GNUC__ && !__clang__ && !__NVCOMPILER + +// cudafe++ has problems with many builtins used in <avx512fp16intrin.h> and <avx512vlfp16intrin.h> when compiling with +// nvc++ as the host compiler. Since those headers are not used by thrust nor tbb, we can prevent their inclusion by +// defining their include guard macros. +# if defined(__NVCOMPILER) +# define __AVX512FP16INTRIN_H +# define __AVX512VLFP16INTRIN_H +# endif // __NVCOMPILER +#endif // __NVCC__ && __CUDACC__