Merge pull request #655 from DrTimothyAldenDavis/dev2

SuiteSparse 7.5.0 with GraphBLAS 9.0.0.
DrTimothyAldenDavis · Dec 30, 2023 · 156f3fc · 156f3fc
2 parents c98548d + ecc5398
commit 156f3fc
Show file tree

Hide file tree

Showing 304 changed files with 23,693 additions and 5,835 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -110,7 +110,7 @@ include ( SuiteSparsePolicy )
 
 if ( SUITESPARSE_USE_SYSTEM_GRAPHBLAS )
     list ( REMOVE_ITEM SUITESPARSE_ENABLE_PROJECTS "graphblas" )
-    find_package ( GraphBLAS 8.3.1 REQUIRED )
+    find_package ( GraphBLAS 9.0.0 REQUIRED )
 else ( )
     if ( "lagraph" IN_LIST SUITESPARSE_ENABLE_PROJECTS )
         # LAGraph requires GraphBLAS.
@@ -210,7 +210,7 @@ endif ( )
 
 if ( SUITESPARSE_USE_SYSTEM_SUITESPARSE_CONFIG )
     list ( REMOVE_ITEM SUITESPARSE_ENABLE_PROJECTS "suitesparse_config" )
-    find_package ( SuiteSparse_config 7.4.0 REQUIRED )
+    find_package ( SuiteSparse_config 7.5.0 REQUIRED )
 else ( )
     if ( "mongoose" IN_LIST SUITESPARSE_ENABLE_PROJECTS
             OR "amd" IN_LIST SUITESPARSE_ENABLE_PROJECTS

diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,12 @@
+Jan 1, 2024: version 7.5.0
+
+    * SuiteSparse_config: 7.5.0, to reflect the addition of GraphBLAS 9.0.0.
+        No change to the build system from SuiteSparse 7.4.0, except to update
+        the date and version numbers.
+    * GraphBLAS 9.0.0: supporting the v2.1 C API;
+        see https://github.com/GraphBLAS/graphblas-api-c
+    * Example 1.6.0: using GraphBLAS 9.0.0 and SuiteSparse_config 7.5.0
+
 Dec 30, 2023: version 7.4.0
 
     * major change to build system: by Markus Mützel.  Includes a

diff --git a/Example/CMakeLists.txt b/Example/CMakeLists.txt
@@ -53,9 +53,9 @@ message ( STATUS "MY prefix path:    ${CMAKE_PREFIX_PATH}" )
 #-------------------------------------------------------------------------------
 
 # cmake inserts the date and version number into Include/my.h:
-set ( MY_DATE "Dec 30, 2023" )
+set ( MY_DATE "Jan 1, 2024" )
 set ( MY_VERSION_MAJOR 1 )
-set ( MY_VERSION_MINOR 5 )
+set ( MY_VERSION_MINOR 6 )
 set ( MY_VERSION_PATCH 0 )
 
 message ( STATUS "Building MY library version: v"
@@ -87,15 +87,15 @@ project ( my
 #-------------------------------------------------------------------------------
 
 # look for all SuiteSparse packages:
-find_package ( SuiteSparse_config 7.4.0 REQUIRED )
+find_package ( SuiteSparse_config 7.5.0 REQUIRED )
 find_package ( AMD 3.3.0 REQUIRED )
 find_package ( BTF 2.3.0 REQUIRED )
 find_package ( CAMD 3.3.0 REQUIRED )
 find_package ( CCOLAMD 3.3.0 REQUIRED )
 find_package ( CHOLMOD 5.1.0 REQUIRED )
 find_package ( COLAMD 3.3.0 REQUIRED )
 find_package ( CXSparse 4.3.0 REQUIRED )
-find_package ( GraphBLAS 8.3.1 )
+find_package ( GraphBLAS 9.0.0 )
 find_package ( KLU 2.3.0 REQUIRED )
 find_package ( KLU_CHOLMOD 2.3.0 REQUIRED )
 find_package ( LDL 3.3.0 REQUIRED )

diff --git a/Example/Include/my.h b/Example/Include/my.h
@@ -11,9 +11,9 @@
 // file, since it is constructed from Config/my.h.in by cmake.
 
 // version and date for example user library
-#define MY_DATE "Dec 30, 2023"
+#define MY_DATE "Jan 1, 2024"
 #define MY_MAJOR_VERSION 1
-#define MY_MINOR_VERSION 5
+#define MY_MINOR_VERSION 6
 #define MY_PATCH_VERSION 0
 
 #ifdef __cplusplus

diff --git a/GraphBLAS/CUDA/CMakeLists.txt b/GraphBLAS/CUDA/CMakeLists.txt
@@ -74,6 +74,10 @@ set_target_properties ( GraphBLAS_CUDA PROPERTIES CUDA_ARCHITECTURES "52;75;80"
 
 target_link_libraries ( GraphBLAS_CUDA PRIVATE CUDA::nvrtc CUDA::cudart_static CUDA::cuda_driver )
 
+if ( TARGET CUDA::nvToolsExt )
+    target_link_libraries ( GraphBLAS_CUDA PRIVATE CUDA::nvToolsExt )
+endif ( )
+
 if ( TARGET CUDA::nvtx3 )
     target_link_libraries ( GraphBLAS_CUDA PRIVATE CUDA::nvtx3 )
     target_compile_definitions ( GraphBLAS_CUDA PRIVATE GBNVTX )

diff --git a/GraphBLAS/CUDA/Config/GB_cuda_common_jitFactory.hpp.in b/GraphBLAS/CUDA/Config/GB_cuda_common_jitFactory.hpp.in
@@ -1,8 +1,8 @@
 //------------------------------------------------------------------------------
-// GB_cuda_common_jitFactory.hpp: common defines for all jitFactory classes
+// GraphBLAS/CUDA/GB_cuda_common_jitFactory.hpp: for all jitFactory classes
 //------------------------------------------------------------------------------
 
-// (c) Nvidia Corp. 2020 All rights reserved
+// (c) Nvidia Corp. 2023 All rights reserved
 // SPDX-License-Identifier: Apache-2.0
 
 //------------------------------------------------------------------------------
@@ -16,54 +16,63 @@
 // configured by cmake from the following file:
 // GraphBLAS/CUDA/Config/GB_cuda_common_jitFactory.hpp.in
 
-#ifndef GB_COMMON_JITFACTORY_H
-#define GB_COMMON_JITFACTORY_H
+#ifndef GB_CUDA_COMMON_JITFACTORY_HPP
+#define GB_CUDA_COMMON_JITFACTORY_HPP
 
 #pragma once
 
+#include "GraphBLAS_cuda.h"
+
 extern "C"
 {
     #include "GB.h"
-    #include "GraphBLAS.h"
-    #undef I
     #include "GB_stringify.h"
 }
 
 #include <iostream>
 #include <cstdint>
-#include "GB_jit_cache.h"
-#include "GB_jit_launcher.h"
+#include "GB_cuda_jitify_cache.h"
+#include "GB_cuda_jitify_launcher.h"
 #include "GB_cuda_mxm_factory.hpp"
-#include "GB_cuda_buckets.h"
-#include "GB_cuda_type_wrap.hpp"
 #include "GB_cuda_error.h"
 #include "../rmm_wrap/rmm_wrap.h"
 #include "GB_iceil.h"
 
 // amount of shared memory to use in CUDA kernel launches
 constexpr unsigned int SMEM = 0 ;
 
-static const std::vector<std::string> GB_jit_cuda_compiler_flags{
+#if 0
+
+static const std::vector<std::string> GB_jit_cuda_compiler_flags{   // OLD
    "-std=c++17",
    //"-G",
    "-remove-unused-globals",
    "-w",
    "-D__CUDACC_RTC__",
-//   "-I.",
-//   "-I..",
-//   "-I../templates",
-//   "-I../CUDA",
-//   "-I../Source/Shared",
-
-   // Add includes relative to GRAPHBLAS_SOURCE_PATH variable
-   "-I" + jit::get_user_graphblas_source_path() + "/CUDA",
-   "-I" + jit::get_user_graphblas_source_path() + "/Source/Shared",
-   "-I" + jit::get_user_graphblas_source_path() + "/CUDA/templates",
+// "-I" + jit::get_user_home_cache_dir(),   // FIXME: add +/cu/00
+// "-I" + jit::get_user_home_cache_dir() + "/src",
    "-I/usr/local/cuda/include",
-
    // FIXME: add SUITESPARSE_CUDA_ARCHITECTURES here, via config
 };
 
+#endif
+
+inline std::vector<std::string> GB_cuda_jit_compiler_flags ( )
+{
+    return (
+        std::vector<std::string>  (
+        {"-std=c++17",
+        //"-G",
+        "-remove-unused-globals",
+        "-w",
+        "-D__CUDACC_RTC__",
+        "-I" + jit::get_user_home_cache_dir(),   // FIXME: add +/cu/00
+        "-I" + jit::get_user_home_cache_dir() + "/src",
+        "-I/usr/local/cuda/include"
+        // FIXME: add SUITESPARSE_CUDA_ARCHITECTURES here, via config
+        })) ;
+} ;
+
 // FIXME: rename GB_jit_cuda_header_names or something
 static const std::vector<std::string> header_names ={};
 

diff --git a/GraphBLAS/CUDA/GB_cuda.h b/GraphBLAS/CUDA/GB_cuda.h
@@ -1,14 +1,12 @@
 //------------------------------------------------------------------------------
-// GB_cuda.h: definitions for using CUDA in GraphBLAS
+// GraphBLAS/CUDA/GB_cuda.h
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS/CUDA, (c) NVIDIA Corp. 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2023, All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0
 
 //------------------------------------------------------------------------------
 
-// This file is #include'd only in the GraphBLAS/CUDA/GB_cuda*.cu source files.
-
 #ifndef GB_CUDA_H
 #define GB_CUDA_H
 
@@ -20,9 +18,7 @@ extern "C"
     #include "GB_warnings.h"
 }
 
-#define GB_LIBRARY
-#include "GraphBLAS.h"
-#undef I
+#include "GraphBLAS_cuda.h"
 
 extern "C"
 {

diff --git a/GraphBLAS/CUDA/GB_AxB_dot3_cuda_branch.cpp → GraphBLAS/CUDA/GB_cuda_AxB_dot3_branch.cpp b/GraphBLAS/CUDA/GB_AxB_dot3_cuda_branch.cpp → GraphBLAS/CUDA/GB_cuda_AxB_dot3_branch.cpp
@@ -1,24 +1,24 @@
 //------------------------------------------------------------------------------
-// GB_AxB_dot3_cuda_branch: decide if GPU should be used for dot3 mxm
+// GraphBLAS/CUDA/GB_cuda_AxB_dot3_branch: decide to use GPU for dot3
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2023, All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0
 
 //------------------------------------------------------------------------------
 
 // Decide branch direction for GPU use for the dot-product MxM
 
-#include "GraphBLAS.h"
-#undef I
+#include "GraphBLAS_cuda.h"
+
 extern "C" 
 {
   #include "GB_mxm.h"
 }
 #include "GB_cuda.h"
 #include <cuda_runtime.h>
 
-bool GB_AxB_dot3_cuda_branch 
+bool GB_cuda_AxB_dot3_branch 
 (
     const GrB_Matrix M,             // mask matrix
     const bool Mask_struct,         // if true, use only the structure of M

diff --git a/GraphBLAS/CUDA/GB_AxB_dot3_cuda.cpp → GraphBLAS/CUDA/GB_cuda_AxB_dot3_jit.cpp b/GraphBLAS/CUDA/GB_AxB_dot3_cuda.cpp → GraphBLAS/CUDA/GB_cuda_AxB_dot3_jit.cpp
@@ -1,8 +1,8 @@
 //------------------------------------------------------------------------------
-// GB_AxB_dot3_cuda: compute C<M> = A'*B in parallel, on the GPU(s)
+// GraphBLAS/CUDA/GB_cuda_AxB_dot3_jit: compute C<M> = A'*B on GPU(s)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2023, All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0
 
 //------------------------------------------------------------------------------
@@ -18,11 +18,10 @@ extern "C"
     #include "GB_mxm.h"
 }
 
-#include "GB_jit_cache.h"
+#include "GB_cuda_jitify_cache.h"
 #include "GB_cuda_common_jitFactory.hpp"
 #include "GB_cuda_reduce_jitFactory.hpp"
 #include "GB_cuda_mxm_dot3_jitFactory.hpp"
-#include "GB_cuda_type_wrap.hpp"
 #include "test/GpuTimer.h"
 
 /*
@@ -60,7 +59,7 @@ void print_array(void *arr, I size, const char *name) {
 // GB_cuda_AxB_dot3_jit
 //------------------------------------------------------------------------------
 
-GrB_Info GB_AxB_dot3_cuda           // C<M> = A'*B using dot product method
+GrB_Info GB_cuda_AxB_dot3_jit       // C<M> = A'*B using dot product method
 (
     GrB_Matrix C,                   // output matrix
     const GrB_Matrix M,             // mask matrix
@@ -291,7 +290,7 @@ GrB_Info GB_AxB_dot3_cuda           // C<M> = A'*B using dot product method
 
         dense_phase1launchFactory dp1lf(my_mxm_spec);
 
-        GBURBLE ("(GPU phase1 start nblk = %d) ",
+        GBURBLE ("(GPU dense phase1 start nblk = %d) ",
             dp1lf.get_number_of_blocks(M)) ;
         kernel_timer.Start();
             dp1lf.jitGridBlockLaunch(C, M, A, B, stream);
@@ -381,7 +380,8 @@ GrB_Info GB_AxB_dot3_cuda           // C<M> = A'*B using dot product method
         // phase1: assign each C(i,j) to a bucket, and count them
         //----------------------------------------------------------------------
 
-        GBURBLE ("(GPU phase1 start nblk = %d) ", p1lf.get_number_of_blocks(M));
+        GBURBLE ("(GPU sparse phase1 start nblk = %d) ",
+            p1lf.get_number_of_blocks(M));
         kernel_timer.Start();
         p1lf.jitGridBlockLaunch(Nanobuckets, Blockbucket, C, M, A, B, stream);
         CU_OK (cudaStreamSynchronize(stream));

diff --git a/GraphBLAS/CUDA/GB_cuda_common_jitFactory.hpp b/GraphBLAS/CUDA/GB_cuda_common_jitFactory.hpp
@@ -1,8 +1,8 @@
 //------------------------------------------------------------------------------
-// GB_cuda_common_jitFactory.hpp: common defines for all jitFactory classes
+// GraphBLAS/CUDA/GB_cuda_common_jitFactory.hpp: for all jitFactory classes
 //------------------------------------------------------------------------------
 
-// (c) Nvidia Corp. 2020 All rights reserved
+// (c) Nvidia Corp. 2023 All rights reserved
 // SPDX-License-Identifier: Apache-2.0
 
 //------------------------------------------------------------------------------
@@ -16,54 +16,63 @@
 // configured by cmake from the following file:
 // GraphBLAS/CUDA/Config/GB_cuda_common_jitFactory.hpp.in
 
-#ifndef GB_COMMON_JITFACTORY_H
-#define GB_COMMON_JITFACTORY_H
+#ifndef GB_CUDA_COMMON_JITFACTORY_HPP
+#define GB_CUDA_COMMON_JITFACTORY_HPP
 
 #pragma once
 
+#include "GraphBLAS_cuda.h"
+
 extern "C"
 {
     #include "GB.h"
-    #include "GraphBLAS.h"
-    #undef I
     #include "GB_stringify.h"
 }
 
 #include <iostream>
 #include <cstdint>
-#include "GB_jit_cache.h"
-#include "GB_jit_launcher.h"
+#include "GB_cuda_jitify_cache.h"
+#include "GB_cuda_jitify_launcher.h"
 #include "GB_cuda_mxm_factory.hpp"
-#include "GB_cuda_buckets.h"
-#include "GB_cuda_type_wrap.hpp"
 #include "GB_cuda_error.h"
 #include "../rmm_wrap/rmm_wrap.h"
 #include "GB_iceil.h"
 
 // amount of shared memory to use in CUDA kernel launches
 constexpr unsigned int SMEM = 0 ;
 
-static const std::vector<std::string> GB_jit_cuda_compiler_flags{
+#if 0
+
+static const std::vector<std::string> GB_jit_cuda_compiler_flags{   // OLD
    "-std=c++17",
    //"-G",
    "-remove-unused-globals",
    "-w",
    "-D__CUDACC_RTC__",
-//   "-I.",
-//   "-I..",
-//   "-I../templates",
-//   "-I../CUDA",
-//   "-I../Source/Shared",
-
-   // Add includes relative to GRAPHBLAS_SOURCE_PATH variable
-   "-I" + jit::get_user_graphblas_source_path() + "/CUDA",
-   "-I" + jit::get_user_graphblas_source_path() + "/Source/Shared",
-   "-I" + jit::get_user_graphblas_source_path() + "/CUDA/templates",
+// "-I" + jit::get_user_home_cache_dir(),   // FIXME: add +/cu/00
+// "-I" + jit::get_user_home_cache_dir() + "/src",
    "-I/usr/local/cuda/include",
-
    // FIXME: add SUITESPARSE_CUDA_ARCHITECTURES here, via config
 };
 
+#endif
+
+inline std::vector<std::string> GB_cuda_jit_compiler_flags ( )
+{
+    return (
+        std::vector<std::string>  (
+        {"-std=c++17",
+        //"-G",
+        "-remove-unused-globals",
+        "-w",
+        "-D__CUDACC_RTC__",
+        "-I" + jit::get_user_home_cache_dir(),   // FIXME: add +/cu/00
+        "-I" + jit::get_user_home_cache_dir() + "/src",
+        "-I/usr/local/cuda/include"
+        // FIXME: add SUITESPARSE_CUDA_ARCHITECTURES here, via config
+        })) ;
+} ;
+
 // FIXME: rename GB_jit_cuda_header_names or something
 static const std::vector<std::string> header_names ={};
 

diff --git a/GraphBLAS/CUDA/GB_cuda_cumsum.cu b/GraphBLAS/CUDA/GB_cuda_cumsum.cu
@@ -1,8 +1,8 @@
 //------------------------------------------------------------------------------
-// GB_cuda_cumsum: cumlative sum of an array using GPU acceleration
+// GraphBLAS/CUDA/GB_cuda_cumsum: cumulative sum of an array on the GPU(s)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2023, All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0
 
 //------------------------------------------------------------------------------