Skip to content

Commit

Permalink
Add an order-insensitive compare function for validating unsorted GPU group-by results
Browse files Browse the repository at this point in the history
  • Loading branch information
Tonglin Chen committed Apr 23, 2019
1 parent 4a62156 commit f002aba
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
\.DS_Store
*.o
groupby
groupby-hash
groupby_hash
*~
37 changes: 34 additions & 3 deletions cpuGroupby.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,19 +400,50 @@ void cpuGroupby::printGPUResults(int* GPU_output_keys, int* GPU_output_values){
std::cout << "End GPU Printing Results" << std::endl;
}

bool cpuGroupby::validGPUResult(int* GPUKeys, int* GPUValues, int GPUOutputRows) {
//ASSUMING THE GPU RESULT IS SORTED
bool cpuGroupby::validGPUResult(int* GPUKeys, int* GPUValues, int GPUOutputRows, bool isSorted) {
if (GPUOutputRows != numGroups) {
std::cout << "FAILED - CPU Rows: " << numGroups << " GPU Rows: " << GPUOutputRows << std::endl;
return false;
}

// cout << "GPU:CPU"<<endl;
for (int i=0; i<num_value_columns*numGroups; i++) {
if (isSorted) {
for (int i=0; i<num_value_columns*numGroups; i++) {
// cout << GPUValues[i] << ":" << output_values[i] << endl;
if (GPUValues[i] != output_values[i]) {
std::cout << "FAILED - CPU data != GPU data " << std::endl;
return false;
}
}
} else {
std::vector<size_t> idx(GPUOutputRows);
std::iota(idx.begin(), idx.end(), 0);
std::sort(idx.begin(), idx.end(),
[=] (const size_t idx1, const size_t idx2) {
for (size_t i = 0; i < num_key_columns; ++i) {
size_t data1 = GPUKeys[i * GPUOutputRows + idx1];
size_t data2 = GPUKeys[i * GPUOutputRows + idx2];
if (data1 > data2) return false;
if (data1 < data2) return true;
}
return false;
});
for (size_t i = 0; i < GPUOutputRows; ++i) {
for (size_t j = 0; j < num_key_columns; ++j) {
if (GPUKeys[j * GPUOutputRows + idx[i]] != output_keys[j * GPUOutputRows + i]) {
std::cout << "FAILED - CPU key != GPU key at entry " << i << std::endl;
return false;
}
}
}
for (size_t i = 0; i < GPUOutputRows; ++i) {
for (size_t j = 0; j < num_value_columns; ++j) {
if (GPUValues[j * GPUOutputRows + idx[i]] != output_values[j * GPUOutputRows + i]) {
std::cout << "FAILED - CPU data != GPU data at entry " << i << std::endl;
return false;
}
}
}
}
std::cout << "PASSED - CPU data == GPU data " << std::endl;
return true;
Expand Down
2 changes: 1 addition & 1 deletion cpuGroupby.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class cpuGroupby {
~cpuGroupby(); // To do - make sure arrays are freed

// GPU Validation
bool validGPUResult(int* GPUKeys, int* GPUValues, int GPUOutputRows);
bool validGPUResult(int* GPUKeys, int* GPUValues, int GPUOutputRows, bool isSorted=true);
};

#endif /* cpuGroupby_hpp */
1 change: 0 additions & 1 deletion groupby_hash_templates.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ bool keyEqualRM(T* key_columns, size_t idx1, size_t idx2, size_t num_key_rows, s

// hashKey generating
template <typename T> __host__ __device__
__host__ __device__
size_t HashKey(size_t idx, T* key_columns, size_t num_key_rows, size_t num_key_columns) {
size_t hash_key = 0;
for (size_t i=0; i < num_key_columns; ++i) {
Expand Down
2 changes: 1 addition & 1 deletion main_hash.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ int main(int argc, const char * argv[]) {
std::cout << "CPU time: " << cpu_duration.count() << " s" << std::endl;
std::cout << "GPU time: " << gpu_duration.count() << " s" << std::endl;

slowGroupby.validGPUResult(gpu_output_keys, gpu_output_values, gpu_output_rows);
slowGroupby.validGPUResult(gpu_output_keys, gpu_output_values, gpu_output_rows, false);

cudaFreeHost(original_value_columns);
cudaFreeHost(original_key_columns);
Expand Down
1 change: 0 additions & 1 deletion makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Makefile for GPU GroupBy Project
# EE-5351 Fall 2018
dbg = 1
NVCC = nvcc
NVCC_FLAGS = -I/usr/local/cuda/include -gencode=arch=compute_60,code=\"sm_60\" --relocatable-device-code true
CXX_FLAGS = -std=c++11
Expand Down

0 comments on commit f002aba

Please sign in to comment.