Add user-set hash table size

Add hash table profiling
chrispypatt committed Apr 30, 2019
1 parent 3693bed commit 0c31060
Showing 5 changed files with 63 additions and 11 deletions.
14 changes: 7 additions & 7 deletions README.md
@@ -10,19 +10,19 @@ To build the program, type 'make' in the root folder of the files.
Command line usage for the sorting-based GroupBy:
```
make
-./groupby # Data Entries: 100k, key_columns: 2, row_columns: 3, unique keys per column: 4
-./groupby <num_rows> # Data Entries: num_rows, key_columns: 2, row_columns: 3, unique keys per column: 4
-./groupby <num_rows> <key_cols> <val_cols> # Data Entries: num_rows, key_columns: key_cols, row_columns: val_cols, unique keys per column: 4
+./groupby # Data Entries: 100k, key_columns: 2, row_columns: 3, unique keys: 4
+./groupby <num_rows> # Data Entries: num_rows, key_columns: 2, row_columns: 3, unique keys: 4
+./groupby <num_rows> <key_cols> <val_cols> # Data Entries: num_rows, key_columns: key_cols, row_columns: val_cols, unique keys: 4
./groupby <num_rows> <key_cols> <val_cols> <distinct_keys> # Data Entries: num_rows, key_columns: key_cols, row_columns: val_cols, unique keys : distinct_keys
```
Note: if the number of distinct keys in each column is m, then n key_columns generate m^n distinct key combinations.
Command line usage for the hash-based GroupBy:
```
make groupby_hash
-./groupby_hash # Data Entries: 100k, key_columns: 2, row_columns: 3, unique keys per column: 4
-./groupby_hash <num_rows> # Data Entries: num_rows, key_columns: 2, row_columns: 3, unique keys per column: 4
-./groupby_hash <num_rows> <key_cols> <val_cols> # Data Entries: num_rows, key_columns: key_cols, row_columns: val_cols, unique keys per column: 4
-./groupby_hash <num_rows> <key_cols> <val_cols> <distinct_keys> # Data Entries: num_rows, key_columns: key_cols, row_columns: val_cols, unique keys : distinct_keys
+./groupby_hash # Data Entries: 100k, key_columns: 2, row_columns: 3, unique keys: 4
+./groupby_hash <num_rows> # Data Entries: num_rows, key_columns: 2, row_columns: 3, unique keys: 4
+./groupby_hash <num_rows> <key_cols> <val_cols> # Data Entries: num_rows, key_columns: key_cols, row_columns: val_cols, unique keys: 4
+./groupby_hash <num_rows> <key_cols> <val_cols> <distinct_keys> <hash_table_size> # Data Entries: num_rows, key_columns: key_cols, row_columns: val_cols, unique keys : distinct_keys, hashtable rows: hash_table_size
```

The program populates random data, computes the GroupBy on both CPU and GPU, then validates that the results match.
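
For orientation, here is a minimal sketch of the shape of the generated input using the default parameters above; the column-major layout and the use of std::rand are illustrative assumptions, not the repository's actual generator:

```
// Illustrative only: shows the shape of the generated columns, not the
// repository's generator. The column-major layout is an assumption.
#include <cstddef>
#include <cstdlib>
#include <vector>

int main()
{
  int num_rows = 100000, key_cols = 2, val_cols = 3, distinct_keys = 4;

  std::vector<int> keys(static_cast<std::size_t>(key_cols) * num_rows);
  std::vector<int> vals(static_cast<std::size_t>(val_cols) * num_rows);

  // Each key column draws from `distinct_keys` values; value columns are random.
  for (int c = 0; c < key_cols; ++c)
    for (int r = 0; r < num_rows; ++r)
      keys[static_cast<std::size_t>(c) * num_rows + r] = std::rand() % distinct_keys;

  for (int c = 0; c < val_cols; ++c)
    for (int r = 0; r < num_rows; ++r)
      vals[static_cast<std::size_t>(c) * num_rows + r] = std::rand() % 1000;

  return 0;
}
```

With these defaults (4 distinct values per key column, 2 key columns), the GroupBy sees at most 4^2 = 16 groups.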
4 changes: 2 additions & 2 deletions groupby_hash.cu
@@ -14,12 +14,12 @@ __constant__ reductionType ops_c[512];
#include "groupby_hash_templates.cu"


-void groupby_hash_GPU(const int* key_columns_h, int num_key_columns, int num_key_rows,
+void groupby_hash_GPU(const int hash_size, const int* key_columns_h, int num_key_columns, int num_key_rows,
const int* value_columns_h, int num_value_columns, int num_value_rows,
reductionType* ops, int num_ops, int* output_keys, int* output_values, int &num_output_rows)
{
constexpr unsigned int BLOCKDIM = 1024;
-constexpr unsigned int HASH_TABLE_SIZE = 1003;
+unsigned int HASH_TABLE_SIZE = hash_size;
constexpr unsigned int GRIDDIM = 40; // 40 because the GTX 1080 only has 20 SMs and each SM can schedule 2048 threads
// change to 56*2 = 112 if testing on Tesla P100
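
Because HASH_TABLE_SIZE is now a runtime value instead of a constexpr, the table must be sized when the GroupBy is launched rather than at compile time. Below is a minimal sketch of what a runtime-sized allocation and initialization might look like; the initTable kernel, the -1 sentinel, and the one-key/one-value layout are illustrative assumptions, not the actual kernels in groupby_hash_templates.cu:

```
// Sketch only: allocate and clear a hash table whose row count is chosen at
// runtime. Layout and kernel are assumptions, not this repository's code.
#include <cstdio>
#include <cuda_runtime.h>

__global__ void initTable(int* keys, int* vals, unsigned int size)
{
  unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx < size) {
    keys[idx] = -1;  // sentinel meaning "slot unused"
    vals[idx] = 0;   // neutral element for a SUM-style reduction
  }
}

int main()
{
  unsigned int hash_size = 1003;  // would come from the new CLI argument
  int *d_keys = nullptr, *d_vals = nullptr;

  // Sized at runtime now that the table size is no longer a compile-time constant.
  cudaMalloc(&d_keys, hash_size * sizeof(int));
  cudaMalloc(&d_vals, hash_size * sizeof(int));

  unsigned int threads = 1024;
  unsigned int blocks = (hash_size + threads - 1) / threads;
  initTable<<<blocks, threads>>>(d_keys, d_vals, hash_size);
  cudaDeviceSynchronize();

  std::printf("initialized %u hash table rows\n", hash_size);
  cudaFree(d_keys);
  cudaFree(d_vals);
  return 0;
}
```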

2 changes: 1 addition & 1 deletion groupby_hash.cuh
@@ -11,7 +11,7 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t
}
}

-void groupby_hash_GPU(const int* key_columns_h, int num_key_columns, int num_key_rows,
+void groupby_hash_GPU(const int hash_size, const int* key_columns_h, int num_key_columns, int num_key_rows,
const int* value_columns_h, int num_value_columns, int num_value_rows,
reductionType* ops, int num_ops, int* output_keys, int* output_values, int &num_output_rows);

45 changes: 45 additions & 0 deletions hash_size_profiling.txt
@@ -0,0 +1,45 @@
CPU : 0.251699 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 100, hash table rows 133
GPU : 0.0164055 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 100, hash table rows 1003
GPU : 0.0164877 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 100, hash table rows 10003
GPU : 0.0168231 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 100, hash table rows 100003
GPU : 0.016795 s

CPU : 0.281081 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 1000, hash table rows 1003
GPU : 0.0340608 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 1000, hash table rows 1333
GPU : 0.0169413 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 1000, hash table rows 10003
GPU : 0.016829 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 1000, hash table rows 100003
GPU : 0.0164798 s

CPU : 0.402252 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 10000, hash table rows 1003
GPU : NA
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 10000, hash table rows 10003
GPU : 0.068726 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 10000, hash table rows 13333
GPU : 0.0172377 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 10000, hash table rows 100003
GPU : 0.0167698 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 10000, hash table rows 1000003
GPU : 0.0194097 s

CPU : 1.58248 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 100000, hash table rows 1003
GPU : NA
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 100000, hash table rows 10003
GPU : NA
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 100000, hash table rows 100003
GPU : 0.401156 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 100000, hash table rows 133333
GPU : 0.0201403 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 100000, hash table rows 1000003
GPU : 0.0230148 s
Data Entries: 1M, key_columns: 2, row_columns: 3, unique keys: 100000, hash table rows 10000003
GPU : 0.0336232 s
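
Reading these numbers, runs where the table has fewer rows than distinct keys produce no GPU timing (the NA entries), while sizing the table at roughly 1.33x the distinct-key count (133, 1333, 13333, 133333) already performs on par with much larger tables. Below is a hedged sketch of that sizing heuristic, inferred from the sizes tried above rather than taken from the repository:

```
// Sketch: size the table ~1.33x the expected number of distinct keys,
// matching the 133 / 1333 / 13333 / 133333 sizes profiled above.
// Illustrative only; the repository does not enforce this rule.
#include <cstdio>

unsigned int suggestTableSize(unsigned int distinct_keys)
{
  // keeps the load factor near 0.75 so the table always has free slots
  return distinct_keys + distinct_keys / 3;
}

int main()
{
  const unsigned int keys[] = {100, 1000, 10000, 100000};
  for (unsigned int k : keys)
    std::printf("%u distinct keys -> %u hash table rows\n", k, suggestTableSize(k));
  return 0;
}
```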
9 changes: 8 additions & 1 deletion main_hash.cu
@@ -14,6 +14,7 @@ int main(int argc, const char * argv[]) {
int num_key_cols = 2;
int num_val_cols = 3;
int num_distinct_keys = 10;
+int hash_table_rows = 1003;
std::vector<std::string> args(argv, argv+argc);
if (argc == 2){
num_rows = stoi(args.at(1));
@@ -26,6 +27,12 @@ int main(int argc, const char * argv[]) {
num_key_cols = stoi(args.at(2));
num_val_cols = stoi(args.at(3));
num_distinct_keys = stoi(args.at(4));
+}else if(argc == 6){
+num_rows = stoi(args.at(1));
+num_key_cols = stoi(args.at(2));
+num_val_cols = stoi(args.at(3));
+num_distinct_keys = stoi(args.at(4));
+hash_table_rows = stoi(args.at(5));
} else {
if (argc != 1) {
std::cerr << "Invalid arguments" << std::endl;
@@ -59,7 +66,7 @@

start = Time::now();

-groupby_hash_GPU(original_key_columns, slowGroupby.num_key_columns,
+groupby_hash_GPU(hash_table_rows,original_key_columns, slowGroupby.num_key_columns,
slowGroupby.num_key_rows, original_value_columns,
slowGroupby.num_value_columns, slowGroupby.num_value_rows,
slowGroupby.ops, slowGroupby.num_ops,
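
Since the profiling above shows no GPU result when the table has fewer rows than distinct keys, a caller might want to sanity-check the new argument before launching. A hedged sketch of such a guard follows; it reuses the variable names from main_hash.cu but is not part of this commit:

```
// Hypothetical guard, not in this commit: refuse to run when the table is
// smaller than the number of groups we expect, since such runs gave no result above.
#include <cstdio>
#include <cstdlib>

void checkTableSize(int hash_table_rows, int num_distinct_keys)
{
  if (hash_table_rows < num_distinct_keys) {
    std::fprintf(stderr,
                 "hash_table_size (%d) is smaller than the expected number of "
                 "distinct keys (%d); the table cannot hold every group\n",
                 hash_table_rows, num_distinct_keys);
    std::exit(EXIT_FAILURE);
  }
}

int main()
{
  checkTableSize(133333, 100000);  // fine: table has headroom
  checkTableSize(1003, 10000);     // exits: mirrors an NA run above
  return 0;
}
```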
