FBU Queries (PDSW19)
- Clone and build skyhook.
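If you have not cloned and configured the tree yet, the following is a minimal sketch of a typical Ceph-style setup; the repository URL and the dependency/configure helper scripts are assumptions from the standard Ceph build flow, so adjust them to your own fork.
git clone https://github.com/uccross/skyhookdm-ceph.git ;   # assumed repo location; use your fork
cd skyhookdm-ceph ;
./install-deps.sh ;   # install build dependencies (standard Ceph helper script)
./do_cmake.sh ;       # configure an out-of-source build under ./build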
cd build/
- Get the data sets.
wget https://users.soe.ucsc.edu/~kdahlgren/pdsw19/testdata/dataset_arity3_50000_rows.txt ;
wget https://users.soe.ucsc.edu/~kdahlgren/pdsw19/testdata/dataset_arity3_500000_rows.txt ;
wget https://users.soe.ucsc.edu/~kdahlgren/pdsw19/testdata/dataset_arity3_5000000_rows.txt ;
wget https://users.soe.ucsc.edu/~kdahlgren/pdsw19/testdata/dataset_arity4_42000_rows.txt ;
wget https://users.soe.ucsc.edu/~kdahlgren/pdsw19/testdata/dataset_arity4_420000_rows.txt ;
wget https://users.soe.ucsc.edu/~kdahlgren/pdsw19/testdata/dataset_arity4_4200000_rows.txt ;
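To sanity-check the downloads, each file's line count should match the row count in its name (assuming one row per line); for example:
wc -l dataset_arity3_50000_rows.txt ;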
- (Re)launch the virtual cluster, if applicable.
../src/stop.sh;
make -j12 vstart;
../src/stop.sh;
../src/vstart.sh -d -n -x;
bin/rados mkpool tpchflatbuf ;
bin/ceph osd pool set tpchflatbuf size 1 ;
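Optionally confirm the cluster and pool are up before writing data (standard Ceph status commands, run from build/):
bin/ceph -s ;
bin/ceph osd lspools ;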
- Write a data set into a Ceph object with bin/fbwriter_fbu, using the following parameters.
parameter | description |
---|---|
--filename | name of the file containing the pipe-delimited relation to write into Ceph |
--write_type | rows or cols |
--debug | yes or no |
--schema_datatypes | comma-delimited list of data types; must match the table schema |
--schema_attnames | comma-delimited list of attribute names |
--table_name | name to give the table |
--nrows | number of rows in the input file |
--ncols | number of columns in the input file |
--targetoid | name of the Ceph object to write the data into |
--targetpool | name of the Ceph pool to use |
--writeto | ceph or disk |
--targetformat | SFT_FLATBUF_UNION_ROW or SFT_FLATBUF_UNION_COL |
Note: the column-format (SFT_FLATBUF_UNION_COL) examples below also pass --cols_per_fb, which is not listed above.
# FBU_Rows arity-3 1mb (50,000 rows) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_50000_rows.txt --write_type rows --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_1mb --nrows 50000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --targetformat SFT_FLATBUF_UNION_ROW ;
# FBU_Rows arity-3 (500,000 rows) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_500000_rows.txt --write_type rows --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_100mb --nrows 500000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --targetformat SFT_FLATBUF_UNION_ROW ;
# FBU_Rows arity-3 (5,000,000 rows) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_5000000_rows.txt --write_type rows --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_100mb --nrows 5000000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --targetformat SFT_FLATBUF_UNION_ROW ;
# FBU_Cols arity-3 1mb (50,000 rows) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_50000_rows.txt --write_type cols --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_1mb --nrows 50000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --cols_per_fb 1 --targetformat SFT_FLATBUF_UNION_COL ;
# FBU_Cols arity-3 (500,000 rows) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_500000_rows.txt --write_type cols --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_1mb --nrows 500000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --cols_per_fb 1 --targetformat SFT_FLATBUF_UNION_COL ;
# FBU_Cols arity-3 (5,000,000 rows) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_5000000_rows.txt --write_type cols --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_1mb --nrows 5000000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --cols_per_fb 1 --targetformat SFT_FLATBUF_UNION_COL ;
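After a write completes, the object can be checked with standard rados commands; obj.0 below matches the --targetoid used above:
bin/rados -p tpchflatbuf ls ;
bin/rados -p tpchflatbuf stat obj.0 ;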
- Run your queries. Note that Skyhook currently only queries objects whose names begin with the "obj." prefix.
bin/run-query --num-objs 1 --pool tpchflatbuf --wthreads 1 --qdepth 10 --query flatbuf --select "*" ;
bin/run-query --num-objs 1 --pool tpchflatbuf --wthreads 1 --qdepth 10 --query flatbuf --select "att0,lt,25;att1,lt,25.0;" --project-cols att0,att1,att2,att3 --data-schema "0 8 0 0 ATT0 ; 1 12 0 0 ATT1 ; 2 15 0 0 ATT2 ;" ;
bin/run-query --num-objs 1 --pool tpchflatbuf --wthreads 1 --qdepth 10 --query flatbuf --select "att0,lt,25;att1,lt,25.0;" --project-cols att0,att2 --data-schema "0 8 0 0 ATT0 ; 1 12 0 0 ATT1 ; 2 15 0 0 ATT2 ;" ;
bin/run-query --num-objs 1 --pool tpchflatbuf --wthreads 1 --qdepth 10 --query flatbuf --select-preds ";att0,sum,0;" --table-name "atable" --data-schema "0 8 0 0 ATT0 ; 1 12 0 0 ATT1 ; 2 15 0 0 ATT2 ;" ;
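If the data set is written across multiple objects with the obj. prefix (obj.0, obj.1, ...), raise --num-objs to match; for example, a sketch reusing the first select query above against two objects:
bin/run-query --num-objs 2 --pool tpchflatbuf --wthreads 1 --qdepth 10 --query flatbuf --select "*" ;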