-
Notifications
You must be signed in to change notification settings - Fork 1
/
run_mb.sh
110 lines (92 loc) · 4.37 KB
/
run_mb.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env bash
# Copyright 2023 Yuhao Zhang and Arun Kumar. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Load shared runner environment/helpers (RESTART_SPARK, RUN_EXP, MAKE_CLIENT_LOG_DIR,
# $OPTIONS, $EPOCHS, $SIZE, cluster paths, ...).
source runner_helper.sh

# Positional arguments start at $5 ($1-$4 are presumably consumed by the
# caller / runner_helper.sh — TODO confirm).
num_layers=${5:-"3"}      # number of GNN layers
lotan_dataset=${6:-"2"}   # numeric dataset id; mapped to a name below
aggPushDown=${7:-"1"}     # push aggregation down into GraphX (1 = on)
noReverseGraph=${8:-"0"}  # skip building the reverse graph (1 = skip)
ioType=${9:-"1"}
ipcType=${10:-"1"}
E2D=${11:-"0"}
normalize=${12:-"1"}
sparse=${13:-"0"}
# dump=${8:-"0"}

# Map the numeric dataset id onto its OGB dataset name. Fail fast on an
# unknown id instead of leaving $dataset unset and erroring much later.
case "$lotan_dataset" in
  1) dataset="ogbn-products" ;;
  2) dataset="ogbn-arxiv" ;;
  3) dataset="ogbn-papers100M" ;;
  *)
    echo "run_mb.sh: unknown lotan_dataset id '${lotan_dataset}' (expected 1, 2, or 3)" >&2
    exit 1
    ;;
esac

echo "$OPTIONS"
echo "$num_layers"
echo "$dataset"
# spark_worker_cores=40
# echo "Overwriting SPARK CONFIGS"
# $PARALLEL_SSH_ALL "cd /mnt/nfs/gsys; \"$DGL_PY\" overwrite_spark_env.py --spark_worker_cores ${spark_worker_cores}"
# echo "Restarting SPARK"
# Restart the Spark cluster (helper from runner_helper.sh).
RESTART_SPARK
# Build the GraphX driver jar. Run sbt in a subshell so a failed `cd graphp`
# cannot leave the rest of the script running from the wrong directory
# (the old `cd graphp; sbt assembly; cd ..` would build in-place and then
# escape the script's working dir if the first cd failed).
(cd graphp && sbt assembly)
# spark-submit invocation for the Lotan GraphX driver jar. The
# extraJavaOptions point dev.ludovic.netlib at the system MKL shared library
# for native BLAS/LAPACK. (Kept byte-identical; it is executed verbatim later.)
CMD_STR='spark-submit --conf "spark.executor.extraJavaOptions=-Ddev.ludovic.netlib.blas.nativeLibPath=/usr/lib/x86_64-linux-gnu/mkl/liblapack.so -Ddev.ludovic.netlib.lapack.nativeLibPath=/usr/lib/x86_64-linux-gnu/mkl/liblapack.so" --class "org.apache.spark.graphx.lotan.main.SimpleApp" graphp/target/scala-2.12/simple-project_2.12-1.0.jar'
# -----------------------------------------------------------------------------
epochs=$EPOCHS  # EPOCHS comes from runner_helper.sh / the environment
export EXP_NAME="spark_dump"
# Convert the DGL dataset into Spark-readable dump files.
# NOTE(review): $DGL_PY left unquoted in case it carries interpreter args.
$DGL_PY dgl_to_spark_data.py --self_loop --undirected --dataset "${dataset}"
# -------------------------------------MB-------------------------------------
# --lotan_model_batching
options="--io_type byte --hard_partition $OPTIONS"
# MASTER="master"
# -----------------------------------server-----------------------------------
# Launch the DL-engine server process on every worker, in the background.
export EXP_NAME="server"
MAKE_CLIENT_LOG_DIR
# some dummy configs, deprecated, true configuration is in gsys/constants.py
lr=0.01
dropout=0.0
optimizer="adam"
# Quoting here is deliberate and order-sensitive:
#  - the double-quoted part ($options, $MASTER_IP, $num_layers, ...) is
#    expanded LOCALLY before being shipped over ssh;
#  - the trailing single-quoted '$WORKER_NAME.log' is shipped verbatim so each
#    REMOTE host expands its own $WORKER_NAME for the log filename;
#  - the final `&` backgrounds the whole parallel-ssh fan-out so the script
#    can continue to the GraphX side while the servers run.
$PARALLEL_SSH_ALL "export DGL_PY=$DGL_PY; cd $LOTAN_NFS_ROOT/bin; bash run_server_main.sh \"$options --master ${MASTER_IP} --model_num_layers ${num_layers} --dataset ${dataset} --model_lr ${lr} --model_epochs ${epochs} --save_model_root ${MODEL_DIR} --size ${SIZE} --model_dropout ${dropout} --model_optimizer ${optimizer}\" 2>&1 | tee -a ${LOG_DIR}/${EXP_NAME}/"'$WORKER_NAME.log' &
echo "Use the following command to monitor the DL Engine"
echo "------------------------------------------------------------------------"
echo "tail -f ${LOG_DIR}/${EXP_NAME}/"'$WORKER_NAME.log'
echo "------------------------------------------------------------------------"
# ----------------------------------------------------------------------------
# -----------------------------------graphx-----------------------------------
# Configure the GraphX (Spark) side of the run.
numMachines=$SIZE
# CPUs=${spark_worker_cores}
export EXP_NAME="graphx"
# --drillDown 1
# Assemble the base spark-submit command line piece by piece; every flag value
# comes from the positional arguments parsed at the top of the script or from
# runner_helper.sh (numVParts, numEParts).
SPARK_BASE_CMD="$CMD_STR"
SPARK_BASE_CMD+=" --E2D ${E2D} --numVParts ${numVParts} --ipcType ${ipcType}"
SPARK_BASE_CMD+=" --ioType ${ioType} --noReverseGraph ${noReverseGraph}"
SPARK_BASE_CMD+=" --drillDown 1 --fillFeatures 1 --normalize ${normalize}"
SPARK_BASE_CMD+=" --sparse ${sparse} --run 1 --numMachines ${numMachines}"
SPARK_BASE_CMD+=" --numEParts ${numEParts} --numEpochs ${epochs}"
SPARK_BASE_CMD+=" --dataset ${lotan_dataset} --miniBatchSize 500000"
SPARK_BASE_CMD+=" --savePlain 0 --aggPushDown ${aggPushDown} --numLayers ${num_layers}"
# ----------------------------------------------------------------------------
# ----------------------------hardPartition-----------------------------------
# Clear any stale partitioned-graph dumps from HDFS. `-f` suppresses the
# error when a path does not exist (first run on a fresh cluster); `set +e`
# additionally tolerates other best-effort failures here — intentional.
set +e
"$HADOOP_HOME"/bin/hdfs dfs -rm -r -f /edgesRev
"$HADOOP_HOME"/bin/hdfs dfs -rm -r -f /edges
"$HADOOP_HOME"/bin/hdfs dfs -rm -r -f /vertices
set -e
# Pass 1: partition the graph and dump the partitions to HDFS.
EXECUTE_CMD="$SPARK_BASE_CMD --hardPartition 1"
RUN_EXP "$EXECUTE_CMD"
echo "Dumped graph to HDFS"
# Pass 2: run training, reading the pre-partitioned graph back from HDFS.
EXECUTE_CMD="$SPARK_BASE_CMD --hardPartitionRead 1"
RUN_EXP "$EXECUTE_CMD"
# ----------------------------------------------------------------------------
# Best-effort teardown: stop server/pipe processes on every worker. pkill
# returns non-zero when nothing matched, so disable errexit first — intentional.
set +e
$PARALLEL_SSH_ALL "pkill -f server_main.py; pkill -f pipe.py"
# ----------------------------------------------------------------------------
# Drop a marker file recording this run's configuration in its name.
# (Path fully quoted — the old `touch $LOG_DIR/...` word-split on spaces.)
touch "${LOG_DIR}/${OPTIONS}_${numEParts}_${num_layers}"