-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: run_general.sh
executable file
·80 lines (74 loc) · 1.87 KB
/
run_general.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/bin/bash
# run_general.sh — SLURM per-task launcher for distributed contrastive training.
#
# Copies the dataset to node-local storage ($SLURM_TMPDIR) and starts one
# torch.distributed.launch process per node, pointing at ./train.py.
#
# Required environment (set by SLURM / the submitting sbatch script):
#   SLURM_JOB_NUM_NODES, SLURM_PROCID, SLURM_LOCALID, SLURM_TMPDIR,
#   NPROC_PER_NODE (GPUs per node), PARENT (master addr), MPORT (master port).
#
# Positional arguments (all 25 are required):
#   $1  MODEL_DIR          $2  DATA_DIR           $3  TRAIN_FILE
#   $4  OUT_DIR            $5  EPOCH              $6  BATCH_SIZE
#   $7  LR                 $8  LENGTH             $9  TEMP
#   $10 DYNAMIC            $11 SUPERVISE          $12 MLM
#   $13 DROPOUT_POSPAIR    $14 MLM_WEIGHT         $15 CLS
#   $16 CLS_WEIGHT         $17 LABEL2IND          $18 SOFT_SUP_PMI
#   $19 WEIGHT_FILE        $20 SOFT_SUP_CLS       $21 CLS_SCALE
#   $22 DUAL_CLS           $23 MASK_POSITIVE      $24 MASK_POSITIVE_LEN
#   $25 SELF_SUPERVISE

# Fail fast: abort on any command failure, unset variable, or pipeline error,
# so a failed data copy cannot silently proceed to training.
set -euo pipefail

/bin/hostname -s

# Make NCCL collectives block instead of timing out asynchronously.
export NCCL_BLOCKING_WAIT=1

echo "Num of node, ${SLURM_JOB_NUM_NODES}"
echo "Num of GPU per node, ${NPROC_PER_NODE}"
echo "PROCID: ${SLURM_PROCID}"
echo "LOCALID: ${SLURM_LOCALID}"

MODEL_DIR=$1
DATA_DIR=$2
TRAIN_FILE=$3
OUT_DIR=$4
EPOCH=$5
BATCH_SIZE=$6
LR=$7
LENGTH=$8
TEMP=$9
DYNAMIC=${10}
SUPERVISE=${11}
MLM=${12}
DROPOUT_POSPAIR=${13}
MLM_WEIGHT=${14}
CLS=${15}
CLS_WEIGHT=${16}
LABEL2IND=${17}
SOFT_SUP_PMI=${18}
WEIGHT_FILE=${19}
SOFT_SUP_CLS=${20}
CLS_SCALE=${21}
DUAL_CLS=${22}
MASK_POSITIVE=${23}
MASK_POSITIVE_LEN=${24}
SELF_SUPERVISE=${25}

# Debug echoes (kept from original: CLS_SCALE printed twice, then MASK_POSITIVE).
echo "${CLS_SCALE}"
echo "${21}"
echo "${23}"

# Stage the dataset onto node-local scratch for fast I/O during training.
# -p so re-running on the same allocation does not fail on an existing dir.
mkdir -p "${SLURM_TMPDIR}/data/"
cp -r "${DATA_DIR}"/* "${SLURM_TMPDIR}/data"

python3 -m torch.distributed.launch \
    --nproc_per_node="${NPROC_PER_NODE}" \
    --nnodes="${SLURM_JOB_NUM_NODES}" \
    --node_rank="${SLURM_PROCID}" \
    --master_addr="${PARENT}" --master_port="${MPORT}" \
    ./train.py \
    --model_name_or_path "${MODEL_DIR}" \
    --train_file "${SLURM_TMPDIR}/data/${TRAIN_FILE}" \
    --output_dir "${OUT_DIR}" \
    --num_train_epochs "${EPOCH}" \
    --do_dynamic_supervise "${DYNAMIC}" \
    --per_device_train_batch_size "${BATCH_SIZE}" \
    --do_supervise "${SUPERVISE}" \
    --do_selfsupervise "${SELF_SUPERVISE}" \
    --do_soft_supervise_pmi "${SOFT_SUP_PMI}" \
    --supercl_pmiweights_file "${WEIGHT_FILE}" \
    --do_soft_supervise_cls "${SOFT_SUP_CLS}" \
    --cls_weight_scale "${CLS_SCALE}" \
    --dual_training_cls "${DUAL_CLS}" \
    --do_mlm "${MLM}" \
    --mlm_weight "${MLM_WEIGHT}" \
    --do_cls "${CLS}" \
    --cls_weight "${CLS_WEIGHT}" \
    --do_mask_positive "${MASK_POSITIVE}" \
    --mask_positive_len "${MASK_POSITIVE_LEN}" \
    --do_pospair_dropout "${DROPOUT_POSPAIR}" \
    --learning_rate "${LR}" \
    --max_seq_length "${LENGTH}" \
    --save_strategy "epoch" \
    --load_best_model_at_end \
    --pooler_type cls \
    --mlp_only_train \
    --overwrite_output_dir \
    --temp "${TEMP}" \
    --do_train \
    --fp16 \
    --label2ind_file "${LABEL2IND}"