forked from rkh/hadoop-scripting
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmarkov.sh
executable file
·49 lines (41 loc) · 1.12 KB
/
markov.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/bin/bash
# Benchmark driver: for each input size a prefix of corpera/full.out is cut
# with dd, uploaded with `hadoop fs -put`, and the Hadoop and Pig jobs are
# timed on it.
#
# Input sizes in MB; each value is passed to dd as count= with bs=1M.
# NOTE(review): 3000 and 4000 are listed twice, so those sizes are processed
# twice per invocation - confirm that this is intended.
SIZES="10 50 100 500 1000 2000 3000 4000 5000 3000 4000"
#######################################
# Run a command under /usr/bin/time and collect the command's stdout, its
# stderr and the timing report in one per-run log file.
# Globals:   HOME (read) - the log is written to $HOME/logs
# Arguments: $1 - input size in MB (part of the log file name)
#            $2 - run number (part of the log file name)
#            $3 - label of the system being timed (log file name prefix)
#            $4 - command line to time, given as ONE string that is split
#                 into words by the shell
# Outputs:   everything is appended to $HOME/logs/<$3>_<$1>_mb_<$2>_run
# Returns:   status of the /usr/bin/time invocation; non-zero as well if
#            the log directory cannot be created
#######################################
function time_that() {
  local log_dir="${HOME}/logs"
  local log_file="${log_dir}/${3}_${1}_mb_${2}_run"

  # Without the directory every redirection below would fail.
  mkdir -p -- "$log_dir" || return

  # Start from a clean log; -f keeps the very first run (no log yet) quiet.
  rm -f -- "$log_file"

  # $4 is intentionally unquoted: it carries a whole command line that has to
  # be word-split into the program and its arguments.
  # shellcheck disable=SC2086
  /usr/bin/time -a -o "$log_file" $4 >>"$log_file" 2>&1
}
#######################################
# Time one run of the Hadoop jar.
# Removes the `javachains` path with `hadoop fs -rmr` first (presumably the
# output of a previous run - confirm), then times the jar through time_that
# under the log label "java".
# Arguments: $1 - input size in MB (used in the message and the log name)
#            $2 - run number
# Outputs:   one progress line on stdout
# Returns:   status of time_that
#######################################
function run_hadoop() {
  local size_mb=$1
  local run_no=$2

  echo "Running Hadoop on $size_mb MB in run no $run_no"
  hadoop fs -rmr javachains
  # Quoted so that size and run number always arrive as exactly two arguments.
  time_that "$size_mb" "$run_no" "java" "hadoop jar /home/hadoop02/hadoop02.jar"
}
#######################################
# Time one run of the Pig script splitsuc.pig.
# Removes the `wikipedia.sql` path with `hadoop fs -rmr` first (presumably
# left over from a previous run - confirm), then runs pig from
# /home/hadoop02/pig through time_that under the log label "pig".
# Arguments: $1 - input size in MB (used in the message and the log name)
#            $2 - run number
# Outputs:   one progress line on stdout
# Returns:   status of time_that, or non-zero if the pig directory cannot be
#            entered (in which case nothing is timed)
#######################################
function run_pig() {
  echo "Running Pig on $1 MB in run no $2"
  hadoop fs -rmr wikipedia.sql

  # The subshell confines the directory change to the pig run, so the caller's
  # working directory is untouched on every exit path.
  (
    # Do not time pig from the wrong directory: give up on this run instead.
    cd /home/hadoop02/pig || exit
    time_that "$1" "$2" "pig" "pig /home/hadoop02/hadoop-scripting/pig/splitsuc/splitsuc.pig"
  )
}
# Placeholder for the Jaql benchmark: it only announces the run on stdout
# and does not start any job.
#   $1 - input size in MB
#   $2 - run number
run_jaql() {
  printf 'Running Jaql on %s MB in run no %s\n' "$1" "$2"
}
#######################################
# Benchmark driver: for every size in SIZES cut a prefix of the corpus,
# replace corpera/current.dat in the Hadoop file system with it, and time the
# Hadoop and Pig jobs for runs up to 4.
# Globals:   SIZES (read) - whitespace-separated list of input sizes in MB
# Arguments: none
# Outputs:   progress and error messages; current.dat in the working directory
# Returns:   0; sizes whose input could not be prepared are skipped
#######################################
function main() {
  local size run first_run

  # SIZES is a whitespace-separated list and is word-split on purpose.
  # shellcheck disable=SC2086
  for size in $SIZES; do
    # Take the first $size MB of the full corpus as input for this round.
    if ! dd if=corpera/full.out of=current.dat bs=1M count="$size"; then
      echo "Could not create ${size} MB input, skipping this size" >&2
      continue
    fi

    # Best effort: the file does not exist yet on the very first upload.
    hadoop fs -rm corpera/current.dat
    if ! hadoop fs -put current.dat corpera/current.dat; then
      # Timing the jobs now would measure whatever data was there before.
      echo "Could not upload ${size} MB input, skipping this size" >&2
      continue
    fi

    # The 3000 MB input only gets runs 3 and 4, every other size runs 1-4.
    # The check has to use the loop variable; testing an unset variable makes
    # `[` fail and silently selects the 1-4 branch for every size.
    # NOTE(review): presumably runs 1-2 for 3000 MB were recorded earlier -
    # confirm.
    first_run=1
    if [ "$size" -eq 3000 ]; then
      first_run=3
    fi

    for ((run = first_run; run <= 4; run++)); do
      run_hadoop "$size" "$run"
      run_pig "$size" "$run"
      #run_jaql "$size" "$run"
    done
  done
}

main "$@"