-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path.my_bashrc
116 lines (111 loc) · 3.29 KB
/
.my_bashrc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/bin/bash
#######################################
# Sum one column of a tab-separated file.
# Arguments:
#   $1 - 1-based index of the column to sum
#   $2 - path of the tab-separated file; when omitted, awk reads stdin
# Outputs:
#   The column total on stdout (0 when the input has no records).
#######################################
function count()
{
    local field=$1
    local filename=$2
    # The column index is handed to awk with -v rather than spliced into the
    # program text, and the path is quoted so names containing spaces survive.
    # ${filename:+...} expands to nothing when no path was given, which keeps
    # the "read from a pipe" usage working.
    awk -v field="${field}" '
        BEGIN { FS = "\t"; OFS = "\t" }
        { sum += $field }
        END { print sum + 0 }   # "+ 0" makes an empty input print 0, not a blank line
    ' ${filename:+"${filename}"}
}
#######################################
# Print decile values and summary statistics for one column of a
# tab-separated file.
#
# The rows are sorted on the column with sort's general-numeric ordering and
# the value found at rank int(N * p / 100) is reported for each percentile p.
#
# NOTE: a shell function with this name takes precedence over the external
# stat command in any shell that sources this file.
#
# Arguments:
#   $1 - 1-based index of the column to analyse
#   $2 - path of the tab-separated file; when omitted, sort reads stdin
# Outputs:
#   "10%------>" ... "100%------>" lines, a separator line, then MIN, AVG,
#   MAX and the 25/50/75 percentiles, each as label<TAB>value.
# Returns:
#   Non-zero (with a message on stderr) when the input has no records.
#######################################
function stat()
{
    local field=$1
    local filename=$2
    # The sorted column is buffered in awk and every statistic is derived in
    # END. That way the record count is the number of rows awk actually saw
    # (a missing trailing newline no longer loses the last row), and ranks
    # that coincide on small inputs are all reported instead of only the
    # first one that matches.
    sort -t$'\t' -k"${field},${field}g" ${filename:+"${filename}"} |
    awk -v field="${field}" '
        # Rank of the pct-th percentile among NR sorted rows, never below 1.
        # Integer arithmetic avoids float error: 90 * 0.7 evaluates just
        # below 63 in double precision, which would select rank 62.
        function rank_of(pct,    r) {
            r = int(NR * pct / 100)
            return (r < 1) ? 1 : r
        }
        BEGIN { FS = "\t"; OFS = "\t" }
        {
            value[NR] = $field
            sum += $field
        }
        END {
            if (NR == 0) {
                # Nothing to average; bail out instead of dividing by zero.
                print "stat: no records" > "/dev/stderr"
                exit 1
            }
            for (pct = 10; pct <= 100; pct += 10)
                print pct "%------>", value[rank_of(pct)]
            print "____________________________________"
            print "MIN=", value[1]
            print "AVG=", sum / NR
            print "MAX=", value[NR]
            print "25 percentile=", value[rank_of(25)]
            print "50 percentile=", value[rank_of(50)]
            print "75 percentile=", value[rank_of(75)]
        }
    '
}
#######################################
# Frequency table for one column of a tab-separated file.
# Arguments:
#   $1 - 1-based index of the column to tabulate
#   $2 - path of the tab-separated file; when omitted, awk reads stdin
# Outputs:
#   One line per distinct value: value<TAB>count<TAB>fraction-of-rows,
#   ordered by ascending count.
#######################################
function table()
{
    local field=$1
    local filename=$2
    awk -v field="${field}" '
        BEGIN { FS = "\t"; OFS = "\t"; total = 0 }
        { freq[$field]++; total++ }
        END {
            for (key in freq)
                print key, freq[key], freq[key] * 1.0 / total
        }
    ' ${filename:+"${filename}"} |
    # awk emits tab-separated columns, so sort must split on tabs as well;
    # with the default blank separator a value containing spaces shifts
    # key 2 away from the count column.
    sort -t$'\t' -k2,2n
}
#######################################
# Randomly sample rows of a tab-separated file.
#
# Two modes, chosen by the numeric value of the first argument:
#   rate < 1  - split mode: each row goes to "<file>.train" with probability
#               rate, otherwise to "<file>.test". Nothing is written to stdout.
#   rate >= 1 - fixed-size mode: int(rate) rows are kept with a reservoir
#               (the first int(rate) rows fill it; row NR then replaces a
#               random slot with probability int(rate)/NR) and printed to
#               stdout in unspecified order.
#
# NOTE: rand() is not seeded here, so awk implementations that do not
# self-seed return the same selection on every run.
#
# Arguments:
#   $1 - sampling rate (< 1) or sample size (>= 1)
#   $2 - path of the tab-separated file; when omitted, awk reads stdin
#######################################
function sample()
{
    local rate=$1
    local filename=$2
    # The path travels through the environment so that quotes or backslashes
    # in it cannot alter the awk program, and the rate goes through -v so it
    # is compared as a number (a spliced-in "0.5" literal is a string and
    # makes awk compare rand() to it character by character).
    SAMPLE_SOURCE="${filename}" awk -v rate="${rate}" '
        BEGIN {
            FS = "\t"; OFS = "\t"
            rate += 0
            split_mode = (rate < 1)
            sample_number = int(rate)
            # Build the targets up front: an unparenthesized concatenation
            # after ">" is parsed differently by different awks.
            train_path = ENVIRON["SAMPLE_SOURCE"] ".train"
            test_path = ENVIRON["SAMPLE_SOURCE"] ".test"
        }
        {
            if (split_mode) {
                if (rand() < rate)
                    print $0 > train_path
                else
                    print $0 > test_path
            }
            else if (NR <= sample_number)
                sample_set[NR] = $0
            else if (rand() < sample_number * 1.0 / NR)
                sample_set[int(rand() * sample_number) + 1] = $0
        }
        END {
            for (i in sample_set)
                print sample_set[i]
        }
    ' ${filename:+"${filename}"}
}
#######################################
# Duplicate the rows of a tab-separated file whose column equals a key.
# Arguments:
#   $1 - 1-based index of the column to test
#   $2 - key; a row matches when the column is string-equal to it
#   $3 - number of copies to emit for each matching row (int() is applied;
#        0 drops matching rows)
#   $4 - path of the tab-separated file; when omitted, awk reads stdin
# Outputs:
#   Every non-matching row once and every matching row $3 times, in input
#   order, on stdout.
#######################################
function oversample()
{
    local field=$1
    local key=$2
    local multiple=$3
    local filename=$4
    # The key is passed through the environment: unlike program-text
    # splicing or -v, that leaves quotes and backslashes in it untouched.
    OVERSAMPLE_KEY="${key}" awk -v field="${field}" -v multiple="${multiple}" '
        BEGIN {
            FS = "\t"; OFS = "\t"
            # Appending "" makes the key a plain string, so the comparison
            # below is a string comparison even for numeric-looking keys
            # (1 does not match 1.0).
            key = ENVIRON["OVERSAMPLE_KEY"] ""
            copies = int(multiple)
        }
        {
            if ($field == key) {
                for (i = 1; i <= copies; i++)
                    print $0
            }
            else
                print $0
        }
    ' ${filename:+"${filename}"}
}