forked from douglasgscofield/tinyutils
-
Notifications
You must be signed in to change notification settings - Fork 0
/
table
executable file
·75 lines (66 loc) · 2.07 KB
/
table
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/awk -f
#
# Copyright (c) 2012,2014 Douglas G. Scofield, Uppsala University
#
# This code is covered by the GNU GPLv2, please see LICENSE.
# Bugs and suggestions to https://github.com/douglasgscofield/tinyutils/issues.
#
# Calculates a table of input values placed into bins of unique values, printing
# each distinct value together with the number of times it was seen. Values may
# be anything, thanks to awk's associative arrays. If round_down is true, then
# input values are assumed to be floats and are floored (rounded down to the
# largest integer not greater than the value) before being counted.
# See hist if you want a histogram; this just prints a table of values with no
# particular ordering in the output.
#
# CHANGELOG
# 2014-05-27 : don't quit on missing-column error
# 2013-05-15 : make FS and OFS be "\t" by default
# 2012-12-03 : clean the script up a bit
# 2012-11-27 : create the script
BEGIN {
    # --- user-tunable settings --------------------------------------------
    # Tab is the default separator for both reading and writing.
    FS = OFS = "\t";
    # Column whose values are tabulated.
    col = 1;
    # Number of leading input lines to treat as header (0 means no header).
    header = 0;
    # When non-zero, lines starting with '#' are ignored on input.
    skip_comment = 1;
    # When non-zero, values are treated as floats and floored before counting.
    round_down = 0;
    # When non-zero, a "val", "count" header line precedes the output table.
    print_header = 0;

    # --- bookkeeping --------------------------------------------------------
    # n_a     : how many distinct values have been stored in the table
    # n_total : how many data points have been counted
    n_a = n_total = 0;
}
# floor(v): largest integer that is not greater than v.
#   v         numeric value to floor
#   truncated scratch local (callers pass only v)
# int() truncates toward zero, so for a negative non-integer v the truncated
# value lies above v and has to be lowered by one.
function floor(v,    truncated) {
    truncated = int(v);
    if (truncated > v)
        return truncated - 1;
    return truncated;
}
# Per-record rule: pick the value in column `col` and bump its count in a[].
{
    # Drop the first `header` input lines when a header is declared.
    if (header && NR <= header)
        next;

    # Drop '#'-prefixed lines when comment skipping is enabled.
    if (skip_comment && $0 ~ /^#/)
        next;

    # A line with too few fields is reported on stderr and skipped; it does
    # not stop processing.
    if (NF < col) {
        print "on input line " NR " missing requested column" > "/dev/stderr";
        next;
    }

    # Take the raw field, or its floored numeric value when rounding down.
    if (round_down)
        val = floor($(col) + 0.0);
    else
        val = $(col);

    ++n_total;

    if (val in a) {
        # Seen before: one more occurrence.
        a[val] += 1.0;
    } else {
        # First occurrence: start its count and record a new distinct value.
        a[val] = 1.0;
        n_a += 1.0;
    }
}
# Emit the table: one "value <OFS> count" line per distinct value. The
# iteration order of `for (... in a)` is whatever awk provides.
END {
    if (print_header)
        print "val","count";

    # The value column is printed as a rounded number when round_down is set,
    # otherwise as the original string; counts are always printed as integers.
    row_fmt = round_down ? "%.0f%s%.0f\n" : "%s%s%.0f\n";

    for (key in a)
        printf(row_fmt, key, OFS, a[key]);
}