-
Notifications
You must be signed in to change notification settings - Fork 0
/
taxo-browser
executable file
·233 lines (214 loc) · 6.82 KB
/
taxo-browser
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
#!/bin/sh
#
# taxo-browser - Command line taxonomy browser
# Copyright (C) 2017 Marco van Zwetselaar <[email protected]>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Home: http://github.com/zwets/taxo
# printf formats for one result row (tax_id, rank_name, sci_name).
# They are spliced unquoted into the text of the awk program further down,
# where the shell would split them on literal whitespace; hence every blank
# is written as an escape that awk decodes: \040 for a space, \t for a TAB.
PRETTY_PRINTFMT='%7d\040%-16s\040%s\n' # The \040 are escaped spaces
TABSEP_PRINTFMT='%s\t%s\t%s\n'
# Exit this script with an error message on stderr.
# Arguments: $* - the words of the message; printed after "<script name>: ".
# Exits:     always, with status 1.
err_exit() {
    # printf rather than echo: the echo of some /bin/sh implementations
    # interprets backslash sequences in its operand, printf '%s' never does.
    printf '%s: %s\n' "$(basename "$0")" "$*" >&2
    exit 1
}
# Show usage information and exit.
# Arguments: $1 - exit status (optional, defaults to 1).
# Outputs:   the usage text, on stderr.
usage_exit() {
    # "$0" is quoted so that a script path containing blanks or glob
    # characters reaches basename as one operand.
    printf '%s\n' "
Usage: $(basename "$0") [OPTIONS]
Browse a local copy of the NCBI taxonomy database.
OPTIONS
-t, --tabs Use TAB rather than fancily aligned output
-q, --quiet Do not show prompts and other informational output
-h, --help This information
For instructions, type '?' at the taxo-browser command prompt.
This tool uses at its backend the taxo-db tool which keeps a SQLite3
database caching the information from the NCBI taxdump archive.
" >&2
    exit "${1:-1}"
}
# Parse options.  Leading arguments that start with a dash are consumed as
# options; any argument left over afterwards is a usage error.
# Sets: PRINTFMT (row format for the awk program) and QUIET (0 or 1).
PRINTFMT="$PRETTY_PRINTFMT"
QUIET=0
# The leading dash is tested with a case pattern rather than with expr(1)
# inside "[ ... -a ... ]": expr can misparse an operand that looks like one
# of its own keywords or operators, and the -a connective is obsolescent.
while [ $# -ne 0 ]; do
    case "$1" in
        -q|--quiet)
            QUIET=1
            ;;
        -t|--tabs)
            PRINTFMT="$TABSEP_PRINTFMT"
            ;;
        -h|--help)
            usage_exit 0
            ;;
        -*)
            # Unknown option
            usage_exit
            ;;
        *)
            # First non-option argument: stop option processing
            break
            ;;
    esac
    shift
done
[ $# -eq 0 ] || usage_exit
# Locate the taxo-db backend: prefer one found on PATH, otherwise expect it
# in the directory of this script.
# command -v is the POSIX way to resolve a command name; which(1) is an
# external utility whose availability and output differ between systems.
TAXO_DB="$(command -v taxo-db 2>/dev/null)"
[ -n "$TAXO_DB" ] || TAXO_DB="$(dirname "$0")/taxo-db"
[ -x "$TAXO_DB" ] || err_exit "cannot find taxo-db"
# Check whether the taxo-db backend can do regexp search: a failing
# "taxo-db -r" on empty input sets NO_REGEXP=1.
# The path is quoted so a location containing blanks is run as one word.
NO_REGEXP=0
"$TAXO_DB" -r </dev/null >/dev/null 2>&1 || NO_REGEXP=1
# Enter interactive mode with awk processing commands.
# The awk program talks to taxo-db through two-way pipes (the "|&" operator).
# Note that the row-listing SELECT statements sent to taxo-db terminate with
# "SELECT 0".  We need this "Done" marker because without output rows,
# getline blocks.
awk -b -O -v QUIET="$QUIET" -v TAXO_DB="$TAXO_DB" -v NO_REGEXP="$NO_REGEXP" -e '
# Session setup: TAB-separated result fields, the command line of the
# regexp-capable backend (when available), the greeting, and node 1 (which
# the rest of this program treats as the root) as the current node.
BEGIN {
    FS = "\t"
    if (!NO_REGEXP) TAXO_RE = TAXO_DB " -r"
    # \047 is the awk octal escape for the single quote character.  A literal
    # one cannot be used: it would end the shell quoting around this program,
    # which is how the quotes used to go missing from the greeting.
    if (!QUIET) print "Welcome to taxo-browser. Press \047h\047 for help or \047q\047 to quit."
    set_cur(1)
    prompt()
}
# Runs when the program ends (quit command or end of input): say goodbye
# unless quiet, then close the two-way pipes to the backend.
END {
    if (!QUIET) {
        print "Bye."
    }
    close(TAXO_RE)
    close(TAXO_DB)
}
# Make node i the current node.
# Sets the globals CUR (the tax_id) and CUR_NAME (its name, as returned by
# name(), which the prompt displays).  name() queries the backend.
function set_cur(i) {
CUR = i
CUR_NAME = name(i)
}
# Print the result rows of the query last sent to TAXO_DB, using the row
# format selected by the shell wrapper.
# Reading stops at the "SELECT 0" marker row (first field equal to 0).  It
# also stops when getline reports end of input or an error, so that a backend
# which has gone away cannot leave the loop spinning on a stale record.
# Side effect: plain getline overwrites $0 and the fields.
function print_rows() {
    while ((TAXO_DB |& getline) > 0) {
        if ($1 == 0) break
        printf "'$PRINTFMT'", $1, $2, $3
    }
}
# Return the parent_id of node i as reported by the backend.
# No "SELECT 0" marker follows this query, so the single getline relies on
# the query producing a row; its return value is not checked.
# Side effect: plain getline overwrites $0 and the fields.
function parent_id(i) {
printf "SELECT parent_id FROM SciNamesView WHERE tax_id = %d;\n", i |& TAXO_DB
TAXO_DB |& getline
return $1
}
# Return true (1) when exactly one row of SciNamesView has tax_id i, else
# false (0).  The query is a COUNT(*), so it needs no "SELECT 0" marker.
# Side effect: plain getline overwrites $0 with the count.
function valid_id(i) {
    printf "SELECT COUNT(*) FROM SciNamesView WHERE tax_id = %d;\n", i |& TAXO_DB
    # Require a positive getline result: -1 (read error) also counts as true
    # in awk and would let a stale $0 decide the outcome.
    return ((TAXO_DB |& getline) > 0 && $0 == 1)
}
# Return the sci_name of node i as reported by the backend.
# No "SELECT 0" marker follows this query, so the single getline relies on
# the query producing a row; its return value is not checked.
# Side effect: plain getline overwrites $0 and the fields.
function name(i) {
printf "SELECT sci_name FROM SciNamesView WHERE tax_id = %d;\n", i |& TAXO_DB
TAXO_DB |& getline
return $1
}
# Print the tax_id, rank_name and sci_name of node i.
# The trailing "SELECT 0" supplies the marker row at which print_rows()
# stops reading.
function show(i) {
printf "SELECT tax_id, rank_name, sci_name FROM SciNamesView WHERE tax_id = %d; SELECT 0;\n", i |& TAXO_DB
print_rows()
}
# Move the current node pointer to taxonomy ID new and display that node.
# When valid_id() rejects the ID, report it and leave the pointer alone.
function jump(new) {
    if (!valid_id(new)) {
        print "taxonomy ID not found: " new
        return
    }
    set_cur(new)
    show(CUR)
}
# Move the current node pointer to its parent and display the new current
# node.  At node 1 the pointer stays put and node 1 is displayed again.
function up() {
    if (CUR == 1) {
        show(CUR)
        return
    }
    set_cur(parent_id(CUR))
    show(CUR)
}
# Display the parent of the current node without moving the pointer.
# Prints nothing when the current node is node 1.
function parent() {
    if (CUR == 1) return
    show(parent_id(CUR))
}
# Display node i preceded by all of its ancestors, top-most first.
# The recursion ends at node 1, which itself is not displayed.
function recurse_ancestors(i) {
    if (i == 1) return
    # Ancestors first, so the lineage prints from the top down
    recurse_ancestors(parent_id(i))
    show(i)
}
# Display the lineage of the current node, top-most ancestor first and the
# current node last.  Node 1 is not displayed (recurse_ancestors stops there).
function ancestry() {
recurse_ancestors(CUR)
}
# Display every node whose parent_id is the current node.
# tax_id 1 is excluded explicitly.  NOTE(review): presumably because the
# root row lists itself as its own parent - confirm against the database.
function children() {
printf "SELECT tax_id, rank_name, sci_name FROM SciNamesView WHERE parent_id = %d AND tax_id != 1; SELECT 0;\n", CUR |& TAXO_DB
print_rows()
}
# Display the other nodes that share the parent of the current node.
# Prints nothing when the current node is node 1.
# p is a local variable (extra parameter, never passed by callers).
function siblings(    p) {
    if (CUR == 1) return
    p = parent_id(CUR)
    printf "SELECT tax_id, rank_name, sci_name FROM SciNamesView WHERE parent_id = %d AND tax_id != %d; SELECT 0;\n", p, CUR |& TAXO_DB
    print_rows()
}
# Display all descendants of node p: first its children, then the subtree
# below each child, in the order the backend returned the children.
# a, i and n are local variables (extra parameters, never passed by callers):
# a collects the child tax_ids, n counts them.
# Side effect: plain getline overwrites $0 and the fields.
function descendants_of(p,    a, i, n) {
    printf "SELECT tax_id, rank_name, sci_name FROM SciNamesView WHERE parent_id = %d AND tax_id != 1; SELECT 0;\n", p |& TAXO_DB
    n = 0
    # Read all child rows before recursing: the recursive calls send their
    # own queries over the same TAXO_DB pipe.  The loop also ends when
    # getline reports end of input or an error instead of spinning forever.
    while ((TAXO_DB |& getline) > 0) {
        if ($1 == 0) break
        a[++n] = $1
        printf "'$PRINTFMT'", $1, $2, $3
    }
    # Index loop rather than "for (i in a)", whose visiting order is
    # unspecified in awk.
    for (i = 1; i <= n; i++) descendants_of(a[i])
}
# Display all descendants of the current node (see descendants_of).
function descendants() {
descendants_of(CUR);
}
# Display the nodes whose sci_name contains the text l (SQL LIKE match with
# a wildcard on either side); node 1 is excluded.
function like_search(l) {
    # Double any double quote in the search text so it cannot terminate the
    # quoted SQL value early.  An unbalanced quote would swallow the
    # "SELECT 0" marker and leave print_rows() waiting for it.
    # NOTE(review): assumes taxo-db hands the statement to SQLite unchanged.
    gsub(/"/, "\"\"", l)
    printf "SELECT tax_id, rank_name, sci_name FROM SciNamesView WHERE sci_name LIKE \"%%%s%%\" AND tax_id != 1; SELECT 0;\n", l |& TAXO_DB
    print_rows()
}
# Display the nodes whose sci_name matches the regular expression r; node 1
# is excluded.  The query goes to the TAXO_RE coprocess ("taxo-db -r").
# When the backend has no regexp support, a message is printed instead.
# Side effect: plain getline overwrites $0 and the fields.
function regex_search(r) {
    if (NO_REGEXP) {
        # print (not printf) so the message ends in a newline even when
        # quiet mode suppresses the prompt that would otherwise follow.
        print "Regular expression search unavailable, SQLite3 lacks the REGEXP extension"
        return
    }
    # Double any double quote so it cannot terminate the quoted SQL value
    # early and swallow the "SELECT 0" marker.
    # NOTE(review): assumes taxo-db hands the statement to SQLite unchanged.
    gsub(/"/, "\"\"", r)
    printf "SELECT tax_id, rank_name, sci_name FROM SciNamesView WHERE sci_name REGEXP \"%s\" AND tax_id != 1; SELECT 0;\n", r |& TAXO_RE
    # Stop at the marker row, or when getline reports end of input or an
    # error, rather than looping forever on a stale record.
    while ((TAXO_RE |& getline) > 0) {
        if ($1 == 0) break
        printf "'$PRINTFMT'", $1, $2, $3
    }
}
# Print the command overview of the interactive prompt.
# The text is emitted by one print statement; each "\n" ends a line and
# print itself terminates the last one.
function help() {
    print "taxo - Navigate the NCBI taxonomy\n" \
          "\n" \
          "Commands:\n" \
          "- ENTER key displays current node ID, rank and name\n" \
          "- NUMBER jump to node with taxid NUMBER\n" \
          "- %TEXT search for nodes whose name contains TEXT\n" \
          "- /REGEX search for nodes whose name matches REGEX\n" \
          "- u(p) move current node pointer to parent node\n" \
          "- p(arent) show parent but do not move current node pointer there\n" \
          "- a(ncestors) show lineage of current node all the way up to root\n" \
          "- s(iblings) show all siblings of the current node\n" \
          "- c(hildren) show all children of the current node\n" \
          "- D(escendants) show all descendants of the current node\n" \
          "- q(uit) or ^D leave"
}
# Print the command prompt, which shows the name of the current node.
# Prints nothing in quiet mode.
function prompt() {
    # The name is passed as an argument, not pasted into the format string,
    # so a percent sign in a taxon name is printed literally.
    if (!QUIET) printf "\n(%s) ? ", CUR_NAME
}
# Repeat for every line: dispatch the typed command, then prompt again.
#
# Every rule below is tested in order against the current record.  Handlers
# that query the backend read the answer with plain getline, which replaces
# $0, so the rules after the one that fired are tested against backend output
# (after a row listing: the "0" marker row), not against the typed line.
# The only pattern that "0" can match is the NUMBER rule, and it is placed
# before every other handler that queries the backend.  Keep that ordering
# in mind when adding or moving rules.
#
# The quit rule exits before the final rule, so no prompt follows it.
/^[h\?](elp)?$/ { help() }
/^[0-9]+$/ { jump($0) }
/^$/ { show(CUR) }
# For the two search commands, substr() drops the leading command character
/^\/.+/ { regex_search(substr($0,2,length($0)-1)) }
/^%.+/ { like_search(substr($0,2,length($0)-1)) }
/^up?$/ { up() }
/^p(arent)?$/ { parent() }
/^a(ncest(ry|ors))?$/ { ancestry() }
/^s(ib(ling)?s)?$/ { siblings() }
/^c(hildren)?$/ { children() }
/^D(escendants)?$/ { descendants() }
/^q(uit)?$/ { exit 0 }
{ prompt() }'
# vim: sts=4:sw=4:ai:si:et