|
| 1 | +#!/usr/bin/env k8 |
| 2 | + |
| 3 | +"use strict"; |
| 4 | + |
| 5 | +Array.prototype.delete_at = function(i) { |
| 6 | + for (let j = i; j < this.length - 1; ++j) |
| 7 | + this[j] = this[j + 1]; |
| 8 | + --this.length; |
| 9 | +} |
| 10 | + |
| 11 | +function* getopt(argv, ostr, longopts) { |
| 12 | + if (argv.length == 0) return; |
| 13 | + let pos = 0, cur = 0; |
| 14 | + while (cur < argv.length) { |
| 15 | + let lopt = "", opt = "?", arg = ""; |
| 16 | + while (cur < argv.length) { // skip non-option arguments |
| 17 | + if (argv[cur][0] == "-" && argv[cur].length > 1) { |
| 18 | + if (argv[cur] == "--") cur = argv.length; |
| 19 | + break; |
| 20 | + } else ++cur; |
| 21 | + } |
| 22 | + if (cur == argv.length) break; |
| 23 | + let a = argv[cur]; |
| 24 | + if (a[0] == "-" && a[1] == "-") { // a long option |
| 25 | + pos = -1; |
| 26 | + let c = 0, k = -1, tmp = "", o; |
| 27 | + const pos_eq = a.indexOf("="); |
| 28 | + if (pos_eq > 0) { |
| 29 | + o = a.substring(2, pos_eq); |
| 30 | + arg = a.substring(pos_eq + 1); |
| 31 | + } else o = a.substring(2); |
| 32 | + for (let i = 0; i < longopts.length; ++i) { |
| 33 | + let y = longopts[i]; |
| 34 | + if (y[y.length - 1] == "=") y = y.substring(0, y.length - 1); |
| 35 | + if (o.length <= y.length && o == y.substring(0, o.length)) { |
| 36 | + k = i, tmp = y; |
| 37 | + ++c; // c is the number of matches |
| 38 | + if (o == y) { // exact match |
| 39 | + c = 1; |
| 40 | + break; |
| 41 | + } |
| 42 | + } |
| 43 | + } |
| 44 | + if (c == 1) { // find a unique match |
| 45 | + lopt = tmp; |
| 46 | + if (pos_eq < 0 && longopts[k][longopts[k].length-1] == "=" && cur + 1 < argv.length) { |
| 47 | + arg = argv[cur+1]; |
| 48 | + argv.delete_at(cur + 1); |
| 49 | + } |
| 50 | + } |
| 51 | + } else { // a short option |
| 52 | + if (pos == 0) pos = 1; |
| 53 | + opt = a[pos++]; |
| 54 | + let k = ostr.indexOf(opt); |
| 55 | + if (k < 0) { |
| 56 | + opt = "?"; |
| 57 | + } else if (k + 1 < ostr.length && ostr[k+1] == ":") { // requiring an argument |
| 58 | + if (pos >= a.length) { |
| 59 | + arg = argv[cur+1]; |
| 60 | + argv.delete_at(cur + 1); |
| 61 | + } else arg = a.substring(pos); |
| 62 | + pos = -1; |
| 63 | + } |
| 64 | + } |
| 65 | + if (pos < 0 || pos >= argv[cur].length) { |
| 66 | + argv.delete_at(cur); |
| 67 | + pos = 0; |
| 68 | + } |
| 69 | + if (lopt != "") yield { opt: `--${lopt}`, arg: arg }; |
| 70 | + else if (opt != "?") yield { opt: `-${opt}`, arg: arg }; |
| 71 | + else yield { opt: "?", arg: "" }; |
| 72 | + } |
| 73 | +} |
| 74 | + |
| 75 | +function* k8_readline(fn) { |
| 76 | + let buf = new Bytes(); |
| 77 | + let file = new File(fn); |
| 78 | + while (file.readline(buf) >= 0) { |
| 79 | + yield buf.toString(); |
| 80 | + } |
| 81 | + file.close(); |
| 82 | + buf.destroy(); |
| 83 | +} |
| 84 | + |
| 85 | +function main(args) { |
| 86 | + let opt = { min_cov:.9, max_dv:.01 }; |
| 87 | + for (const o of getopt(args, "c:d:", [])) { |
| 88 | + if (o.opt == '-c') opt.min_cov = parseFloat(o.arg); |
| 89 | + else if (o.opt == '-d') opt.max_dv = parseFloat(o.arg); |
| 90 | + } |
| 91 | + if (args.length == 0) { |
| 92 | + print("Usage: pafcluster.js [options] <ava.paf>"); |
| 93 | + print("Options:"); |
| 94 | + print(` -c FLOAT min coverage [${opt.min_cov}]`); |
| 95 | + print(` -d FLOAT max divergence [${opt.max_dv}]`); |
| 96 | + return; |
| 97 | + } |
| 98 | + |
| 99 | + // read |
| 100 | + let a = [], len = {}; |
| 101 | + for (const line of k8_readline(args[0])) { |
| 102 | + let m, t = line.split("\t"); |
| 103 | + if (t[4] != "+") continue; |
| 104 | + const len1 = parseInt(t[1]), len2 = parseInt(t[6]); |
| 105 | + let s1 = -1, dv = -1.0; |
| 106 | + for (let i = 12; i < t.length; ++i) { |
| 107 | + if ((m = /^(s1|dv):\S:(\S+)/.exec(t[i])) != null) { |
| 108 | + if (m[1] == "s1") s1 = parseInt(m[2]); |
| 109 | + else if (m[1] == "dv") dv = parseFloat(m[2]); |
| 110 | + } |
| 111 | + } |
| 112 | + if (s1 < 0 || dv < 0) continue; |
| 113 | + const cov1 = (parseInt(t[3]) - parseInt(t[2])) / len1; |
| 114 | + const cov2 = (parseInt(t[8]) - parseInt(t[7])) / len2; |
| 115 | + const min_cov = cov1 < cov2? cov1 : cov2; |
| 116 | + const max_cov = cov1 > cov2? cov1 : cov2; |
| 117 | + a.push({ name1:t[0], name2:t[5], len1:len1, len2:len2, min_cov:min_cov, max_cov:max_cov, s1:s1, dv:dv }); |
| 118 | + len[t[0]] = len1, len[t[5]] = len2; |
| 119 | + } |
| 120 | + warn(`Read ${a.length} hits`); |
| 121 | + |
| 122 | + // filter duplicated hits |
| 123 | + let pair = {}, n_flt = 0, b = []; |
| 124 | + a.sort(function(x, y) { return y.s1 - x.s1 }); |
| 125 | + for (let i = 0; i < a.length; ++i) { |
| 126 | + const key = `${a[i].name1}\t${a[i].name2}`; |
| 127 | + if (pair[key] != null) { |
| 128 | + ++n_flt; |
| 129 | + continue; |
| 130 | + } |
| 131 | + pair[key] = 1; |
| 132 | + b.push(a[i]); |
| 133 | + } |
| 134 | + pair = null; |
| 135 | + warn(`Filtered ${n_flt} hits`); |
| 136 | + a = b; |
| 137 | + |
| 138 | + // core loop |
| 139 | + while (a.length > 1) { |
| 140 | + // select the sequence with the highest sum of s1 |
| 141 | + let h = {}; |
| 142 | + for (let i = 0; i < a.length; ++i) { |
| 143 | + if (h[a[i].name1] == null) h[a[i].name1] = 0; |
| 144 | + h[a[i].name1] += a[i].s1; |
| 145 | + } |
| 146 | + let max_s1 = 0, max_name = ""; |
| 147 | + for (const name in h) |
| 148 | + if (max_s1 < h[name]) |
| 149 | + max_s1 = h[name], max_name = name; |
| 150 | + // find contigs in the same group |
| 151 | + h = {}; |
| 152 | + h[max_name] = 1; |
| 153 | + for (let i = 0; i < a.length; ++i) { |
| 154 | + if (a[i].name1 != max_name && a[i].name2 != max_name) |
| 155 | + continue; |
| 156 | + if (a[i].min_cov >= opt.min_cov && a[i].dv <= opt.max_dv) |
| 157 | + h[a[i].name1] = h[a[i].name2] = 1; |
| 158 | + } |
| 159 | + let n = 0; |
| 160 | + for (const key in h) ++n; |
| 161 | + print(`SD\t${max_name}\t${n}`); |
| 162 | + for (const key in h) print(`CL\t${key}\t${len[key]}`); |
| 163 | + print("//"); |
| 164 | + // filter out redundant hits |
| 165 | + let b = []; |
| 166 | + for (let i = 0; i < a.length; ++i) |
| 167 | + if (h[a[i].name1] == null && h[a[i].name2] == null) |
| 168 | + b.push(a[i]); |
| 169 | + warn(`Reduced the number of hits from ${a.length} to ${b.length}`); |
| 170 | + a = b; |
| 171 | + } |
| 172 | +} |
| 173 | + |
| 174 | +main(arguments); |
0 commit comments