diff --git a/Changelog b/Changelog index c313fa17..dd993e22 100644 --- a/Changelog +++ b/Changelog @@ -1,3 +1,8 @@ +2018-01-09 Youri Hoogstrate v0.15.0 + * Bugfix resulting in higher number of detected frame shifts + * `dr-disco integrate --fasta ` provides edit distance to + canonical splice junction motif (quick implementation) + 2017-12-20 Youri Hoogstrate v0.14.6 * New improvement to entropy filter diff --git a/bin/dr-disco b/bin/dr-disco index 36ced9a2..5a6a3b81 100755 --- a/bin/dr-disco +++ b/bin/dr-disco @@ -140,12 +140,14 @@ def CLI_classify(table_input_file, table_output_file, only_valid, blacklist_regi @click.argument('table_input_file', type=click.Path(exists=True)) @click.argument('table_output_file') @click.option('--gtf', help="Use gene annotation (GTF file)") -def CLI_integrate(table_input_file, table_output_file, gtf): +@click.option('--fasta', help="Use FASTA sequence file to estimate edit distances to splice junction motifs") +def CLI_integrate(table_input_file, table_output_file, gtf, fasta): cl = DetectOutput(table_input_file) - if gtf: - cl.integrate(table_output_file, str(gtf)) - else: - cl.integrate(table_output_file, None) + + gtf = str(gtf) if gtf else None + fasta = str(fasta) if fasta else None + + cl.integrate(table_output_file, gtf, fasta) if __name__ == '__main__': diff --git a/drdisco/DetectFrameShifts.py b/drdisco/DetectFrameShifts.py index c52b2a02..5fccd089 100644 --- a/drdisco/DetectFrameShifts.py +++ b/drdisco/DetectFrameShifts.py @@ -189,7 +189,13 @@ def evaluate(self, _from, _to, offset): """ Offset may be convenient because STAR sometimes has problems aligning/clipping the first 2 bases after an exon Values of 4 and larger do not make sense. 
+ + Args: + _from ([chr, pos, strand]): donor break position + _to ([chr, pos, strand]): acceptor break position + """ + from_l_fgd = [] to_l_fgd = [] diff --git a/drdisco/DetectOutput.py b/drdisco/DetectOutput.py index 71315fca..b6c8e68e 100644 --- a/drdisco/DetectOutput.py +++ b/drdisco/DetectOutput.py @@ -6,8 +6,10 @@ from drdisco import log from drdisco.DetectFrameShifts import DetectFrameShifts +from drdisco.utils import reverse_complement, is_gzip import gzip import HTSeq +from pyfaidx import Fasta """[License: GNU General Public License v3 (GPLv3)] @@ -37,18 +39,9 @@ """ -def is_gzip(filename): - try: - f = gzip.GzipFile(filename, 'rb') - f.read() - return True - except Exception: - return False - - class DetectOutputEntry: def __init__(self, line_in_results_file): - self.line = line_in_results_file.strip().split("\t") + self.line = line_in_results_file.strip("\r\n").split("\t") self.parse() def parse(self): @@ -130,13 +123,15 @@ def parse(self): self.break_A_max_AS = int(self.line[44]) self.break_B_max_AS = int(self.line[45]) + self.edit_dist_to_splice_motif = "" + self.structure = self.line[46] inv = {'-': '+', '+': '-'} if self.acceptorA > self.donorA: self.RNAstrandA = self.strandA self.RNAstrandB = inv[self.strandB] - elif self.donorA < self.acceptorA: + elif self.acceptorA < self.donorA: self.RNAstrandA = inv[self.strandA] self.RNAstrandB = self.strandB else: @@ -151,14 +146,15 @@ def parse(self): def get_donors_acceptors(self, gtf_file): idx = {} for a in self.structure.split('&'): - for b in a.split(':', 3)[3].strip('()').split(','): - c = b.split(':') - c[0] = c[0].replace('_1', '_[12]').replace('_2', '_[12]') - if c[0] != 'discordant_mates': - if c[0] not in idx: - idx[c[0]] = 0 + if a != '': + for b in a.split(':', 3)[3].strip('()').split(','): + c = b.split(':') + c[0] = c[0].replace('_1', '_[12]').replace('_2', '_[12]') + if c[0] != 'discordant_mates': + if c[0] not in idx: + idx[c[0]] = 0 - idx[c[0]] += int(c[1]) + idx[c[0]] += int(c[1]) def 
pos_to_gene_str(pos_chr, pos_pos): if pos_chr[0:3] == 'chr': @@ -188,6 +184,66 @@ def pos_to_gene_str(pos_chr, pos_pos): else: return genesB + '<->' + genesA + def is_on_splice_junction_motif(self, fasta_fh): + """ + +motif: + +5' exon: + +[ ...{AC}{A}{G} ] {G}{T}{AG}{A}{G}{T} . . . {C}{A}{G} [ {G}... ] + + """ + + pos5_in_exon_length = 3 + pos5_post_exon_length = 6 + + pos3_pre_exon_length = 3 + pos3_in_exon_length = 1 + + if self.donorA > self.donorB: + pos5p = [self.chrA, self.posA, self.strandA] + pos3p = [self.chrB, self.posB, self.strandB] + elif self.donorA < self.donorB: + pos5p = [self.chrB, self.posB, self.strandB] + pos3p = [self.chrA, self.posA, self.strandA] + else: + pos5p = None + + if pos5p: + if pos5p[2] == '-': + seq_in_5p_exon = str(fasta_fh[pos5p[0]][pos5p[1] - pos5_in_exon_length:pos5p[1]]).upper() + seq_post_5p_exon = str(fasta_fh[pos5p[0]][pos5p[1]:pos5p[1] + pos5_post_exon_length]).upper() + else: + seq_in_5p_exon = reverse_complement(str(fasta_fh[pos5p[0]][pos5p[1]:pos5p[1] + pos5_in_exon_length])) + seq_post_5p_exon = reverse_complement(str(fasta_fh[pos5p[0]][pos5p[1] - pos5_post_exon_length:pos5p[1]])) + + if pos3p[2] == '+': + seq_pre_3p_exon = str(fasta_fh[pos3p[0]][pos3p[1] - pos3_pre_exon_length:pos3p[1]]).upper() + seq_in_3p_exon = str(fasta_fh[pos3p[0]][pos3p[1]:pos3p[1] + pos3_in_exon_length]).upper() + else: + seq_in_3p_exon = reverse_complement(str(fasta_fh[pos3p[0]][pos3p[1] - pos3_in_exon_length:pos3p[1]])) + seq_pre_3p_exon = reverse_complement(str(fasta_fh[pos3p[0]][pos3p[1]:pos3p[1] + pos3_pre_exon_length])) + + def calc_dist(pat, subseq): + d = 0 + + if len(pat) != len(subseq): + raise Exception("invalid pattern size") + for i in range(len(pat)): + if subseq[i] not in pat[i]: + d += 1 + + return d + + dist = calc_dist(["AC", "A", "G"], seq_in_5p_exon) + calc_dist(["G", "T", "AG", "A", "G", "T"], seq_post_5p_exon) + calc_dist(["C", "A", "G"], seq_pre_3p_exon) + calc_dist(["G"], seq_in_3p_exon) + # print "[ ... 
" + seq_in_5p_exon + " ] " + seq_post_5p_exon + " ... ... " + seq_pre_3p_exon + " [ " + seq_in_3p_exon + " ... ] ---> " + str(dist) + self.edit_dist_to_splice_motif = str(dist) + + return dist + else: + return "" + def __str__(self): line = self.line line[11] = self.status @@ -382,17 +438,17 @@ def classify_intronic_exonic(): log.info("Classified " + str(k) + "/" + str(n) + " as valid") - def integrate(self, output_table, gtf_file): - def insert_in_index(index, entries, score): + def integrate(self, output_table, gtf_file, fasta_file): + def insert_in_index(index, entries, score, i): if score not in index: index[score] = {} - key = entries[0].chrA + ':' + str(entries[0].posA) + '(' + entries[0].strandA + ')-' + entries[0].chrB + ':' + str(entries[0].posB) + '(' + entries[0].strandB + ')' + key = entries[0].chrA + ':' + str(entries[0].posA) + '(' + entries[0].strandA + ')-' + entries[0].chrB + ':' + str(entries[0].posB) + '(' + entries[0].strandB + ')_' + str(i) index[score][key] = entries with open(output_table, 'w') as fh_out: header = self.header.split("\t") - header = "\t".join(header[:-5] + ['full-gene-dysregulation', 'frameshift=0', 'frameshift=+1', 'frameshift=+2'] + header[-5:]) + header = "\t".join(header[:-5] + ['full-gene-dysregulation', 'frameshift=0', 'frameshift=+1', 'frameshift=+2', 'splice-motif-edit-distance'] + header[-5:]) fh_out.write("shared-id\tfusion\t" + header) @@ -403,6 +459,8 @@ def insert_in_index(index, entries, score): gene_annotation = GeneAnnotation(gtf_file) dfs = DetectFrameShifts(gtf_file) if gtf_file else None + ffs = Fasta(fasta_file) if fasta_file else None + intronic_linear = [] remainder = [] @@ -425,30 +483,34 @@ def insert_in_index(index, entries, score): frameshifts_2 = [x[0][0] + '(+' + str(x[0][1]) + ')->' + x[1][0] + '(+' + str(x[1][1]) + ')' for x in frame_shifts[2]] for additional_breaks in e.structure.split('&'): - params = additional_breaks.split(':(') - n_split_reads = sum([int(x.split(':')[1]) for x in 
params[1].rstrip(')').split(',') if x.split(':')[0] != 'discordant_mates']) + if additional_breaks != '': + params = additional_breaks.split(':(') + n_split_reads = sum([int(x.split(':')[1]) for x in params[1].rstrip(')').split(',') if x.split(':')[0] != 'discordant_mates']) - posAB = params[0].split(':') - posA, posB = int(posAB[1].split('/')[0]), int(posAB[2].split('/')[0]) + posAB = params[0].split(':') + posA, posB = int(posAB[1].split('/')[0]), int(posAB[2].split('/')[0]) - if params[0] not in done_breaks and n_split_reads > 0: - if e.donorA > e.donorB: - frame_shifts = dfs.evaluate([e.chrA, posA, e.RNAstrandA], [e.chrB, posB, e.RNAstrandB], 2) - else: - frame_shifts = dfs.evaluate([e.chrB, posB, e.RNAstrandB], [e.chrA, posA, e.RNAstrandA], 2) + if params[0] not in done_breaks and n_split_reads > 0: + if e.donorA > e.donorB: # nice, use same thing to swap if necessary + frame_shifts = dfs.evaluate([e.chrA, posA, e.RNAstrandA], [e.chrB, posB, e.RNAstrandB], 2) + else: + frame_shifts = dfs.evaluate([e.chrB, posB, e.RNAstrandB], [e.chrA, posA, e.RNAstrandA], 2) - fgd += [x[0] + '->' + x[1] for x in frame_shifts['fgd']] - frameshifts_0 += [x[0][0] + '->' + x[1][0] for x in frame_shifts[0]] - frameshifts_1 += [x[0][0] + '(+' + str(x[0][1]) + ')->' + x[1][0] + '(+' + str(x[1][1]) + ')' for x in frame_shifts[1]] - frameshifts_2 += [x[0][0] + '(+' + str(x[0][1]) + ')->' + x[1][0] + '(+' + str(x[1][1]) + ')' for x in frame_shifts[2]] + fgd += [x[0] + '->' + x[1] for x in frame_shifts['fgd']] + frameshifts_0 += [x[0][0] + '->' + x[1][0] for x in frame_shifts[0]] + frameshifts_1 += [x[0][0] + '(+' + str(x[0][1]) + ')->' + x[1][0] + '(+' + str(x[1][1]) + ')' for x in frame_shifts[1]] + frameshifts_2 += [x[0][0] + '(+' + str(x[0][1]) + ')->' + x[1][0] + '(+' + str(x[1][1]) + ')' for x in frame_shifts[2]] - done_breaks.add(params[0]) + done_breaks.add(params[0]) e.fgd = ','.join(sorted(list(set(fgd)))) e.frameshift_0 = ','.join(sorted(list(set(frameshifts_0)))) 
e.frameshift_1 = ','.join(sorted(list(set(frameshifts_1)))) e.frameshift_2 = ','.join(sorted(list(set(frameshifts_2)))) + if ffs: + e.is_on_splice_junction_motif(ffs) + if e.x_onic == 'intronic' and e.circ_lin == 'linear': intronic_linear.append(e) else: @@ -470,7 +532,7 @@ def insert(pos, e): # Reorder idx2 = {} - + q = 0 for e in intronic_linear: results = {} positions = [(e.chrA, e.posA, e.strandA), (e.chrB, e.posB, e.strandB)] @@ -497,7 +559,7 @@ def insert(pos, e): results[e2] += 1 top_result = (None, 9999999999999) - for r in results: + for r in sorted(results.keys()): if results[r] >= 2: d1 = (r.posA - e.posA) d2 = (r.posB - e.posB) @@ -505,16 +567,20 @@ def insert(pos, e): shared_score = math.sqrt((pow(e.score, 2) + pow(r.score, 2)) * 0.5) penalty = 1.0 * sq_d / shared_score + if penalty < top_result[1]: top_result = (r, penalty) if top_result[0]: - insert_in_index(idx2, [e, top_result[0]], e.score + top_result[0].score) + insert_in_index(idx2, [e, top_result[0]], e.score + top_result[0].score, q) else: - insert_in_index(idx2, [e], e.score) + insert_in_index(idx2, [e], e.score, q) + + q += 1 for e in remainder: - insert_in_index(idx2, [e], e.score) + insert_in_index(idx2, [e], e.score, q) + q += 1 log.info("Determining fusion gene names and generate output") # Generate output @@ -526,7 +592,7 @@ def insert(pos, e): for entry in idx2[score][key]: if entry not in exported: acceptors_donors = entry.get_donors_acceptors(gene_annotation) - line = entry.line[:-5] + [entry.fgd, entry.frameshift_0, entry.frameshift_1, entry.frameshift_2] + entry.line[-5:] + line = entry.line[:-5] + [entry.fgd, entry.frameshift_0, entry.frameshift_1, entry.frameshift_2, entry.edit_dist_to_splice_motif] + entry.line[-5:] fh_out.write(str(i) + "\t" + acceptors_donors + "\t" + "\t".join(line) + "\n") exported.add(entry) diff --git a/drdisco/__init__.py b/drdisco/__init__.py index d6410ca3..b0225b00 100644 --- a/drdisco/__init__.py +++ b/drdisco/__init__.py @@ -31,7 +31,7 @@ import 
logging import sys -__version_info__ = ('0', '14', '6') +__version_info__ = ('0', '15', '0') __version__ = '.'.join(__version_info__) if (len(__version_info__) == 3) else '.'.join(__version_info__[0:3]) + "-" + __version_info__[3] __author__ = 'Youri Hoogstrate' __homepage__ = 'https://github.com/yhoogstrate/dr-disco' diff --git a/drdisco/utils.py b/drdisco/utils.py new file mode 100644 index 00000000..71359065 --- /dev/null +++ b/drdisco/utils.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# *- coding: utf-8 -*- +# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 textwidth=79: + + +import gzip + + +alt_map = {'ins': '0'} +complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'} + + +def reverse_complement(seq): + seq = seq.upper() + for k, v in alt_map.iteritems(): + seq = seq.replace(k, v) + bases = list(seq) + bases = reversed([complement.get(base, base) for base in bases]) + bases = ''.join(bases) + for k, v in alt_map.iteritems(): + bases = bases.replace(v, k) + return bases + + +def is_gzip(filename): + try: + f = gzip.GzipFile(filename, 'rb') + f.read() + return True + except Exception: + return False diff --git a/requirements.txt b/requirements.txt index 6abd4a0f..f45ae29a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ HTSeq==0.6.1 numpy pysam==0.10.0 scipy +pyfaidx==0.5.1 diff --git a/share/blacklist-junctions.hg38.txt b/share/blacklist-junctions.hg38.txt index da1b16ff..783ee0b2 100644 --- a/share/blacklist-junctions.hg38.txt +++ b/share/blacklist-junctions.hg38.txt @@ -4050,3 +4050,9 @@ chr13 66903600 66903611 - chr13 67298227 67298238 + PCDH9 new exon chr13 111865377 111865388 - chr13 112095469 112095481 + SOX1 new exon(s) chr13 111865377 111865388 - chr13 112098310 112098322 + SOX1 new exon(s) chr13 45338700 45338750 - chr13 45339500 45339560 - TPT1 weird ribosomal/LINC-rna low entropy stuff again +chr2 81548742 81548753 - chr2 81797812 81797823 + most likely new gene brca +chr2 81548742 81548753 - chr2 81822264 81822275 + most 
likely new gene brca +chr2 81548742 81548753 - chr2 81848747 81848758 + most likely new gene brca +chr2 81570408 81570419 - chr2 81797812 81797823 + most likely new gene brca +chr2 81570408 81570419 - chr2 81848747 81848758 + most likely new gene brca +chr2 173533339 173533340 - chr2 173761578 173761579 + most likely new gene brca diff --git a/tests/chim_overhang/test_01_integrate.out.txt b/tests/chim_overhang/test_01_integrate.out.txt index 5eb9a6d8..7f99ed6b 100644 --- a/tests/chim_overhang/test_01_integrate.out.txt +++ b/tests/chim_overhang/test_01_integrate.out.txt @@ -1,2 +1,2 @@ -shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-B-acceptor pos-B-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 median-AS-A median-AS-B max-AS-A max-AS-B data-structure -1 chr2:17281790->chr11:5566704 chr2 17281790 + 0 22 chr11 5566704 - 22 0 inf entropy=0.7372<0.7382,chim_overhang=21<25 linear intronic 33 22 11 0 1530 10 1 1 1 0 0 0.7372 0.7372 0.0000 0.0000 0.3818 18.0000 0.8367 0.0013 0.0833 2.0455 38.7727 0.9652 0.0000 0.1847 0.0000 0.3333 2.0000 21 49 21 57 chr2:17281790/17281791(+)->chr11:5566704/5566705(-):(spanning_paired_1:11,spanning_paired_2:11) +shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-B-acceptor pos-B-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges 
bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 splice-motif-edit-distance median-AS-A median-AS-B max-AS-A max-AS-B data-structure +1 chr2:17281790->chr11:5566704 chr2 17281790 + 0 22 chr11 5566704 - 22 0 inf entropy=0.7372<0.7382,chim_overhang=21<25 linear intronic 33 22 11 0 1530 10 1 1 1 0 0 0.7372 0.7372 0.0000 0.0000 0.3818 18.0000 0.8367 0.0013 0.0833 2.0455 38.7727 0.9652 0.0000 0.1847 0.0000 0.3333 2.0000 21 49 21 57 chr2:17281790/17281791(+)->chr11:5566704/5566705(-):(spanning_paired_1:11,spanning_paired_2:11) diff --git a/tests/chim_overhang/test_02_integrate.out.txt b/tests/chim_overhang/test_02_integrate.out.txt index 482a9db1..328caaad 100644 --- a/tests/chim_overhang/test_02_integrate.out.txt +++ b/tests/chim_overhang/test_02_integrate.out.txt @@ -1,2 +1,2 @@ -shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-B-acceptor pos-B-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 median-AS-A median-AS-B max-AS-A max-AS-B data-structure -1 chr5:105166412->chr7:12213276 chr5 105166412 + 0 22 chr7 12213276 + 22 0 inf entropy=0.7372<0.7382,chim_overhang=16<25 linear intronic 33 22 11 0 1528 14 1 1 1 0 0 0.7372 0.7372 0.0000 0.0000 0.1273 15.0000 0.8367 0.0013 0.0278 0.6091 53.9545 0.9419 0.0000 0.0724 0.0000 0.3333 2.0000 16 57 16 59 
chr5:105166412/105166413(+)->chr7:12213276/12213277(+):(spanning_paired_1:11,spanning_paired_2:11) +shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-B-acceptor pos-B-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 splice-motif-edit-distance median-AS-A median-AS-B max-AS-A max-AS-B data-structure +1 chr5:105166412->chr7:12213276 chr5 105166412 + 0 22 chr7 12213276 + 22 0 inf entropy=0.7372<0.7382,chim_overhang=16<25 linear intronic 33 22 11 0 1528 14 1 1 1 0 0 0.7372 0.7372 0.0000 0.0000 0.1273 15.0000 0.8367 0.0013 0.0278 0.6091 53.9545 0.9419 0.0000 0.0724 0.0000 0.3333 2.0000 16 57 16 59 chr5:105166412/105166413(+)->chr7:12213276/12213277(+):(spanning_paired_1:11,spanning_paired_2:11) diff --git a/tests/chim_overhang/test_03_integrate.out.txt b/tests/chim_overhang/test_03_integrate.out.txt index 656f3689..70bb5bf5 100644 --- a/tests/chim_overhang/test_03_integrate.out.txt +++ b/tests/chim_overhang/test_03_integrate.out.txt @@ -1,2 +1,2 @@ -shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-B-acceptor pos-B-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge 
full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 median-AS-A median-AS-B max-AS-A max-AS-B data-structure -1 chr8:86472668->chr17:40685761 chr8 86472668 - 6 8 chr17 40685761 + 8 6 inf chim_overhang=18<25 linear intronic 21 8 7 0 998 6 1 1 1 0 0 0.8277 0.8277 0.6547 0.0000 0.3214 53.7500 0.9186 0.0035 0.0619 0.2143 16.9286 0.8660 0.0117 0.0553 0.0000 0.1905 2.0000 55 18 56 18 chr8:86472668/86472669(-)->chr17:40685761/40685762(+):(spanning_paired_1:3,spanning_paired_1_t:4,spanning_paired_2:3,spanning_paired_2_t:4) +shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-B-acceptor pos-B-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 splice-motif-edit-distance median-AS-A median-AS-B max-AS-A max-AS-B data-structure +1 chr8:86472668->chr17:40685761 chr8 86472668 - 6 8 chr17 40685761 + 8 6 inf chim_overhang=18<25 linear intronic 21 8 7 0 998 6 1 1 1 0 0 0.8277 0.8277 0.6547 0.0000 0.3214 53.7500 0.9186 0.0035 0.0619 0.2143 16.9286 0.8660 0.0117 0.0553 0.0000 0.1905 2.0000 55 18 56 18 chr8:86472668/86472669(-)->chr17:40685761/40685762(+):(spanning_paired_1:3,spanning_paired_1_t:4,spanning_paired_2:3,spanning_paired_2_t:4) diff --git a/tests/integrate/splice_junction_motif.fa b/tests/integrate/splice_junction_motif.fa new file mode 100644 index 00000000..6c7f04fd --- /dev/null +++ b/tests/integrate/splice_junction_motif.fa @@ -0,0 +1,17 @@ +>perfect_fwd +tttttttttttttttttttttttttttttttttttttttt +tttttttttttttttttttttttttttttttttttttttt +ttttttttttCAggTGAGTttttttttttttttttttttt 
+ttttttttttCAggtttttttttttttttttttttttttt +tttttttttttttttttttttttttttttttttttttttt +tttttttttttttttttttttttttttttttttttttttt +tttttttttttttttttttttttttttttttttttttttt +>imperfect_fwd +tttttttttttttttttttttttttttttttttttttttt +tttttttttttttttttttttttttttttttttttttttt +tttttttttttttttttttttttttttttttttttttttt +ttttttttttCAggTAAGAttttttttttttttttttttt +ttttttttttCAggtttttttttttttttttttttttttt +tttttttttttttttttttttttttttttttttttttttt +tttttttttttttttttttttttttttttttttttttttt +tttttttttttttttttttttttttttttttttttttttt diff --git a/tests/integrate/test_frameshift-prediction_01-complementary.in.dbed b/tests/integrate/test_frameshift-prediction_01-complementary.in.dbed new file mode 100644 index 00000000..335e4da6 --- /dev/null +++ b/tests/integrate/test_frameshift-prediction_01-complementary.in.dbed @@ -0,0 +1,17 @@ +chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge median-AS-A median-AS-B max-AS-A max-AS-B data-structure +chr1 1035203 + 0 200 chr1 999610 - 200 0 3052497 valid linear exonic 800 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +chr1 999610 + 200 0 chr1 1035203 - 0 200 3052497 valid linear exonic 750 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +1 1035203 + 0 200 1 999610 - 200 0 2992369 valid linear intronic 700 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 
1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +1 999610 + 200 0 1 1035203 - 0 200 2992369 valid linear intronic 650 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +chr1 1035203 - 0 200 chr1 999610 + 200 0 3052497 valid linear exonic 600 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +chr1 999610 - 200 0 chr1 1035203 + 0 200 3052497 valid linear exonic 550 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +1 1035203 - 0 200 1 999610 + 200 0 2992369 valid linear intronic 500 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +1 999610 - 200 0 1 1035203 + 0 200 2992369 valid linear intronic 450 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +chr1 1035203 + 0 200 chr1 999610 + 200 0 3052497 valid linear exonic 400 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +chr1 999610 + 200 0 chr1 1035203 + 0 200 3052497 valid linear exonic 350 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +1 1035203 + 0 200 1 999610 + 200 0 2992369 valid linear intronic 300 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +1 999610 + 200 0 1 1035203 + 0 200 2992369 valid linear intronic 250 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +chr1 1035203 
- 200 0 chr1 999610 - 0 200 3052497 valid linear exonic 200 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +chr1 999610 - 0 200 chr1 1035203 - 200 0 3052497 valid linear exonic 150 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +1 1035203 - 200 0 1 999610 - 0 200 2992369 valid linear intronic 100 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +1 999610 - 0 200 1 1035203 - 200 0 2992369 valid linear intronic 50 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 diff --git a/tests/integrate/test_frameshift-prediction_01-complementary.out.txt b/tests/integrate/test_frameshift-prediction_01-complementary.out.txt new file mode 100644 index 00000000..0fcc81c3 --- /dev/null +++ b/tests/integrate/test_frameshift-prediction_01-complementary.out.txt @@ -0,0 +1,17 @@ +shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 splice-motif-edit-distance median-AS-A median-AS-B max-AS-A max-AS-B data-structure +1 1:1035203->1:999610 1 1035203 - 0 200 1 999610 + 200 0 2992369 valid linear intronic 500 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 
0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +1 chr1:1035203->chr1:999610 chr1 1035203 + 0 200 chr1 999610 - 200 0 3052497 valid linear exonic 800 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +2 1:1035203->1:999610 1 999610 + 200 0 1 1035203 - 0 200 2992369 valid linear intronic 650 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +2 chr1:1035203->chr1:999610 chr1 999610 - 200 0 chr1 1035203 + 0 200 3052497 valid linear exonic 550 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +3 1:1035203->1:999610 1 1035203 + 0 200 1 999610 + 200 0 2992369 valid linear intronic 300 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +4 1:1035203->1:999610 1 999610 + 200 0 1 1035203 + 0 200 2992369 valid linear intronic 250 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +5 chr1:1035203->chr1:999610 chr1 999610 + 200 0 chr1 1035203 - 0 200 3052497 valid linear exonic 750 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +6 1:1035203->1:999610 1 1035203 + 0 200 1 999610 - 200 0 2992369 valid linear intronic 700 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +7 chr1:1035203->chr1:999610 chr1 1035203 - 0 200 chr1 999610 + 200 0 3052497 valid linear exonic 600 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +8 
1:1035203->1:999610 1 999610 - 200 0 1 1035203 + 0 200 2992369 valid linear intronic 450 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +9 chr1:1035203->chr1:999610 chr1 1035203 + 0 200 chr1 999610 + 200 0 3052497 valid linear exonic 400 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +10 chr1:1035203->chr1:999610 chr1 999610 + 200 0 chr1 1035203 + 0 200 3052497 valid linear exonic 350 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +11 chr1:999610->chr1:1035203 chr1 1035203 - 200 0 chr1 999610 - 0 200 3052497 valid linear exonic 200 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +12 chr1:999610->chr1:1035203 chr1 999610 - 0 200 chr1 1035203 - 200 0 3052497 valid linear exonic 150 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +13 1:999610->1:1035203 1 1035203 - 200 0 1 999610 - 0 200 2992369 valid linear intronic 100 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +14 1:999610->1:1035203 1 999610 - 0 200 1 1035203 - 200 0 2992369 valid linear intronic 50 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 diff --git a/tests/integrate/test_frameshift-prediction_01.in.dbed b/tests/integrate/test_frameshift-prediction_01.in.dbed new file mode 100644 index 00000000..a01729e3 --- /dev/null +++ b/tests/integrate/test_frameshift-prediction_01.in.dbed @@ -0,0 +1,5 @@ +chr-A pos-A direction-A pos-A-acceptor 
pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge median-AS-A median-AS-B max-AS-A max-AS-B data-structure +chr1 1035203 - 0 200 chr1 999610 - 200 0 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +chr1 999610 - 200 0 chr1 1035203 - 0 200 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +1 1035203 - 0 200 1 999610 - 200 0 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +1 999610 - 200 0 1 1035203 - 0 200 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 diff --git a/tests/integrate/test_frameshift-prediction_01.out.txt b/tests/integrate/test_frameshift-prediction_01.out.txt new file mode 100644 index 00000000..57e271c9 --- /dev/null +++ b/tests/integrate/test_frameshift-prediction_01.out.txt @@ -0,0 +1,5 @@ +shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge 
entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 splice-motif-edit-distance median-AS-A median-AS-B max-AS-A max-AS-B data-structure +1 chr1:1035203->chr1:999610 chr1 1035203 - 0 200 chr1 999610 - 200 0 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 AGRN(ENST00000620552.4)-ensembl->HES4(ENST00000304952.10)-ensembl_havana 50 50 50 50 +2 chr1:1035203->chr1:999610 chr1 999610 - 200 0 chr1 1035203 - 0 200 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 AGRN(ENST00000620552.4)-ensembl->HES4(ENST00000304952.10)-ensembl_havana 50 50 50 50 +3 1:1035203->1:999610 1 1035203 - 0 200 1 999610 - 200 0 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 AGRN(ENST00000620552.4)-ensembl->HES4(ENST00000304952.10)-ensembl_havana 50 50 50 50 +4 1:1035203->1:999610 1 999610 - 200 0 1 1035203 - 0 200 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 AGRN(ENST00000620552.4)-ensembl->HES4(ENST00000304952.10)-ensembl_havana 50 50 50 50 diff --git a/tests/integrate/test_frameshift-prediction_02.in.dbed b/tests/integrate/test_frameshift-prediction_02.in.dbed new file mode 100644 index 00000000..2ea6cec2 --- /dev/null +++ b/tests/integrate/test_frameshift-prediction_02.in.dbed @@ -0,0 +1,5 @@ +chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance 
filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge median-AS-A median-AS-B max-AS-A max-AS-B data-structure +chr1 1035203 - 0 200 chr1 999020 - 200 0 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +chr1 999020 - 200 0 chr1 1035203 - 0 200 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +1 1035203 - 0 200 1 999020 - 200 0 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +1 999020 - 200 0 1 1035203 - 0 200 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 diff --git a/tests/integrate/test_frameshift-prediction_02.out.txt b/tests/integrate/test_frameshift-prediction_02.out.txt new file mode 100644 index 00000000..80bc007d --- /dev/null +++ b/tests/integrate/test_frameshift-prediction_02.out.txt @@ -0,0 +1,5 @@ +shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue 
lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 splice-motif-edit-distance median-AS-A median-AS-B max-AS-A max-AS-B data-structure +1 chr1:1035203->chr1:999020 chr1 1035203 - 0 200 chr1 999020 - 200 0 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 AGRN(ENST00000620552.4)-ensembl(+0)->HES4(ENST00000304952.10)-ensembl_havana(+2) 50 50 50 50 +2 chr1:1035203->chr1:999020 chr1 999020 - 200 0 chr1 1035203 - 0 200 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 AGRN(ENST00000620552.4)-ensembl(+0)->HES4(ENST00000304952.10)-ensembl_havana(+2) 50 50 50 50 +3 1:1035203->1:999020 1 1035203 - 0 200 1 999020 - 200 0 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 AGRN(ENST00000620552.4)-ensembl(+0)->HES4(ENST00000304952.10)-ensembl_havana(+2) 50 50 50 50 +4 1:1035203->1:999020 1 999020 - 200 0 1 1035203 - 0 200 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 AGRN(ENST00000620552.4)-ensembl(+0)->HES4(ENST00000304952.10)-ensembl_havana(+2) 50 50 50 50 diff --git a/tests/integrate/test_frameshift-prediction_03.in.dbed b/tests/integrate/test_frameshift-prediction_03.in.dbed new file mode 100644 index 00000000..4811ba69 --- /dev/null +++ b/tests/integrate/test_frameshift-prediction_03.in.dbed @@ -0,0 +1,5 @@ +chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips 
n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge median-AS-A median-AS-B max-AS-A max-AS-B data-structure +chr1 1040604 - 0 200 chr1 999020 - 200 0 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +chr1 999020 - 200 0 chr1 1040604 - 0 200 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 +1 1040604 - 0 200 1 999020 - 200 0 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 +1 999020 - 200 0 1 1040604 - 0 200 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 diff --git a/tests/integrate/test_frameshift-prediction_03.out.txt b/tests/integrate/test_frameshift-prediction_03.out.txt new file mode 100644 index 00000000..ce136ba3 --- /dev/null +++ b/tests/integrate/test_frameshift-prediction_03.out.txt @@ -0,0 +1,5 @@ +shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept 
lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 splice-motif-edit-distance median-AS-A median-AS-B max-AS-A max-AS-B data-structure +1 chr1:1040604->chr1:999020 chr1 1040604 - 0 200 chr1 999020 - 200 0 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 AGRN(ENST00000620552.4)-ensembl->HES4(ENST00000304952.10)-ensembl_havana 50 50 50 50 +2 chr1:1040604->chr1:999020 chr1 999020 - 200 0 chr1 1040604 - 0 200 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 AGRN(ENST00000620552.4)-ensembl->HES4(ENST00000304952.10)-ensembl_havana 50 50 50 50 +3 1:1040604->1:999020 1 1040604 - 0 200 1 999020 - 200 0 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 AGRN(ENST00000620552.4)-ensembl->HES4(ENST00000304952.10)-ensembl_havana 50 50 50 50 +4 1:1040604->1:999020 1 999020 - 200 0 1 1040604 - 0 200 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 AGRN(ENST00000620552.4)-ensembl->HES4(ENST00000304952.10)-ensembl_havana 50 50 50 50 diff --git a/tests/integrate/test_in_frame_non_hybrid_protein.out.txt b/tests/integrate/test_in_frame_non_hybrid_protein.out.txt index b3519f3a..eb0b0d71 100644 --- a/tests/integrate/test_in_frame_non_hybrid_protein.out.txt +++ b/tests/integrate/test_in_frame_non_hybrid_protein.out.txt @@ -1,2 +1,2 @@ -shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads 
n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 median-AS-A median-AS-B max-AS-A max-AS-B data-structure -1 chr21:41508080->chr21:38445621 chr21 38445621 - 238 0 chr21 41508080 + 0 238 3062459 valid linear exonic 494 318 159 17 10000 0 12 5 8 2 0 0.6676 0.7788 0.2243 0.8525 0.2600 81.0521 0.8788 0.0000 0.0135 0.2575 16.2608 0.8745 0.0000 0.0137 0.1069 0.3219 1.0833 TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000429727.6)-ensembl 50 50 50 50 chr21:38445621/38445622(-)->chr21:41508080/41508081(+):(spanning_paired_1:111,spanning_paired_2:111) +shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 splice-motif-edit-distance median-AS-A median-AS-B max-AS-A max-AS-B data-structure +1 chr21:41508080->chr21:38445621 chr21 38445621 - 238 0 chr21 41508080 + 0 238 3062459 valid linear exonic 494 318 159 17 10000 0 12 5 8 2 0 0.6676 0.7788 0.2243 0.8525 0.2600 81.0521 0.8788 0.0000 0.0135 0.2575 16.2608 0.8745 0.0000 0.0137 0.1069 0.3219 1.0833 TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000429727.6)-ensembl 50 50 50 50 
chr21:38445621/38445622(-)->chr21:41508080/41508081(+):(spanning_paired_1:111,spanning_paired_2:111) diff --git a/tests/integrate/test_terg_s041.out.txt b/tests/integrate/test_terg_s041.out.txt index 60317f55..5d84a9bb 100644 --- a/tests/integrate/test_terg_s041.out.txt +++ b/tests/integrate/test_terg_s041.out.txt @@ -1,6 +1,6 @@ -shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 median-AS-A median-AS-B max-AS-A max-AS-B data-structure -1 TMPRSS2->ERG chr21 38487350 - 200 0 chr21 41479719 + 0 200 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 chr21:38487350/38487351(-)->chr21:41479719/41479720(+):(discordant_mates:14,spanning_paired_1:41,spanning_paired_1_t:7,spanning_paired_2:41,spanning_paired_2_t:7) -1 TMPRSS2->ERG chr21 38445621 - 200 0 chr21 41498118 + 0 200 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 
TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000429727.6)-ensembl,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000468474.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000473107.1)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000481609.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000485493.1)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000492833.5)-ensembl_havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000429727.6)-ensembl,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000468474.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000473107.1)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000481609.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000485493.1)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000492833.5)-ensembl_havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000429727.6)-ensembl,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000468474.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000473107.1)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000481609.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000492833.5)-ensembl_havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000429727.6)-ensembl,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000468474.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000473107.1)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000481609.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000492833.5)-ensembl_havana,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000398897.5)-ensembl_havana,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000429727.6)-ensembl,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000453032.6)-ensembl_havana,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000468474.5)-havana,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000473107.1)-havana,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000481609.5)-havana,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000485493.1)-havana,TMPRSS2(ENST00
000463138.1)-havana->ERG(ENST00000492833.5)-ensembl_havana,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000398897.5)-ensembl_havana,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000429727.6)-ensembl,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000453032.6)-ensembl_havana,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000468474.5)-havana,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000473107.1)-havana,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000481609.5)-havana,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000485493.1)-havana,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000492833.5)-ensembl_havana TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000424093.5)-h
avana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000442448.5)-ensembl 
TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000288319.11)-ensembl_havana(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000398905.5)-havana(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000398907.5)-havana(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000398910.5)-havana(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000398911.5)-havana(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000398919.6)-ensembl_havana(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000417133.6)-ensembl_havana(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000429727.6)-ensembl(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000442448.5)-ensembl(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000288319.11)-ensembl_havana(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000398905.5)-havana(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000398907.5)-havana(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000398910.5)-havana(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000398911.5)-havana(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000398919.6)-ensembl_havana(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000417133.6)-ensembl_havana(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000429727.6)-ensembl(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000442448.5)-ensembl(+1),TMPRSS2(ENST00000398585.7)-ensembl(+1)->ERG(ENST00000288319.11)-ensembl_havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+1)->ERG(ENST00000398905.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+1)->ERG(ENST00000398907.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+1)->ERG(ENST00000398910.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+1)->ERG(ENST00000398911.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+1)->ERG(ENST00000398919.6)-ensembl_havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+1)->ERG(ENST00000417133.6)-ensembl_havana(+0),TMPRSS2(
ENST00000398585.7)-ensembl(+1)->ERG(ENST00000442448.5)-ensembl(+0),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000288319.11)-ensembl_havana(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000398905.5)-havana(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000398907.5)-havana(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000398910.5)-havana(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000398911.5)-havana(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000398919.6)-ensembl_havana(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000417133.6)-ensembl_havana(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000429727.6)-ensembl(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000442448.5)-ensembl(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000288319.11)-ensembl_havana(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000398905.5)-havana(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000398907.5)-havana(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000398910.5)-havana(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000398911.5)-havana(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000398919.6)-ensembl_havana(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000417133.6)-ensembl_havana(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000429727.6)-ensembl(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000442448.5)-ensembl(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000288319.11)-ensembl_havana(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000398905.5)-havana(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000398907.5)-havana(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000398910.5)-havana(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000398911.5)-havana(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000398919.6)-ensembl_havana(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000417133.6)-ensembl_havana(+1),TMPRSS2(ENST00000455813.1)-ha
vana(+0)->ERG(ENST00000429727.6)-ensembl(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000442448.5)-ensembl(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000288319.11)-ensembl_havana(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000398905.5)-havana(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000398907.5)-havana(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000398910.5)-havana(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000398911.5)-havana(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000398919.6)-ensembl_havana(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000417133.6)-ensembl_havana(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000429727.6)-ensembl(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000442448.5)-ensembl(+1) TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000288319.11)-ensembl_havana(+0),TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000398905.5)-havana(+0),TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000398907.5)-havana(+0),TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000398910.5)-havana(+0),TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000398911.5)-havana(+0),TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000398919.6)-ensembl_havana(+0),TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000417133.6)-ensembl_havana(+0),TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000442448.5)-ensembl(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000288319.11)-ensembl_havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000398905.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000398907.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000398910.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000398911.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000398919.6)-ensembl_havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000417133.6)
-ensembl_havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000442448.5)-ensembl(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000288319.11)-ensembl_havana(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000398905.5)-havana(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000398907.5)-havana(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000398910.5)-havana(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000398911.5)-havana(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000398919.6)-ensembl_havana(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000417133.6)-ensembl_havana(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000442448.5)-ensembl(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000288319.11)-ensembl_havana(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000398905.5)-havana(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000398907.5)-havana(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000398910.5)-havana(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000398911.5)-havana(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000398919.6)-ensembl_havana(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000417133.6)-ensembl_havana(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000442448.5)-ensembl(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000288319.11)-ensembl_havana(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000398905.5)-havana(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000398907.5)-havana(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000398910.5)-havana(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000398911.5)-havana(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000398919.6)-ensembl_havana(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000417133.6)-ensembl_havana(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000442448.5)-ensembl(+0) 50 50 50 50 
chr21:38445621/38445622(-)->chr21:41498118/41498119(+):(discordant_mates:14,spanning_paired_1:114,spanning_paired_1_t:4,spanning_paired_2:114,spanning_paired_2_t:4)&chr21:38445621/38445622(-)->chr21:41508080/41508081(+):(spanning_paired_1:33,spanning_paired_2:33)&chr21:38445621/38445622(-)->chr21:41507949/41507950(+):(discordant_mates:8,spanning_paired_1:15,spanning_paired_2:15)&chr21:38474121/38474122(-)->chr21:41498118/41498119(+):(spanning_paired_1:14,spanning_paired_2:14)&chr21:38445621/38445622(-)->chr21:41480475/41480476(+):(discordant_mates:6,spanning_paired_1:2,spanning_paired_2:2)&chr21:38423561/38423562(-)->chr21:41498118/41498119(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:38445621/38445622(-)->chr21:41506444/41506445(+):(discordant_mates:6,spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:38445621/38445622(-)->chr21:41504366/41504367(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38446208/38446209(-)->chr21:41498118/41498119(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38474121/38474122(-)->chr21:41480475/41480476(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38474121/38474122(-)->chr21:41508080/41508081(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38423567/38423568(-)->chr21:41485824/41485825(+):(discordant_mates:4)&chr21:38445482/38445483(-)->chr21:41500346/41500347(+):(discordant_mates:2)&chr21:38474093/38474094(-)->chr21:41507966/41507967(+):(discordant_mates:2) -2 chr21:36338771->chr21:36344707 chr21 36338771 + 0 10 chr21 36344707 - 10 0 5936 valid circular exonic 117 58 38 3 10000 0 2 2 2 0 0 0.8536 0.8591 2.436 0.871 2.1714 19.1829 0.9605 0 0.1021 2.0941 32.6902 0.9725 0 0.0814 0.0789 0.2479 2 50 50 50 50 chr21:36338771/36338772(+)->chr21:36344707/36344708(-):(discordant_mates:6,spanning_paired_1:14,spanning_paired_1_t:23,spanning_paired_2:14,spanning_paired_2_t:23)&chr21:36338841/36338842(+)->chr21:36341534/36341535(-):(spanning_paired_1:1,spanning_paired_2:1) -3 chr21:29329693->chr21:29321220 chr21 29321220 + 10 0 
chr21 29329693 - 0 10 8473 valid circular exonic 49 35 15 4 10000 0 4 4 3 0 0 0.8605 0.8018 0 0.6667 8.3531 34.3077 0.9863 0 0.4412 9.4685 14.5897 0.9701 0 0.749 0.2667 0.3571 1.75 50 50 50 50 chr21:29321220/29321221(+)->chr21:29329693/29329694(-):(discordant_mates:4,spanning_paired_1:9,spanning_paired_1_t:1,spanning_paired_2:9,spanning_paired_2_t:1)&chr21:29326058/29326059(+)->chr21:29329693/29329694(-):(spanning_paired_1:4,spanning_paired_2:4)&chr21:29327324/29327325(+)->chr21:29329704/29329705(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:29321216/29321217(+)->chr21:29329647/29329648(-):(discordant_mates:4) -4 chr21:33432871<->chr21:33414887 chr21 33414887 + 5 5 chr21 33432871 - 5 5 17984 valid circular exonic 20 12 6 2 10000 0 3 3 2 1 0 0.8277 0.9167 0 0.75 12.6 50.8 0.7995 0.1045 5.4663 12.8 24.2 0.8072 0.0987 5.4049 0.3333 0.3 1.6667 50 50 50 50 chr21:33414887/33414888(+)->chr21:33432871/33432872(-):(spanning_paired_1:2,spanning_paired_1_t:3,spanning_paired_2:2,spanning_paired_2_t:3)&chr21:33426883/33426884(+)->chr21:33432871/33432872(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:33414915/33414916(+)->chr21:33421509/33421510(-):(discordant_mates:4) +shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 splice-motif-edit-distance median-AS-A median-AS-B max-AS-A max-AS-B data-structure +1 TMPRSS2->ERG chr21 38487350 - 200 0 chr21 41479719 + 0 200 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 
0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 chr21:38487350/38487351(-)->chr21:41479719/41479720(+):(discordant_mates:14,spanning_paired_1:41,spanning_paired_1_t:7,spanning_paired_2:41,spanning_paired_2_t:7) +1 TMPRSS2->ERG chr21 38445621 - 200 0 chr21 41498118 + 0 200 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000429727.6)-ensembl,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000468474.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000473107.1)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000481609.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000485493.1)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000492833.5)-ensembl_havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000429727.6)-ensembl,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000468474.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000473107.1)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000481609.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000485493.1)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000492833.5)-ensembl_havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000429727.6)-ensembl,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000468474.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000473107.1)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000481609.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000492833.5)-ensembl_havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000429727.6)-ensembl,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000468474.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000473107.1)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000481609.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000492833
.5)-ensembl_havana,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000398897.5)-ensembl_havana,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000429727.6)-ensembl,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000453032.6)-ensembl_havana,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000468474.5)-havana,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000473107.1)-havana,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000481609.5)-havana,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000485493.1)-havana,TMPRSS2(ENST00000463138.1)-havana->ERG(ENST00000492833.5)-ensembl_havana,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000398897.5)-ensembl_havana,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000429727.6)-ensembl,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000453032.6)-ensembl_havana,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000468474.5)-havana,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000473107.1)-havana,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000481609.5)-havana,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000485493.1)-havana,TMPRSS2(ENST00000497881.5)-havana->ERG(ENST00000492833.5)-ensembl_havana 
TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST000004544
99.5)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000442448.5)-ensembl 
TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000288319.11)-ensembl_havana(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000398905.5)-havana(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000398907.5)-havana(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000398910.5)-havana(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000398911.5)-havana(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000398919.6)-ensembl_havana(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000417133.6)-ensembl_havana(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000429727.6)-ensembl(+1),TMPRSS2(ENST00000332149.9)-ensembl_havana(+0)->ERG(ENST00000442448.5)-ensembl(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000288319.11)-ensembl_havana(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000398905.5)-havana(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000398907.5)-havana(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000398910.5)-havana(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000398911.5)-havana(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000398919.6)-ensembl_havana(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000417133.6)-ensembl_havana(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000429727.6)-ensembl(+1),TMPRSS2(ENST00000398585.7)-ensembl(+0)->ERG(ENST00000442448.5)-ensembl(+1),TMPRSS2(ENST00000398585.7)-ensembl(+1)->ERG(ENST00000288319.11)-ensembl_havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+1)->ERG(ENST00000398905.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+1)->ERG(ENST00000398907.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+1)->ERG(ENST00000398910.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+1)->ERG(ENST00000398911.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+1)->ERG(ENST00000398919.6)-ensembl_havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+1)->ERG(ENST00000417133.6)-ensembl_havana(+0),TMPRSS2(
ENST00000398585.7)-ensembl(+1)->ERG(ENST00000442448.5)-ensembl(+0),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000288319.11)-ensembl_havana(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000398905.5)-havana(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000398907.5)-havana(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000398910.5)-havana(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000398911.5)-havana(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000398919.6)-ensembl_havana(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000417133.6)-ensembl_havana(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000429727.6)-ensembl(+1),TMPRSS2(ENST00000424093.5)-havana(+0)->ERG(ENST00000442448.5)-ensembl(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000288319.11)-ensembl_havana(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000398905.5)-havana(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000398907.5)-havana(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000398910.5)-havana(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000398911.5)-havana(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000398919.6)-ensembl_havana(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000417133.6)-ensembl_havana(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000429727.6)-ensembl(+1),TMPRSS2(ENST00000454499.5)-havana(+0)->ERG(ENST00000442448.5)-ensembl(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000288319.11)-ensembl_havana(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000398905.5)-havana(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000398907.5)-havana(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000398910.5)-havana(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000398911.5)-havana(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000398919.6)-ensembl_havana(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000417133.6)-ensembl_havana(+1),TMPRSS2(ENST00000455813.1)-ha
vana(+0)->ERG(ENST00000429727.6)-ensembl(+1),TMPRSS2(ENST00000455813.1)-havana(+0)->ERG(ENST00000442448.5)-ensembl(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000288319.11)-ensembl_havana(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000398905.5)-havana(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000398907.5)-havana(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000398910.5)-havana(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000398911.5)-havana(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000398919.6)-ensembl_havana(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000417133.6)-ensembl_havana(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000429727.6)-ensembl(+1),TMPRSS2(ENST00000458356.5)-havana(+0)->ERG(ENST00000442448.5)-ensembl(+1) TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000288319.11)-ensembl_havana(+0),TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000398905.5)-havana(+0),TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000398907.5)-havana(+0),TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000398910.5)-havana(+0),TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000398911.5)-havana(+0),TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000398919.6)-ensembl_havana(+0),TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000417133.6)-ensembl_havana(+0),TMPRSS2(ENST00000332149.9)-ensembl_havana(+2)->ERG(ENST00000442448.5)-ensembl(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000288319.11)-ensembl_havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000398905.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000398907.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000398910.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000398911.5)-havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000398919.6)-ensembl_havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000417133.6)
-ensembl_havana(+0),TMPRSS2(ENST00000398585.7)-ensembl(+2)->ERG(ENST00000442448.5)-ensembl(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000288319.11)-ensembl_havana(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000398905.5)-havana(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000398907.5)-havana(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000398910.5)-havana(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000398911.5)-havana(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000398919.6)-ensembl_havana(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000417133.6)-ensembl_havana(+0),TMPRSS2(ENST00000424093.5)-havana(+2)->ERG(ENST00000442448.5)-ensembl(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000288319.11)-ensembl_havana(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000398905.5)-havana(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000398907.5)-havana(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000398910.5)-havana(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000398911.5)-havana(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000398919.6)-ensembl_havana(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000417133.6)-ensembl_havana(+0),TMPRSS2(ENST00000454499.5)-havana(+2)->ERG(ENST00000442448.5)-ensembl(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000288319.11)-ensembl_havana(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000398905.5)-havana(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000398907.5)-havana(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000398910.5)-havana(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000398911.5)-havana(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000398919.6)-ensembl_havana(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000417133.6)-ensembl_havana(+0),TMPRSS2(ENST00000458356.5)-havana(+2)->ERG(ENST00000442448.5)-ensembl(+0) 50 50 50 50 
chr21:38445621/38445622(-)->chr21:41498118/41498119(+):(discordant_mates:14,spanning_paired_1:114,spanning_paired_1_t:4,spanning_paired_2:114,spanning_paired_2_t:4)&chr21:38445621/38445622(-)->chr21:41508080/41508081(+):(spanning_paired_1:33,spanning_paired_2:33)&chr21:38445621/38445622(-)->chr21:41507949/41507950(+):(discordant_mates:8,spanning_paired_1:15,spanning_paired_2:15)&chr21:38474121/38474122(-)->chr21:41498118/41498119(+):(spanning_paired_1:14,spanning_paired_2:14)&chr21:38445621/38445622(-)->chr21:41480475/41480476(+):(discordant_mates:6,spanning_paired_1:2,spanning_paired_2:2)&chr21:38423561/38423562(-)->chr21:41498118/41498119(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:38445621/38445622(-)->chr21:41506444/41506445(+):(discordant_mates:6,spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:38445621/38445622(-)->chr21:41504366/41504367(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38446208/38446209(-)->chr21:41498118/41498119(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38474121/38474122(-)->chr21:41480475/41480476(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38474121/38474122(-)->chr21:41508080/41508081(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38423567/38423568(-)->chr21:41485824/41485825(+):(discordant_mates:4)&chr21:38445482/38445483(-)->chr21:41500346/41500347(+):(discordant_mates:2)&chr21:38474093/38474094(-)->chr21:41507966/41507967(+):(discordant_mates:2) +2 chr21:36338771->chr21:36344707 chr21 36338771 + 0 10 chr21 36344707 - 10 0 5936 valid circular exonic 117 58 38 3 10000 0 2 2 2 0 0 0.8536 0.8591 2.436 0.871 2.1714 19.1829 0.9605 0 0.1021 2.0941 32.6902 0.9725 0 0.0814 0.0789 0.2479 2 50 50 50 50 chr21:36338771/36338772(+)->chr21:36344707/36344708(-):(discordant_mates:6,spanning_paired_1:14,spanning_paired_1_t:23,spanning_paired_2:14,spanning_paired_2_t:23)&chr21:36338841/36338842(+)->chr21:36341534/36341535(-):(spanning_paired_1:1,spanning_paired_2:1) +3 chr21:29329693->chr21:29321220 chr21 29321220 + 10 0 
chr21 29329693 - 0 10 8473 valid circular exonic 49 35 15 4 10000 0 4 4 3 0 0 0.8605 0.8018 0 0.6667 8.3531 34.3077 0.9863 0 0.4412 9.4685 14.5897 0.9701 0 0.749 0.2667 0.3571 1.75 50 50 50 50 chr21:29321220/29321221(+)->chr21:29329693/29329694(-):(discordant_mates:4,spanning_paired_1:9,spanning_paired_1_t:1,spanning_paired_2:9,spanning_paired_2_t:1)&chr21:29326058/29326059(+)->chr21:29329693/29329694(-):(spanning_paired_1:4,spanning_paired_2:4)&chr21:29327324/29327325(+)->chr21:29329704/29329705(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:29321216/29321217(+)->chr21:29329647/29329648(-):(discordant_mates:4) +4 chr21:33432871<->chr21:33414887 chr21 33414887 + 5 5 chr21 33432871 - 5 5 17984 valid circular exonic 20 12 6 2 10000 0 3 3 2 1 0 0.8277 0.9167 0 0.75 12.6 50.8 0.7995 0.1045 5.4663 12.8 24.2 0.8072 0.0987 5.4049 0.3333 0.3 1.6667 50 50 50 50 chr21:33414887/33414888(+)->chr21:33432871/33432872(-):(spanning_paired_1:2,spanning_paired_1_t:3,spanning_paired_2:2,spanning_paired_2_t:3)&chr21:33426883/33426884(+)->chr21:33432871/33432872(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:33414915/33414916(+)->chr21:33421509/33421510(-):(discordant_mates:4) diff --git a/tests/integrate/test_terg_s041_b.out.txt b/tests/integrate/test_terg_s041_b.out.txt index 5adfb7cb..697bede1 100644 --- a/tests/integrate/test_terg_s041_b.out.txt +++ b/tests/integrate/test_terg_s041_b.out.txt @@ -1,6 +1,6 @@ -shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation 
frameshift=0 frameshift=+1 frameshift=+2 median-AS-A median-AS-B max-AS-A max-AS-B data-structure -1 chr21:41479719->chr21:38487350 chr21 38487350 - 200 0 chr21 41479719 + 0 200 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 chr21:38487350/38487351(-)->chr21:41479719/41479720(+):(discordant_mates:14,spanning_paired_1:41,spanning_paired_1_t:7,spanning_paired_2:41,spanning_paired_2_t:7) -1 chr21:41498118->chr21:38445621 chr21 38445621 - 200 0 chr21 41498118 + 0 200 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 chr21:38445621/38445622(-)->chr21:41498118/41498119(+):(discordant_mates:14,spanning_paired_1:114,spanning_paired_1_t:4,spanning_paired_2:114,spanning_paired_2_t:4)&chr21:38445621/38445622(-)->chr21:41508080/41508081(+):(spanning_paired_1:33,spanning_paired_2:33)&chr21:38445621/38445622(-)->chr21:41507949/41507950(+):(discordant_mates:8,spanning_paired_1:15,spanning_paired_2:15)&chr21:38474121/38474122(-)->chr21:41498118/41498119(+):(spanning_paired_1:14,spanning_paired_2:14)&chr21:38445621/38445622(-)->chr21:41480475/41480476(+):(discordant_mates:6,spanning_paired_1:2,spanning_paired_2:2)&chr21:38423561/38423562(-)->chr21:41498118/41498119(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:38445621/38445622(-)->chr21:41506444/41506445(+):(discordant_mates:6,spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:38445621/38445622(-)->chr21:41504366/41504367(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38446208/38446209(-)->chr21:41498118/41498119(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38474121/38474122(-)->chr21:41480475/41480476(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38474121/38474122(-)->chr21:41508080/41508081(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38423567/38423568(-
)->chr21:41485824/41485825(+):(discordant_mates:4)&chr21:38445482/38445483(-)->chr21:41500346/41500347(+):(discordant_mates:2)&chr21:38474093/38474094(-)->chr21:41507966/41507967(+):(discordant_mates:2) -2 chr21:36338771->chr21:36344707 chr21 36338771 + 0 10 chr21 36344707 - 10 0 5936 valid circular exonic 117 58 38 3 10000 0 2 2 2 0 0 0.8536 0.8591 2.436 0.871 2.1714 19.1829 0.9605 0 0.1021 2.0941 32.6902 0.9725 0 0.0814 0.0789 0.2479 2 50 50 50 50 chr21:36338771/36338772(+)->chr21:36344707/36344708(-):(discordant_mates:6,spanning_paired_1:14,spanning_paired_1_t:23,spanning_paired_2:14,spanning_paired_2_t:23)&chr21:36338841/36338842(+)->chr21:36341534/36341535(-):(spanning_paired_1:1,spanning_paired_2:1) -3 chr21:29329693->chr21:29321220 chr21 29321220 + 10 0 chr21 29329693 - 0 10 8473 valid circular exonic 49 35 15 4 10000 0 4 4 3 0 0 0.8605 0.8018 0 0.6667 8.3531 34.3077 0.9863 0 0.4412 9.4685 14.5897 0.9701 0 0.749 0.2667 0.3571 1.75 50 50 50 50 chr21:29321220/29321221(+)->chr21:29329693/29329694(-):(discordant_mates:4,spanning_paired_1:9,spanning_paired_1_t:1,spanning_paired_2:9,spanning_paired_2_t:1)&chr21:29326058/29326059(+)->chr21:29329693/29329694(-):(spanning_paired_1:4,spanning_paired_2:4)&chr21:29327324/29327325(+)->chr21:29329704/29329705(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:29321216/29321217(+)->chr21:29329647/29329648(-):(discordant_mates:4) -4 chr21:33432871<->chr21:33414887 chr21 33414887 + 5 5 chr21 33432871 - 5 5 17984 valid circular exonic 20 12 6 2 10000 0 3 3 2 1 0 0.8277 0.9167 0 0.75 12.6 50.8 0.7995 0.1045 5.4663 12.8 24.2 0.8072 0.0987 5.4049 0.3333 0.3 1.6667 50 50 50 50 chr21:33414887/33414888(+)->chr21:33432871/33432872(-):(spanning_paired_1:2,spanning_paired_1_t:3,spanning_paired_2:2,spanning_paired_2_t:3)&chr21:33426883/33426884(+)->chr21:33432871/33432872(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:33414915/33414916(+)->chr21:33421509/33421510(-):(discordant_mates:4) +shared-id fusion chr-A pos-A direction-A 
pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 splice-motif-edit-distance median-AS-A median-AS-B max-AS-A max-AS-B data-structure +1 chr21:41479719->chr21:38487350 chr21 38487350 - 200 0 chr21 41479719 + 0 200 2992369 valid linear intronic 151 96 48 7 10000 0 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 50 50 50 50 chr21:38487350/38487351(-)->chr21:41479719/41479720(+):(discordant_mates:14,spanning_paired_1:41,spanning_paired_1_t:7,spanning_paired_2:41,spanning_paired_2_t:7) +1 chr21:41498118->chr21:38445621 chr21 38445621 - 200 0 chr21 41498118 + 0 200 3052497 valid linear exonic 588 378 189 21 10000 0 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 50 50 50 50 
chr21:38445621/38445622(-)->chr21:41498118/41498119(+):(discordant_mates:14,spanning_paired_1:114,spanning_paired_1_t:4,spanning_paired_2:114,spanning_paired_2_t:4)&chr21:38445621/38445622(-)->chr21:41508080/41508081(+):(spanning_paired_1:33,spanning_paired_2:33)&chr21:38445621/38445622(-)->chr21:41507949/41507950(+):(discordant_mates:8,spanning_paired_1:15,spanning_paired_2:15)&chr21:38474121/38474122(-)->chr21:41498118/41498119(+):(spanning_paired_1:14,spanning_paired_2:14)&chr21:38445621/38445622(-)->chr21:41480475/41480476(+):(discordant_mates:6,spanning_paired_1:2,spanning_paired_2:2)&chr21:38423561/38423562(-)->chr21:41498118/41498119(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:38445621/38445622(-)->chr21:41506444/41506445(+):(discordant_mates:6,spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:38445621/38445622(-)->chr21:41504366/41504367(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38446208/38446209(-)->chr21:41498118/41498119(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38474121/38474122(-)->chr21:41480475/41480476(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38474121/38474122(-)->chr21:41508080/41508081(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38423567/38423568(-)->chr21:41485824/41485825(+):(discordant_mates:4)&chr21:38445482/38445483(-)->chr21:41500346/41500347(+):(discordant_mates:2)&chr21:38474093/38474094(-)->chr21:41507966/41507967(+):(discordant_mates:2) +2 chr21:36338771->chr21:36344707 chr21 36338771 + 0 10 chr21 36344707 - 10 0 5936 valid circular exonic 117 58 38 3 10000 0 2 2 2 0 0 0.8536 0.8591 2.436 0.871 2.1714 19.1829 0.9605 0 0.1021 2.0941 32.6902 0.9725 0 0.0814 0.0789 0.2479 2 50 50 50 50 chr21:36338771/36338772(+)->chr21:36344707/36344708(-):(discordant_mates:6,spanning_paired_1:14,spanning_paired_1_t:23,spanning_paired_2:14,spanning_paired_2_t:23)&chr21:36338841/36338842(+)->chr21:36341534/36341535(-):(spanning_paired_1:1,spanning_paired_2:1) +3 chr21:29329693->chr21:29321220 chr21 29321220 + 10 0 
chr21 29329693 - 0 10 8473 valid circular exonic 49 35 15 4 10000 0 4 4 3 0 0 0.8605 0.8018 0 0.6667 8.3531 34.3077 0.9863 0 0.4412 9.4685 14.5897 0.9701 0 0.749 0.2667 0.3571 1.75 50 50 50 50 chr21:29321220/29321221(+)->chr21:29329693/29329694(-):(discordant_mates:4,spanning_paired_1:9,spanning_paired_1_t:1,spanning_paired_2:9,spanning_paired_2_t:1)&chr21:29326058/29326059(+)->chr21:29329693/29329694(-):(spanning_paired_1:4,spanning_paired_2:4)&chr21:29327324/29327325(+)->chr21:29329704/29329705(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:29321216/29321217(+)->chr21:29329647/29329648(-):(discordant_mates:4) +4 chr21:33432871<->chr21:33414887 chr21 33414887 + 5 5 chr21 33432871 - 5 5 17984 valid circular exonic 20 12 6 2 10000 0 3 3 2 1 0 0.8277 0.9167 0 0.75 12.6 50.8 0.7995 0.1045 5.4663 12.8 24.2 0.8072 0.0987 5.4049 0.3333 0.3 1.6667 50 50 50 50 chr21:33414887/33414888(+)->chr21:33432871/33432872(-):(spanning_paired_1:2,spanning_paired_1_t:3,spanning_paired_2:2,spanning_paired_2_t:3)&chr21:33426883/33426884(+)->chr21:33432871/33432872(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:33414915/33414916(+)->chr21:33421509/33421510(-):(discordant_mates:4) diff --git a/tests/splice_site_motif/test_splice_site_motif_01.dbed b/tests/splice_site_motif/test_splice_site_motif_01.dbed new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/tests/splice_site_motif/test_splice_site_motif_01.dbed @@ -0,0 +1 @@ + diff --git a/tests/splice_site_motif/test_splice_site_motif_01.in.fa b/tests/splice_site_motif/test_splice_site_motif_01.in.fa new file mode 100644 index 00000000..6836280a --- /dev/null +++ b/tests/splice_site_motif/test_splice_site_motif_01.in.fa @@ -0,0 +1,71 @@ +>chr2 +NNNNNNNNNTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTG +TTCTTAAAGGTAAGTTTTTT +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNCAGG +GCCCCAGGATCCTCTATTAA +ATGTGTGGTCCATGAACCAG +CAGCTTCAGCATGACCTGAG +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN diff --git a/tests/splice_site_motif/test_splice_site_motif_01.in.sam b/tests/splice_site_motif/test_splice_site_motif_01.in.sam new file mode 100644 index 00000000..6cd7ef84 --- /dev/null +++ b/tests/splice_site_motif/test_splice_site_motif_01.in.sam @@ -0,0 +1,47 @@ +@HD VN:1.4 SO:coordinate +@SQ SN:chr1 LN:248956422 +@SQ SN:chr2 LN:242193529 +@SQ SN:chr3 LN:198295559 +@SQ SN:chr4 LN:190214555 +@SQ SN:chr5 LN:181538259 +@SQ SN:chr6 LN:170805979 +@SQ SN:chr7 LN:159345973 +@SQ SN:chr8 LN:145138636 +@SQ SN:chr9 LN:138394717 +@SQ SN:chr10 LN:133797422 +@SQ SN:chr11 LN:135086622 +@SQ SN:chr12 LN:133275309 +@SQ SN:chr13 LN:114364328 +@SQ SN:chr14 LN:107043718 +@SQ SN:chr15 LN:101991189 +@SQ SN:chr16 LN:90338345 +@SQ 
SN:chr17 LN:83257441 +@SQ SN:chr18 LN:80373285 +@SQ SN:chr19 LN:58617616 +@SQ SN:chr20 LN:64444167 +@SQ SN:chr21 LN:46709983 +@SQ SN:chr22 LN:50818468 +@SQ SN:chrX LN:156040895 +@SQ SN:chrY LN:57227415 +@SQ SN:chrM LN:16569 +@CO user command line: STAR --outSAMstrandField intronMotif --outFilterIntronMotifs RemoveNoncanonicalUnannotated --chimSegmentMin 12 --chimJunctionOverhangMin 12 --alignSJDBoverhangMin 10 --alignMatesGapMax 200000 --alignIntronMax 200000 --outSAMtype BAM Unsorted --outSAMunmapped Within --alignEndsType Local --chimOutType WithinBAM --twopassMode Basic --twopass1readsN -1 --quantMode GeneCounts --runThreadN 10 +read_001 163 chr2 10 3 75M = 176 180 GGTGCGCGCACCCACTGACCTGCGCCCACTGTCTGGCACTCCGTAGTGAGATGAACCCGGTACCTCAGATGGAAA ?E;DDEFBDEBCF:CEHGHFH??DEDDFHBFCDDFF?JFEDDDA8F9EDG>EFFGGEBDD7DG88-.G88@@HG? NH:i:2 HI:i:2 AS:i:87 nM:i:0 NM:i:0 +read_002 161 chr2 42 3 75M = 1100 0 CTGGCACTCCGTAGTGAGATGAACCCGGTACCTCAGATGGAAATGCAGAAATCACCCATCTTCTGCGTCACTCAA ?DADDEFBFECCGG?DGFDDGFFBGHGFHEGFDGGFHHFEGFHGGFGGFGGFCEGDFFIHGAHIFGAFHEHFHGE NH:i:2 HI:i:2 AS:i:73 nM:i:0 NM:i:0 +read_003 385 chr2 142 3 48M27S = 1099 0 TCCTATTCGGCCATCTTGGCTCCTCCGCTCTATTTGCAGTTCTTAAAGGGCCCCAGGATCCTCTATTAAATGTGT 9DADFEFFFEGFFFGHGGH9HF78GAGFFFHHHG:GHFHEGEFE.DGGHGCGIHFHGEGFGHFIHIGGHH@GDG? 
NH:i:2 HI:i:2 AS:i:47 nM:i:0 NM:i:0 +read_004 385 chr2 149 3 41M34S = 1026 0 CGGCCATCTTGGCTCCTCCGCTCTATTTGCAGTTCTTAAAGGGCCCCAGGATCCTCTATTAAATGTGTGGTCCAT ?CDDDEFFFEGAGFEFDF;FH?DGFHGFFEHHAGFGHHHGGGICGIGGEHGDIFFGEEIHGHGKFFGHHHGGGGE NH:i:2 HI:i:2 AS:i:40 nM:i:0 NM:i:0 +read_005 385 chr2 149 3 41M34S = 1026 0 CGGCCATCTTGGCTCCTCCGCTCTATTTGCAGTTCTTAAAGGGCCCCAGGATCCTCTATTAAATGTGTGGTCCAT ?CDDFAFGDEGFFFFHGFFFGCFGBGGGFEHCFGFGEJHEGH?EGFGGHHAEGFGHHHGHEHGGFBGFDHGGEFG NH:i:2 HI:i:2 AS:i:40 nM:i:0 NM:i:0 +read_006 385 chr2 149 3 41M34S = 1026 0 CGGCCATCTTGGCTCCTCCGCTCTATTTGCAGTTCTTAAAGGGCCCCAGGATCCTCTATTAAATGTGTGGTCCAT ?EEDFEFFFEGHFFEGGGHFHGDGEGHFFGHHFGFGGEHIGGIEGIGGDGGGIGFGHFIFGHHGJFFGEHGGHGH NH:i:2 HI:i:2 AS:i:40 nM:i:0 NM:i:0 +read_007 385 chr2 163 3 27M48S = 1210 0 CCTCCGCTCTATTTGCAGTTCTTAAAGGGCCCCAGGATCCTCTATTAAATGTGTGGTCCATGAACCAGCAGCTTC >ACDF?FACDF?F NH:i:2 HI:i:1 AS:i:107 nM:i:0 NM:i:0 +read_006 83 chr2 1026 3 75M = 1000 -101 TGGTCCATGAACCAGCAGCTTCAGCATGACCTGAGAGCTCATAACCTCGTCTCTACAAAAAATACAAAAAAAGTT FGHFGGEGHIHGGGGIHG?IEIFH7GGHFGGHGCFKHHGFGHGDIGGFHEIGHEGGHHHGHHEFGGGGFFFGDDE NH:i:2 HI:i:1 AS:i:107 nM:i:0 NM:i:0 +read_003 83 chr2 1099 3 75M = 1000 -174 TTAGNCAGGCATGGTGGTACGCGCCTGTGGTCTCAGCAACTTGGGAGGCTGAGGTGAGTGGATTGCTTGAACCTG FEHF!GF:FECFGGEGGFHGEEGGGGGDEGGGDHBGFFFFGG=GFGGDHFFGFFGHGHGDHGHGGFGGFGEDDBE NH:i:2 HI:i:1 AS:i:95 nM:i:2 NM:i:0 +read_002 81 chr2 1100 3 75M = 42 0 TAGCCAGGCATGGTGGTACGCGCCTGTGGTCTCAGCAACTTGGGAGGCTGAGGTGAGTGGATTGCTTGAACCTGG FGGHEGHGGGHDFF:GHGEGGIEGGGGHIFGDFGEAHHFGGHGEFGGFHGFGCGGGFHEGFHGEAGGGFFE>DDD NH:i:2 HI:i:1 AS:i:69 nM:i:2 NM:i:0 +read_007 83 chr2 1210 3 75M = 1000 -285 CTCCGGCCTGGGAGATGGAGCCGGACACTGTCTCAAAAGAAAAAAAAAAGAAATGCAGAACCTCAGGCTGTTCCC F9BCFDEHGFCDGFD:DFCGEFFGD90FEDGG@ACGEHchr2:999 chr2 189 - 0 14 chr2 999 + 14 0 810 unclassified linear intronic 19 12 6 1 1019 4 1 1 1 0 0 0.7580 0.7580 0.0000 1.0000 8.0357 16.8929 0.9213 0.0032 1.5165 7.5714 22.0000 0.9311 0.0023 1.3263 0.1667 0.3158 2.0000 0 41 34 75 75 
chr2:189/190(-)->chr2:999/1000(+):(discordant_mates:2,spanning_paired_1:5,spanning_paired_1_t:1,spanning_paired_2:5,spanning_paired_2_t:1) diff --git a/tests/splice_site_motif/test_splice_site_motif_02.in.fa b/tests/splice_site_motif/test_splice_site_motif_02.in.fa new file mode 100644 index 00000000..10fce745 --- /dev/null +++ b/tests/splice_site_motif/test_splice_site_motif_02.in.fa @@ -0,0 +1,38 @@ +>chr21 +NNNNNNNNNNTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCTGNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNACTTACC +TGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAAAAAAACTTACCTGAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA diff --git a/tests/splice_site_motif/test_splice_site_motif_02.in.sam b/tests/splice_site_motif/test_splice_site_motif_02.in.sam new file mode 100644 index 00000000..fb7531fb --- /dev/null +++ b/tests/splice_site_motif/test_splice_site_motif_02.in.sam @@ -0,0 +1,57 @@ +@HD VN:1.4 SO:coordinate +@SQ SN:chrM LN:16571 +@SQ SN:chr1 LN:249250621 +@SQ SN:chr2 LN:243199373 +@SQ SN:chr3 LN:198022430 +@SQ SN:chr4 LN:191154276 +@SQ SN:chr5 LN:180915260 +@SQ SN:chr6 LN:171115067 +@SQ SN:chr7 LN:159138663 +@SQ SN:chr8 LN:146364022 +@SQ SN:chr9 LN:141213431 +@SQ SN:chr10 LN:135534747 +@SQ SN:chr11 LN:135006516 +@SQ SN:chr12 LN:133851895 +@SQ SN:chr13 LN:115169878 +@SQ SN:chr14 LN:107349540 +@SQ SN:chr15 LN:102531392 +@SQ SN:chr16 LN:90354753 +@SQ SN:chr17 LN:81195210 +@SQ SN:chr18 LN:78077248 +@SQ SN:chr19 LN:59128983 +@SQ SN:chr20 LN:63025520 +@SQ SN:chr21 LN:48129895 +@SQ SN:chr22 LN:51304566 +@SQ SN:chrX LN:155270560 +@SQ SN:chrY LN:59373566 +@PG ID:STAR PN:STAR VN:STAR_2.4.2a_modified CL:STAR --runThreadN 9 --genomeDir STAR_index_hg19/ --readFilesIn R1_paired.fastq.gz R2_paired.fastq.gz --readFilesCommand zcat --outFileNamePrefix paired/ --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --outFilterIntronMotifs None --alignIntronMax 200000 --alignMatesGapMax 200000 --alignSJDBoverhangMin 10 --alignEndsType Local --chimSegmentMin 12 --chimJunctionOverhangMin 12 --sjdbGTFfile gencode.v19.annotation.gtf --sjdbOverhang 100 --quantMode GeneCounts --twopass1readsN 18446744073709551615 --twopassMode Basic +@CO user command line: STAR --genomeDir STAR_index_hg19/ --sjdbOverhang 100 --sjdbGTFfile 
gencode.v19.annotation.gtf --quantMode GeneCounts --outSAMstrandField intronMotif --outFilterIntronMotifs None --chimSegmentMin 12 --chimJunctionOverhangMin 12 --alignSJDBoverhangMin 10 --alignMatesGapMax 200000 --alignIntronMax 200000 --outSAMtype BAM SortedByCoordinate --alignEndsType Local --readFilesCommand zcat --twopassMode Basic --twopass1readsN -1 --outFileNamePrefix paired/ --runThreadN 9 --readFilesIn L004_R1_paired.fastq.gz L004_R2_paired.fastq.gz +D00xxx:000:x00x0xxxx:3:1106:8700:84313 99 chr21 11 3 109M = 529 579 CATGTTTGGGGGTGGCATGTGCTTCTCCTCCATGTAGCTGCCGTAGTTCATCCCAACGGTGTCTGGGCTGCCCACCATCTTCCCGCCTTTGGCCACACTGCATTCATCA ?>BGGGGFGGG@CBDGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGB/FEFGCGGGGGF@FGF>F0EFEGDGGFGGGGGGEEGGG.CAGGGG0BCGGG;GGGGGGGGGGG NH:i:2 HI:i:1 AS:i:166 nM:i:0 +D00xxx:000:x00x0xxxx:6:1110:4542:86939 99 chr21 14 3 115M243N11M = 490 576 GTTTGGGGGTGGCATGTGCTTCTCCTCCATGTAGCTGCCGTAGTTCATCCCAACGGTGTCTGGGCTGCCCACCATCTTCCCGCCTTTGGCCACACTGCATTCATCAGGAGAGTTCCTTGAGCCATT <3?@BGGGGGDAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGGGGGGBG@FGGGGGGGGGGGGGG@CGG=GBGD@GBGGEE/EGGGGE NH:i:2 HI:i:1 AS:i:224 nM:i:0 XS:A:- +D00xxx:000:x00x0xxxx:5:1209:11373:21218 99 chr21 58 3 71M243N20M = 495 532 TCATCCCAACGGTGTCTGGGCTGCCCACCATCTTCCCGCCTTTGGCCACACTGCATTCATCAGGAGAGTTCCTTGAGCCATTCACCTGGCT >BBCFFGGGGGGGGGGGGGEGGBGGG@EBCGGGEGGGGGBFGGF1FCGGGGGGGGGGGEGFGGGGGFDFGEGGGCGGGGGGGGFGGGEGGG NH:i:2 HI:i:1 AS:i:184 nM:i:0 XS:A:- +D00xxx:000:x00x0xxxx:4:2205:7737:30184 97 chr21 348 3 125M = 1607 0 AGAAAGGGGCGGAAGTCTCCTTACCTTGAGCCATTCACCTGGCTAGGGTTACATTCCATTTTGATGGTGACCCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTT >ABACGGGGGGGGDGGGGGGGGGGGGGEGGGGGGGGGGGGGDGGGBGGGGGGGGGGGDGGGGGBG>0EFGGGGGGGGEGGBCGADGEGGGEGEGGGGGBBEGBDEGEGGGGAA>>CGGGGGBEGG NH:i:2 HI:i:1 AS:i:123 nM:i:0 +D00xxx:000:x00x0xxxx:1:1314:13313:61767 97 chr21 395 3 126M = 1633 0 GTTACATTCCATTTTGATGGTGACCCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGTCCATAGTCGCTGGAGGAGGACGCGGTCATCTCTGTCTT 
A:@BBGGGGGGGGGGGGGGGGGGGGGGGGGGFGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGCGGFEEGGGGGGGGGGGGGGGGGBGGGGGGGDBGBBGGGGGD0FGGGGGGD NH:i:2 HI:i:1 AS:i:78 nM:i:0 +D00xxx:000:x00x0xxxx:6:1110:4542:86939 147 chr21 490 3 100M25S = 14 -576 CGCTGGAGGAGGACGCGGTCATCTCTGTCTTAGCCAGGTGTGGCGTTCCGTAGGCACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGCGCTCCAGGCGGCGCTCCC GGGGGGGGGGGDDGGGGGGGGEGED=GGEEGEGEGGGGEGGGGDGGGDGF>GGGEF>GGGEGGGGGGGGGGGGGGGGCGGGEGFGEEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG@ABBB NH:i:2 HI:i:1 AS:i:224 nM:i:0 XS:A:- +D00xxx:000:x00x0xxxx:2:2102:3982:10406 147 chr21 490 3 100M26S = 465 -125 CGCTGGAGGAGGACGCGGTCATCTCTGTCTTAGCCAGGTGTGGCGTTCCGTAGGCACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGCGCTCCAGGCGGCGCTCCCC GGGGGGGGGGGGGGGGBGGGGGGGGGGGGGBGGGEGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBCBBB NH:i:2 HI:i:1 AS:i:221 nM:i:0 +D00xxx:000:x00x0xxxx:5:1209:11373:21218 147 chr21 495 3 95M31S = 58 -532 GAGGAGGACGCGGTCATCTCTGTCTTAGCCAGGTGTGGCGTTCCGTAGGCACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGCGCTCCAGGCGGCGCTCCCCGCCCC GGGGGA:GGGGGGGGGGGDEBGG@EBGEEGGGGGGGGGGGDBGGGGGGEGGGGE=GGGGGGGGGGGGGE=EGGGGGGD@D0=GGED;GEEGGGGGGFGGGGGGGEGEGGGEBB@GGGG@BBBB NH:i:2 HI:i:1 AS:i:184 nM:i:0 XS:A:- +D00xxx:000:x00x0xxxx:2:1105:9810:99004 147 chr21 495 3 95M31S = 424 -166 GAGGAGGACGCGGTCATCTCTGTCTTAGCCAGGTGTGGCGTTCCGTAGGCACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGCGCTCCAGGCGGCGCTCCCCGCCCC GGGGGGGGGGGGGGEGEGGGGGEGGGEGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGCCCCC NH:i:2 HI:i:1 AS:i:216 nM:i:0 +D00xxx:000:x00x0xxxx:3:1106:8700:84313 147 chr21 529 3 61M65S = 11 -579 GTGGCGTTCCGTAGGCACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGCGCTCCAGGCGGCGCTCCCCGCCCCTCGCCCTCCGCCTCCGCCTCCGCCTCCTGCTTAG GGGG>.B>GGGGEGGGGGGGGGGGGGGGGEGGEGGGEEDC=G@G@D@@GEGGE=EGGD@CDGDGAGGGGGBGGGGGGGGGGGGGGGDGGGGGGGGGBGGGGGGG@FGFCD@GGGGGGFGGGA0BBA NH:i:2 HI:i:1 AS:i:166 nM:i:0 +D00xxx:000:x00x0xxxx:7:2203:2111:6137 
256 chr21 557 3 33M50S * 0 0 CGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGCGCCGCGCTCCTCACACCCGCTTTCAACTCCGGGCGGGGCAGGG BBBBGGGGGGGGGGGGBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBDGGEGGGGGGGGGGGGGGGGG NH:i:2 HI:i:1 AS:i:32 nM:i:0 +D00xxx:000:x00x0xxxx:1:2103:12116:101378 256 chr21 557 3 33M50S * 0 0 CGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGCGCCGCGCTCCTCACACCCGCTTTCAACTCCGGGCGGGGCAGGG ?@BBGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGBFBFGCGGGGGGGGGGGGGGGGG NH:i:2 HI:i:1 AS:i:32 nM:i:0 +D00xxx:000:x00x0xxxx:7:2203:2111:6137 0 chr21 1600 3 33S50M * 0 0 CGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGCGCCGCGCTCCTCACACCCGCTTTCAACTCCGGGCGGGGCAGGG BBBBGGGGGGGGGGGGBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBDGGEGGGGGGGGGGGGGGGGG NH:i:2 HI:i:2 AS:i:47 nM:i:1 +D00xxx:000:x00x0xxxx:1:2103:12116:101378 0 chr21 1600 3 33S50M * 0 0 CGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGCGCCGCGCTCCTCACACCCGCTTTCAACTCCGGGCGGGGCAGGG ?@BBGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGBFBFGCGGGGGGGGGGGGGGGGG NH:i:2 HI:i:2 AS:i:47 nM:i:1 +D00xxx:000:x00x0xxxx:3:2102:7384:25660 145 chr21 1600 3 95M = 483 0 CTGCCGCGCCGCGCTCCTCACACCCGCTTTCAACTCCGGGCGGGGCAGGGGGCATCGGCGGGTCCCAGGCGCCCAGGTTCCCCTCCCCAGCCCGG C6GGGGAD:ADDGCC.C=GC>C?ADDGGGFE<.C8GGG@GCBFGGGDGGFGGBB/FGGGGGGBEGGGGGGGGGGGBBCCB NH:i:2 HI:i:2 AS:i:91 nM:i:1 +D00xxx:000:x00x0xxxx:4:2205:7737:30184 145 chr21 1607 3 93M = 348 0 GCCGCGCTCCTCACACCCGCTTTCAACTCCGGGCGGGGCAGGGGGCATCGGCGGGTCCCAGGCGCCCAGGTTCCCCTCCCCAGCCCGGACCCC AAGGGEGGGBGGAG?>A;GGGGEEBDC>GGGAGGGD6:GGGGGC>:DGDGGGGBE@DA><;GGGDDADDGGCDA@GGGGDGGGGGBGGGGGGDGGGGGGDGGGEEGFGGGGGGGGEGFGGGGGGGGGCBBCC NH:i:2 HI:i:2 AS:i:118 nM:i:0 +D00xxx:000:x00x0xxxx:3:1106:8700:84313 401 chr21 1731 3 61S65M = 11 0 GTGGCGTTCCGTAGGCACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGCGCTCCAGGCGGCGCTCCCCGCCCCTCGCCCTCCGCCTCCGCCTCCGCCTCCTGCTTAG GGGG>.B>GGGGEGGGGGGGGGGGGGGGGEGGEGGGEEDC=G@G@D@@GEGGE=EGGD@CDGDGAGGGGGBGGGGGGGGGGGGGGGDGGGGGGGGGBGGGGGGG@FGFCD@GGGGGGFGGGA0BBA NH:i:2 HI:i:2 AS:i:63 nM:i:0 
+D00xxx:000:x00x0xxxx:6:1110:4542:86939 401 chr21 1731 3 100S25M = 14 0 CGCTGGAGGAGGACGCGGTCATCTCTGTCTTAGCCAGGTGTGGCGTTCCGTAGGCACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGCGCTCCAGGCGGCGCTCCC GGGGGGGGGGGDDGGGGGGGGEGED=GGEEGEGEGGGGEGGGGDGGGDGF>GGGEF>GGGEGGGGGGGGGGGGGGGGCGGGEGFGEEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG@ABBB NH:i:2 HI:i:2 AS:i:24 nM:i:0 +D00xxx:000:x00x0xxxx:5:1209:11373:21218 401 chr21 1731 3 95S31M = 58 0 GAGGAGGACGCGGTCATCTCTGTCTTAGCCAGGTGTGGCGTTCCGTAGGCACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGCGCTCCAGGCGGCGCTCCCCGCCCC GGGGGA:GGGGGGGGGGGDEBGG@EBGEEGGGGGGGGGGGDBGGGGGGEGGGGE=GGGGGGGGGGGGGE=EGGGGGGD@D0=GGED;GEEGGGGGGFGGGGGGGEGEGGGEBB@GGGG@BBBB NH:i:2 HI:i:2 AS:i:30 nM:i:0 +D00xxx:000:x00x0xxxx:2:2102:3982:10406 401 chr21 1731 3 100S26M = 465 0 CGCTGGAGGAGGACGCGGTCATCTCTGTCTTAGCCAGGTGTGGCGTTCCGTAGGCACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGCGCTCCAGGCGGCGCTCCCC GGGGGGGGGGGGGGGGBGGGGGGGGGGGGGBGGGEGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBCBBB NH:i:2 HI:i:2 AS:i:25 nM:i:0 +D00xxx:000:x00x0xxxx:2:1105:9810:99004 401 chr21 1731 3 95S31M = 424 0 GAGGAGGACGCGGTCATCTCTGTCTTAGCCAGGTGTGGCGTTCCGTAGGCACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGCGCTCCAGGCGGCGCTCCCCGCCCC GGGGGGGGGGGGGGEGEGGGGGEGGGEGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGCCCCC NH:i:2 HI:i:2 AS:i:30 nM:i:0 diff --git a/tests/splice_site_motif/test_splice_site_motif_02.out.dbed b/tests/splice_site_motif/test_splice_site_motif_02.out.dbed new file mode 100644 index 00000000..aabf22ef --- /dev/null +++ b/tests/splice_site_motif/test_splice_site_motif_02.out.dbed @@ -0,0 +1,2 @@ +shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-B-acceptor pos-B-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A 
n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge full-gene-dysregulation frameshift=0 frameshift=+1 frameshift=+2 splice-motif-edit-distance median-AS-A median-AS-B max-AS-A max-AS-B data-structure +1 chr21:1730->chr21:589 chr21 589 - 10 0 chr21 1730 + 0 10 1141 unclassified linear intronic 24 14 7 5 2387 4 2 1 2 0 0 0.8277 0.9070 0.0000 0.9398 8.3000 73.6000 0.7947 0.1082 3.6601 8.5000 18.6000 0.8063 0.0993 3.5995 0.7143 0.2917 1.5000 0 95 50 126 120 chr21:589/590(-)->chr21:1730/1731(+):(spanning_paired_1:5,spanning_paired_2:5)&chr21:589/590(-)->chr21:1599/1600(+):(discordant_mates:10,spanning_singleton_1_r:2,spanning_singleton_2_r:2) diff --git a/tests/test_chim_overhang.py b/tests/test_chim_overhang.py index d9609133..dbf1a9f6 100755 --- a/tests/test_chim_overhang.py +++ b/tests/test_chim_overhang.py @@ -94,7 +94,7 @@ def test_01(self): # Step 04: dr-disco integrate cl = DetectOutput(drdisco_classify) - cl.integrate(drdisco_integrate, None) + cl.integrate(drdisco_integrate, None, None) self.assertTrue(filecmp.cmp(drdisco_integrate_test, drdisco_integrate), msg="diff '" + drdisco_integrate_test + "' '" + drdisco_integrate + "':\n" + subprocess.Popen(['diff', drdisco_integrate_test, drdisco_integrate], stdout=subprocess.PIPE).stdout.read()) @@ -133,7 +133,7 @@ def test_02(self): # Step 04: dr-disco integrate cl = DetectOutput(drdisco_classify) - cl.integrate(drdisco_integrate, None) + cl.integrate(drdisco_integrate, None, None) self.assertTrue(filecmp.cmp(drdisco_integrate_test, drdisco_integrate), msg="diff '" + drdisco_integrate_test + "' '" + drdisco_integrate + "':\n" + subprocess.Popen(['diff', drdisco_integrate_test, drdisco_integrate], stdout=subprocess.PIPE).stdout.read()) @@ -172,7 +172,7 @@ def test_03(self): # Step 04: dr-disco integrate cl = 
DetectOutput(drdisco_classify) - cl.integrate(drdisco_integrate, None) + cl.integrate(drdisco_integrate, None, None) self.assertTrue(filecmp.cmp(drdisco_integrate_test, drdisco_integrate), msg="diff '" + drdisco_integrate_test + "' '" + drdisco_integrate + "':\n" + subprocess.Popen(['diff', drdisco_integrate_test, drdisco_integrate], stdout=subprocess.PIPE).stdout.read()) diff --git a/tests/test_drdisco_integrate.py b/tests/test_drdisco_integrate.py index b3e82165..9db92021 100755 --- a/tests/test_drdisco_integrate.py +++ b/tests/test_drdisco_integrate.py @@ -51,7 +51,7 @@ def test_s041(self): for gtf_file in gtf_files: cl = DetectOutput(input_file) - cl.integrate(output_file, gtf_file) + cl.integrate(output_file, gtf_file, None) self.assertTrue(filecmp.cmp(test_file, output_file), msg="diff '" + test_file + "' '" + output_file + "':\n" + subprocess.Popen(['diff', test_file, output_file], stdout=subprocess.PIPE).stdout.read()) @@ -64,7 +64,7 @@ def test_s041_nocrash(self): output_file = T_TEST_DIR + "test_" + test_id + ".out.txt" cl = DetectOutput(input_file) - cl.integrate(output_file, gtf_file) + cl.integrate(output_file, gtf_file, None) self.assertTrue(filecmp.cmp(test_file, output_file), msg="diff '" + test_file + "' '" + output_file + "':\n" + subprocess.Popen(['diff', test_file, output_file], stdout=subprocess.PIPE).stdout.read()) @@ -77,7 +77,7 @@ def test_s041_no_gtf(self): output_file = T_TEST_DIR + "test_" + test_id + ".out.txt" cl = DetectOutput(input_file) - cl.integrate(output_file, gtf_file) + cl.integrate(output_file, gtf_file, None) self.assertTrue(filecmp.cmp(test_file, output_file), msg="diff '" + test_file + "' '" + output_file + "':\n" + subprocess.Popen(['diff', test_file, output_file], stdout=subprocess.PIPE).stdout.read()) @@ -94,7 +94,7 @@ def test_in_frame_non_hybrid_protein(self): for gtf_file in gtf_files: cl = DetectOutput(input_file) - cl.integrate(output_file, gtf_file) + cl.integrate(output_file, gtf_file, None) 
self.assertTrue(filecmp.cmp(test_file, output_file), msg="diff '" + test_file + "' '" + output_file + "':\n" + subprocess.Popen(['diff', test_file, output_file], stdout=subprocess.PIPE).stdout.read()) diff --git a/tests/test_functional.py b/tests/test_functional.py index 21967536..bc80e26c 100755 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -254,5 +254,181 @@ def test_02_s041_no_gtf(self): self.assertTrue(filecmp.cmp(test_file, output_file), msg="diff '" + test_file + "' '" + output_file + "':\n" + subprocess.Popen(['diff', test_file, output_file], stdout=subprocess.PIPE).stdout.read()) +class TestFrameShiftPrediction(unittest.TestCase): + def __get_temp_dirs(self): + TEST_DIR = "tests/integrate/" + T_TEST_DIR = "tmp/" + TEST_DIR + + if not os.path.exists(T_TEST_DIR): + os.makedirs(T_TEST_DIR) + + return TEST_DIR, T_TEST_DIR + + def test_01(self): # example of in-frame fusion - strands are RNA strand + TEST_DIR, T_TEST_DIR = self.__get_temp_dirs() + + test_id = 'frameshift-prediction_01' + + # both do have their DNA strand at minus!! 
: + # + # <=(-)=| acceptor in negative strand at RNA + # =====(+)=====>| donor in positive strand at RNA + + # donor acceptor + # fusions = [(['chr1', 1035203, '+'], ['chr1', 999610, '-']), + # (['1', 1035203, '+'], ['1', 999610, '-'])] # strands are at RNA level, and gene order is DONOR, ACCEPTOR + + input_file = TEST_DIR + "test_" + test_id + ".in.dbed" + test_file = TEST_DIR + "test_" + test_id + ".out.txt" + output_file = T_TEST_DIR + "test_" + test_id + ".out.txt" + + gtf_files = [TEST_DIR + 'frameshift_example.gtf', TEST_DIR + 'frameshift_example.no_chr_prefix.gtf'] + for gtf_file in gtf_files: + command = ["bin/dr-disco", + "integrate", + "--gtf", gtf_file, + input_file, + output_file] + + self.assertEqual(subprocess.call(command), 0, msg=" ".join([str(x) for x in command])) + + self.assertTrue(filecmp.cmp(test_file, output_file), msg="diff '" + test_file + "' '" + output_file + "':\n" + subprocess.Popen(['diff', test_file, output_file], stdout=subprocess.PIPE).stdout.read()) + + # must satisfy: + # self.assertEqual(str(frameshift_annotation[0]), "[(('AGRN(ENST00000620552.4)-ensembl', 0), ('HES4(ENST00000304952.10)-ensembl_havana', 0))]") + # self.assertEqual(len(frameshift_annotation[1]), 0) + # self.assertEqual(len(frameshift_annotation[2]), 0) + + def test_01_complementary(self): + TEST_DIR, T_TEST_DIR = self.__get_temp_dirs() + + test_id = 'frameshift-prediction_01-complementary' + + input_file = TEST_DIR + "test_" + test_id + ".in.dbed" + test_file = TEST_DIR + "test_" + test_id + ".out.txt" + output_file = T_TEST_DIR + "test_" + test_id + ".out.txt" + + gtf_files = [TEST_DIR + 'frameshift_example.gtf', TEST_DIR + 'frameshift_example.no_chr_prefix.gtf'] + for gtf_file in gtf_files: + command = ["bin/dr-disco", + "integrate", + "--gtf", gtf_file, + input_file, + output_file] + + self.assertEqual(subprocess.call(command), 0, msg=" ".join([str(x) for x in command])) + + self.assertTrue(filecmp.cmp(test_file, output_file), msg="diff '" + test_file + "' '" + 
output_file + "':\n" + subprocess.Popen(['diff', test_file, output_file], stdout=subprocess.PIPE).stdout.read()) + + def test_02(self): # 0, +2 + TEST_DIR, T_TEST_DIR = self.__get_temp_dirs() + + test_id = 'frameshift-prediction_02' + + # fusions = [(['chr1', 1035203, '+'], ['chr1', 999020, '-']), (['1', 1035203, '+'], ['1', 999020, '-'])] # (from), (to) and strands are at RNA level! + input_file = TEST_DIR + "test_" + test_id + ".in.dbed" + test_file = TEST_DIR + "test_" + test_id + ".out.txt" + output_file = T_TEST_DIR + "test_" + test_id + ".out.txt" + + gtf_files = [TEST_DIR + 'frameshift_example.gtf', TEST_DIR + 'frameshift_example.no_chr_prefix.gtf'] + for gtf_file in gtf_files: + command = ["bin/dr-disco", + "integrate", + "--gtf", gtf_file, + input_file, + output_file] + + self.assertEqual(subprocess.call(command), 0, msg=" ".join([str(x) for x in command])) + + self.assertTrue(filecmp.cmp(test_file, output_file), msg="diff '" + test_file + "' '" + output_file + "':\n" + subprocess.Popen(['diff', test_file, output_file], stdout=subprocess.PIPE).stdout.read()) + + # must satisfy: + # self.assertEqual(len(frameshift_annotation[0]), 0) + # self.assertEqual(len(frameshift_annotation[1]), 0) + # self.assertEqual(str(frameshift_annotation[2]), "[(('AGRN(ENST00000620552.4)-ensembl', 0), ('HES4(ENST00000304952.10)-ensembl_havana', 2))]") + + def test_03(self): # +1, +2 -> 0 + TEST_DIR, T_TEST_DIR = self.__get_temp_dirs() + + test_id = 'frameshift-prediction_03' + + # fusions = [(['chr1', 1040604, '+'], ['chr1', 999020, '-']), (['1', 1040604, '+'], ['1', 999020, '-'])] + + input_file = TEST_DIR + "test_" + test_id + ".in.dbed" + test_file = TEST_DIR + "test_" + test_id + ".out.txt" + output_file = T_TEST_DIR + "test_" + test_id + ".out.txt" + + gtf_files = [TEST_DIR + 'frameshift_example.gtf', TEST_DIR + 'frameshift_example.no_chr_prefix.gtf'] + for gtf_file in gtf_files: + command = ["bin/dr-disco", + "integrate", + "--gtf", gtf_file, + input_file, + output_file] 
+ + self.assertEqual(subprocess.call(command), 0, msg=" ".join([str(x) for x in command])) + + self.assertTrue(filecmp.cmp(test_file, output_file), msg="diff '" + test_file + "' '" + output_file + "':\n" + subprocess.Popen(['diff', test_file, output_file], stdout=subprocess.PIPE).stdout.read()) + + # must satisfy: + # self.assertEqual(str(frameshift_annotation[0]), "[(('AGRN(ENST00000620552.4)-ensembl', 1), ('HES4(ENST00000304952.10)-ensembl_havana', 2))]") + # self.assertEqual(len(frameshift_annotation[1]), 0) + # self.assertEqual(len(frameshift_annotation[2]), 0) + + +class TestFunctional_integrate_splice_site_motif(unittest.TestCase): + def __get_temp_dirs(self): + TEST_DIR = "tests/splice_site_motif/" + T_TEST_DIR = "tmp/" + TEST_DIR + + if not os.path.exists(T_TEST_DIR): + os.makedirs(T_TEST_DIR) + + return TEST_DIR, T_TEST_DIR + + def test_sj_01(self): + TEST_DIR, T_TEST_DIR = self.__get_temp_dirs() + + test_id = 'splice_site_motif_01' + + input_sam = TEST_DIR + "test_" + test_id + ".in.sam" + input_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam" + input_file = T_TEST_DIR + "test_" + test_id + ".dbed" + + # gtf_file = None + fasta_file = TEST_DIR + "test_" + test_id + ".in.fa" + + output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed" + test_file = TEST_DIR + "test_" + test_id + ".out.dbed" + + # sam -> fixed bam + command = ["bin/dr-disco", + "fix", + input_sam, + input_bam] + + self.assertEqual(subprocess.call(command), 0, msg=" ".join([str(x) for x in command])) + + # fixed bam -> dr-disco detect + command = ["bin/dr-disco", + "detect", + "-m", "0", + input_bam, + input_file] + + self.assertEqual(subprocess.call(command), 0, msg=" ".join([str(x) for x in command])) + + # dr-disco-detect (skip classify) -> dr-disco integrate + command = ["bin/dr-disco", + "integrate", + "--fasta", fasta_file, + input_file, + output_file] + + self.assertEqual(subprocess.call(command), 0, msg=" ".join([str(x) for x in command])) + + 
self.assertTrue(filecmp.cmp(test_file, output_file), msg="diff '" + test_file + "' '" + output_file + "':\n" + subprocess.Popen(['diff', test_file, output_file], stdout=subprocess.PIPE).stdout.read()) + + if __name__ == '__main__': main() diff --git a/tests/test_splice_site_motif.py b/tests/test_splice_site_motif.py new file mode 100755 index 00000000..42738f53 --- /dev/null +++ b/tests/test_splice_site_motif.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 textwidth=79: + +""" +Dr. Disco - testing splice site motif edit distance + +[License: GNU General Public License v3 (GPLv3)] + + This file is part of Dr. Disco. + + Dr. Disco is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Dr. Disco is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+""" + + +from drdisco.IntronDecomposition import IntronDecomposition + +import unittest +import filecmp +import os +import subprocess +from drdisco.DetectOutput import DetectOutput +from utils import main, sam_to_fixed_bam + + +TEST_DIR = "tests/splice_site_motif/" +T_TEST_DIR = "tmp/" + TEST_DIR + + +# Nosetests doesn't use main() +if not os.path.exists(T_TEST_DIR): + os.makedirs(T_TEST_DIR) + + +class TestIntronicBreakDetection(unittest.TestCase): + def test_sj_01(self): + test_id = 'splice_site_motif_01' + + input_sam = TEST_DIR + "test_" + test_id + ".in.sam" + input_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam" + input_file = T_TEST_DIR + "test_" + test_id + ".dbed" + + gtf_file = None + fasta_file = TEST_DIR + "test_" + test_id + ".in.fa" + + output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed" + test_file = TEST_DIR + "test_" + test_id + ".out.dbed" + + # sam -> fixed bam + sam_to_fixed_bam(input_sam, input_bam, T_TEST_DIR) + + # fixed bam -> dr-disco detect + ic = IntronDecomposition(input_bam) + ic.decompose(0) + fh = open(input_file, "w") + ic.export(fh) + fh.close() + + # dr-disco-detect (skip classify) -> dr-disco integrate + cl = DetectOutput(input_file) + cl.integrate(output_file, gtf_file, fasta_file) + + self.assertTrue(filecmp.cmp(test_file, output_file), msg="diff '" + test_file + "' '" + output_file + "':\n" + subprocess.Popen(['diff', test_file, output_file], stdout=subprocess.PIPE).stdout.read()) + + def test_sj_02(self): + test_id = 'splice_site_motif_02' + + input_sam = TEST_DIR + "test_" + test_id + ".in.sam" + input_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam" + input_file = T_TEST_DIR + "test_" + test_id + ".dbed" + + gtf_file = None + fasta_file = TEST_DIR + "test_" + test_id + ".in.fa" + + output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed" + test_file = TEST_DIR + "test_" + test_id + ".out.dbed" + + # sam -> fixed bam + sam_to_fixed_bam(input_sam, input_bam, T_TEST_DIR) + + # fixed bam -> dr-disco detect + ic 
= IntronDecomposition(input_bam) + ic.decompose(0) + fh = open(input_file, "w") + ic.export(fh) + fh.close() + + # dr-disco-detect (skip classify) -> dr-disco integrate + cl = DetectOutput(input_file) + cl.integrate(output_file, gtf_file, fasta_file) + + self.assertTrue(filecmp.cmp(test_file, output_file), msg="diff '" + test_file + "' '" + output_file + "':\n" + subprocess.Popen(['diff', test_file, output_file], stdout=subprocess.PIPE).stdout.read()) + + +if __name__ == '__main__': + main()