Skip to content

Commit

Permalink
Merge pull request #109 from yhoogstrate/additional_filter_intercept
Browse files Browse the repository at this point in the history
adding options specifically for FFPE data
  • Loading branch information
yhoogstrate authored Feb 2, 2018
2 parents 912d77a + a79df83 commit 0eb122b
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 55 deletions.
5 changes: 5 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
2018-02-01 Youri Hoogstrate v0.15.2
* Makes the mismatches-per-base classification more stringent
unless `--ffpe` is used. With `--ffpe`, the old behaviour
is preserved.

2018-01-25 Youri Hoogstrate v0.15.1
* Added filter lr/intercept

Expand Down
5 changes: 3 additions & 2 deletions bin/dr-disco
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ def CLI_logo_sequence(region, fasta_input_file, offset_negative, offset_positive
@click.option('--blacklist-regions', help="Blacklist these regions (BED file)")
@click.option('--blacklist-junctions', help="Blacklist these region-to-region junctions (custom format, see files in ./share/)")
@click.option('--min-chim-overhang', default=50, help="Minimum alignment length on each side of the junction. May need to be set to smaller values for read lengths smaller than 75bp. Larger values are more stringent. [default=50]")
def CLI_classify(table_input_file, table_output_file, only_valid, blacklist_regions, blacklist_junctions, min_chim_overhang):
@click.option('--ffpe', is_flag=True, default=False, help="Lowers the threshold for the relative amount of mismatches, as often found in FFPE material. Note that enabling this option will consequently result in more false positives.")
def CLI_classify(table_input_file, table_output_file, only_valid, blacklist_regions, blacklist_junctions, min_chim_overhang, ffpe):
blacklist = Blacklist()

if blacklist_junctions:
Expand All @@ -133,7 +134,7 @@ def CLI_classify(table_input_file, table_output_file, only_valid, blacklist_regi
blacklist.add_regions_from_bed(blacklist_regions)

cl = DetectOutput(table_input_file)
cl.classify(table_output_file, only_valid, blacklist, min_chim_overhang)
cl.classify(table_output_file, only_valid, blacklist, min_chim_overhang, ffpe)


@CLI.command(name='integrate', short_help='Maps junctions back together that are likely to correspond to the same fusion event.')
Expand Down
18 changes: 16 additions & 2 deletions drdisco/DetectOutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ def __iter__(self):
else:
header = False

def classify(self, output_file, only_valid, blacklist, min_chim_overhang):
def classify(self, output_file, only_valid, blacklist, min_chim_overhang, ffpe_mismatch_ratio):
log.info("Loading " + output_file + "[only_valid=" + {True: 'true', False: 'false'}[only_valid] + "]")
n = 0
k = 0
Expand Down Expand Up @@ -410,7 +410,21 @@ def classify_intronic_exonic():
status.append("log_ratio_rvalue=" + str(round(log_ratio_rvalue, 2)) + ">" + str(round(log_ratio_rvalue_max, 2)))

# @todo subfunc
log_value_max = -6.4 - ((e.score + 6750.0) / (4000.0 - (e.score + 6750.0)))
# FFPE material seems to have a substantially higher amount of mismatches per base, though randomly distributed
# if we ever make a v2 of dr-disco that incorporates the concordant reads, this variable can be determined with some kind of calibration
# now we're only estimating the MM ratio without entropy per position
# [CGCGCTATATCTCGATCGCCCTTAGAGATCCTTTCGAGAGAGCTCTAGAGCG] SOME KIND OF REFERENCE SEQUENCE
# CGCG*TATAT*TC TTTC*AGAGAGCT*TAG The more randomly dispersed mismatches are more trustworthy (right side example)
# CGCG*TATAT*TCGAT TTCGAGAG*GCTCT
# GCG*TATAT*TCG T*CGAGAGAG*TCTA
# GCG*TATAT*TCGA TTCG*GAGAGCTCTA
# CG*TATAT*TCGAT TTCGAG*GAGCTCTAG
# G*TATAT*TCG TCGA*AGA*CTCT
#
if ffpe_mismatch_ratio:
log_value_max = -6.4 - ((e.score + 6750.0) / (4000.0 - (e.score + 6750.0)))
else:
log_value_max = -4.7
log_value = math.log((float(e.mismatches) + 0.0000001) / float(e.alignment_score))
if log_value >= log_value_max:
status.append("many_muts=" + str(round(log_value, 2)) + ">" + str(round(log_value_max, 2)))
Expand Down
2 changes: 1 addition & 1 deletion drdisco/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import logging
import sys

__version_info__ = ('0', '15', '1')
__version_info__ = ('0', '15', '2')
__version__ = '.'.join(__version_info__) if (len(__version_info__) == 3) else '.'.join(__version_info__[0:3]) + "-" + __version_info__[3]
__author__ = 'Youri Hoogstrate'
__homepage__ = 'https://github.com/yhoogstrate/dr-disco'
Expand Down
6 changes: 3 additions & 3 deletions tests/test_chim_overhang.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def test_01(self):

# Step 03: dr-disco classify
cl = DetectOutput(drdisco_detect)
cl.classify(drdisco_classify, False, Blacklist(), 25)
cl.classify(drdisco_classify, False, Blacklist(), 25, True)

self.assertTrue(filecmp.cmp(drdisco_classify_test, drdisco_classify), msg="diff '" + drdisco_classify_test + "' '" + drdisco_classify + "':\n" + subprocess.Popen(['diff', drdisco_classify_test, drdisco_classify], stdout=subprocess.PIPE).stdout.read())

Expand Down Expand Up @@ -127,7 +127,7 @@ def test_02(self):

# Step 03: dr-disco classify
cl = DetectOutput(drdisco_detect)
cl.classify(drdisco_classify, False, Blacklist(), 25)
cl.classify(drdisco_classify, False, Blacklist(), 25, True)

self.assertTrue(filecmp.cmp(drdisco_classify_test, drdisco_classify), msg="diff '" + drdisco_classify_test + "' '" + drdisco_classify + "':\n" + subprocess.Popen(['diff', drdisco_classify_test, drdisco_classify], stdout=subprocess.PIPE).stdout.read())

Expand Down Expand Up @@ -166,7 +166,7 @@ def test_03(self):

# Step 03: dr-disco classify
cl = DetectOutput(drdisco_detect)
cl.classify(drdisco_classify, False, Blacklist(), 25)
cl.classify(drdisco_classify, False, Blacklist(), 25, True)

self.assertTrue(filecmp.cmp(drdisco_classify_test, drdisco_classify), msg="diff '" + drdisco_classify_test + "' '" + drdisco_classify + "':\n" + subprocess.Popen(['diff', drdisco_classify_test, drdisco_classify], stdout=subprocess.PIPE).stdout.read())

Expand Down
Loading

0 comments on commit 0eb122b

Please sign in to comment.