Skip to content

Commit 0e30133

Browse files
committed
Add tests for is_proper_pair() and make it a static method (like in develop). plotEnrichment now uses the same method.
1 parent 650d06a commit 0e30133

File tree

4 files changed

+64
-46
lines changed

4 files changed

+64
-46
lines changed

deeptools/countReadsPerBin.py

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,67 @@ def get_coverage_of_region(self, bamHandle, chrom, regions,
610610
def getReadLength(self, read):
611611
return len(read)
612612

613+
@staticmethod
614+
def is_proper_pair(read, maxPairedFragmentLength):
615+
"""
616+
Checks if a read is proper pair meaning that both mates are facing each other and are in
617+
the same chromosome and are not to far away. The sam flag for proper pair can not
618+
always be trusted.
619+
:return: bool
620+
621+
>>> import pysam
622+
>>> import os
623+
>>> from deeptools.countReadsPerBin import CountReadsPerBin as cr
624+
>>> root = os.path.dirname(os.path.abspath(__file__)) + "/test/test_data/"
625+
>>> bam = pysam.AlignmentFile("{}/test_proper_pair_filtering.bam".format(root))
626+
>>> iter = bam.fetch()
627+
>>> read = next(iter)
628+
>>> cr.is_proper_pair(read, 1000) # "keep" read
629+
True
630+
>>> cr.is_proper_pair(read, 200) # "keep" read, but maxPairedFragmentLength is too short
631+
False
632+
>>> read = next(iter)
633+
>>> cr.is_proper_pair(read, 1000) # "improper pair"
634+
False
635+
>>> read = next(iter)
636+
>>> cr.is_proper_pair(read, 1000) # "mismatch chr"
637+
False
638+
>>> read = next(iter)
639+
>>> cr.is_proper_pair(read, 1000) # "same orientation1"
640+
False
641+
>>> read = next(iter)
642+
>>> cr.is_proper_pair(read, 1000) # "same orientation2"
643+
False
644+
>>> read = next(iter)
645+
>>> cr.is_proper_pair(read, 1000) # "rev first"
646+
False
647+
>>> read = next(iter)
648+
>>> cr.is_proper_pair(read, 1000) # "rev first OK"
649+
True
650+
>>> read = next(iter)
651+
>>> cr.is_proper_pair(read, 1000) # "for first"
652+
False
653+
>>> read = next(iter)
654+
>>> cr.is_proper_pair(read, 1000) # "for first"
655+
True
656+
"""
657+
if not read.is_proper_pair:
658+
return False
659+
if read.reference_id != read.next_reference_id:
660+
return False
661+
if not maxPairedFragmentLength > abs(read.template_length) > 0:
662+
return False
663+
# check that the mates face each other (inward)
664+
if read.is_reverse is read.mate_is_reverse:
665+
return False
666+
if read.is_reverse:
667+
if read.reference_start >= read.next_reference_start:
668+
return True
669+
else:
670+
if read.reference_start <= read.next_reference_start:
671+
return True
672+
return False
673+
613674
def get_fragment_from_read(self, read):
614675
"""Get read start and end position of a read.
615676
If given, the reads are extended as follows:
@@ -689,29 +750,6 @@ def get_fragment_from_read(self, read):
689750
>>> c.defaultFragmentLength = 200
690751
>>> assert(c.get_fragment_from_read(test.getRead("single-reverse")) == [(5001618, 5001654)])
691752
"""
692-
def is_proper_pair():
693-
"""
694-
Checks if a read is proper pair meaning that both mates are facing each other and are in
695-
the same chromosome and are not to far away. The sam flag for proper pair can not
696-
always be trusted.
697-
:return: bool
698-
"""
699-
if not read.is_proper_pair:
700-
return False
701-
if read.reference_id != read.next_reference_id:
702-
return False
703-
if not self.maxPairedFragmentLength > abs(read.template_length) > 0:
704-
return False
705-
# check that the mates face each other (inward)
706-
if read.is_reverse is read.mate_is_reverse:
707-
return False
708-
if read.is_reverse:
709-
if read.reference_start >= read.next_reference_start:
710-
return True
711-
else:
712-
if read.reference_start <= read.next_reference_start:
713-
return True
714-
return False
715753
# if no extension is needed, use pysam get_blocks
716754
# to identify start and end reference positions.
717755
# get_blocks return a list of start and end positions
@@ -723,8 +761,7 @@ def is_proper_pair():
723761
return read.get_blocks()
724762

725763
else:
726-
if is_proper_pair():
727-
764+
if self.is_proper_pair(read, self.maxPairedFragmentLength):
728765
if read.is_reverse:
729766
fragmentStart = read.next_reference_start
730767
fragmentEnd = read.reference_end

deeptools/plotEnrichment.py

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from deeptools.utilities import getCommonChrNames, mungeChromosome
1717
from deeptools.bamHandler import openBam
1818
from deeptoolsintervals import Enrichment, GTF
19+
from deeptools.countReadsPerBin import CountReadsPerBin as cr
1920

2021

2122
old_settings = np.seterr(all='ignore')
@@ -166,34 +167,14 @@ def getBAMBlocks(read, defaultFragmentLength, centerRead):
166167
"""
167168
This is basically get_fragment_from_read from countReadsPerBin
168169
"""
169-
def is_proper_pair():
170-
"""
171-
Checks if a read is proper pair meaning that both mates are facing each other and are in
172-
the same chromosome and are not to far away. The sam flag for proper pair can not
173-
always be trusted.
174-
:return: bool
175-
"""
176-
if not read.is_proper_pair:
177-
return False
178-
if read.reference_id != read.next_reference_id:
179-
return False
180-
if maxPairedFragmentLength > abs(read.template_length) > 0:
181-
return False
182-
# check that the mates face each other (inward)
183-
if read.reference_start < read.next_reference_start and not read.is_reverse and read.mate_is_reverse:
184-
return True
185-
if read.reference_start >= read.next_reference_start and read.is_reverse and not read.mate_is_reverse:
186-
return True
187-
return False
188-
189170
maxPairedFragmentLength = 0
190171
if defaultFragmentLength != "read length":
191172
maxPairedFragmentLength = 4 * defaultFragmentLength
192173

193174
if defaultFragmentLength == 'read length':
194175
return read.get_blocks()
195176
else:
196-
if is_proper_pair():
177+
if cr.is_proper_pair(read, maxPairedFragmentLength):
197178
if read.is_reverse:
198179
fragmentStart = read.next_reference_start
199180
fragmentEnd = read.reference_end
314 Bytes
Binary file not shown.
104 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)