@@ -610,6 +610,67 @@ def get_coverage_of_region(self, bamHandle, chrom, regions,
610610 def getReadLength (self , read ):
611611 return len (read )
612612
613+ @staticmethod
614+ def is_proper_pair (read , maxPairedFragmentLength ):
615+ """
616+ Checks if a read is proper pair meaning that both mates are facing each other and are in
617+ the same chromosome and are not to far away. The sam flag for proper pair can not
618+ always be trusted.
619+ :return: bool
620+
621+ >>> import pysam
622+ >>> import os
623+ >>> from deeptools.countReadsPerBin import CountReadsPerBin as cr
624+ >>> root = os.path.dirname(os.path.abspath(__file__)) + "/test/test_data/"
625+ >>> bam = pysam.AlignmentFile("{}/test_proper_pair_filtering.bam".format(root))
626+ >>> iter = bam.fetch()
627+ >>> read = next(iter)
628+ >>> cr.is_proper_pair(read, 1000) # "keep" read
629+ True
630+ >>> cr.is_proper_pair(read, 200) # "keep" read, but maxPairedFragmentLength is too short
631+ False
632+ >>> read = next(iter)
633+ >>> cr.is_proper_pair(read, 1000) # "improper pair"
634+ False
635+ >>> read = next(iter)
636+ >>> cr.is_proper_pair(read, 1000) # "mismatch chr"
637+ False
638+ >>> read = next(iter)
639+ >>> cr.is_proper_pair(read, 1000) # "same orientation1"
640+ False
641+ >>> read = next(iter)
642+ >>> cr.is_proper_pair(read, 1000) # "same orientation2"
643+ False
644+ >>> read = next(iter)
645+ >>> cr.is_proper_pair(read, 1000) # "rev first"
646+ False
647+ >>> read = next(iter)
648+ >>> cr.is_proper_pair(read, 1000) # "rev first OK"
649+ True
650+ >>> read = next(iter)
651+ >>> cr.is_proper_pair(read, 1000) # "for first"
652+ False
653+ >>> read = next(iter)
654+ >>> cr.is_proper_pair(read, 1000) # "for first"
655+ True
656+ """
657+ if not read .is_proper_pair :
658+ return False
659+ if read .reference_id != read .next_reference_id :
660+ return False
661+ if not maxPairedFragmentLength > abs (read .template_length ) > 0 :
662+ return False
663+ # check that the mates face each other (inward)
664+ if read .is_reverse is read .mate_is_reverse :
665+ return False
666+ if read .is_reverse :
667+ if read .reference_start >= read .next_reference_start :
668+ return True
669+ else :
670+ if read .reference_start <= read .next_reference_start :
671+ return True
672+ return False
673+
613674 def get_fragment_from_read (self , read ):
614675 """Get read start and end position of a read.
615676 If given, the reads are extended as follows:
@@ -689,29 +750,6 @@ def get_fragment_from_read(self, read):
689750 >>> c.defaultFragmentLength = 200
690751 >>> assert(c.get_fragment_from_read(test.getRead("single-reverse")) == [(5001618, 5001654)])
691752 """
692- def is_proper_pair ():
693- """
694- Checks if a read is proper pair meaning that both mates are facing each other and are in
695- the same chromosome and are not to far away. The sam flag for proper pair can not
696- always be trusted.
697- :return: bool
698- """
699- if not read .is_proper_pair :
700- return False
701- if read .reference_id != read .next_reference_id :
702- return False
703- if not self .maxPairedFragmentLength > abs (read .template_length ) > 0 :
704- return False
705- # check that the mates face each other (inward)
706- if read .is_reverse is read .mate_is_reverse :
707- return False
708- if read .is_reverse :
709- if read .reference_start >= read .next_reference_start :
710- return True
711- else :
712- if read .reference_start <= read .next_reference_start :
713- return True
714- return False
715753 # if no extension is needed, use pysam get_blocks
716754 # to identify start and end reference positions.
717755 # get_blocks return a list of start and end positions
@@ -723,8 +761,7 @@ def is_proper_pair():
723761 return read .get_blocks ()
724762
725763 else :
726- if is_proper_pair ():
727-
764+ if self .is_proper_pair (read , self .maxPairedFragmentLength ):
728765 if read .is_reverse :
729766 fragmentStart = read .next_reference_start
730767 fragmentEnd = read .reference_end
0 commit comments