Skip to content

Commit 8b4d6e7

Browse files
committed
Corrected and off-by-1 intron/exon overhang error when merging compatible transcripts
1 parent 74963b4 commit 8b4d6e7

File tree

1 file changed

+15
-15
lines changed

1 file changed

+15
-15
lines changed

tmerge

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -209,9 +209,9 @@ unless (defined $ARGV[0]){
209209
-output => $filehandle } );
210210
}
211211

212-
die "minReadSupport value must be > 0, can't continue.\n" if $minReadSupport<1;
213-
die "exonOverhangTolerance value must be >= 0, can't continue.\n" if $exonOverhangTolerance<0;
214-
die "endFuzz value must be >= 0, can't continue.\n" if $transcriptEndFuzziness<0;
212+
die "ERROR: minReadSupport value must be > 0, can't continue.\n" if $minReadSupport<1;
213+
die "ERROR: exonOverhangTolerance value must be >= 0, can't continue.\n" if $exonOverhangTolerance<0;
214+
die "ERROR: endFuzz value must be >= 0, can't continue.\n" if $transcriptEndFuzziness<0;
215215

216216
print STDERR "######################################################################################################################
217217
######################################################################################################################
@@ -235,10 +235,10 @@ if(defined($ARGV[0])){
235235
$sortedGff=$ARGV[0];
236236
}
237237
else{
238-
die "Need input GTF file name as argument.\n";
238+
die "ERROR: Need input GTF file name as argument.\n";
239239
}
240240

241-
open GFF, "$sortedGff" or die $!;
241+
open GFF, "$sortedGff" or die "ERROR: ".$!;
242242

243243
my %transcript_to_transcript=();
244244
my %transcript_exons=();
@@ -279,9 +279,9 @@ while (<GFF>){
279279
$strand=0;
280280
}
281281
else{
282-
die "Unrecognized strand value '$str' at line $.\n";
282+
die "ERROR: Unrecognized strand value '$str' at line $.\n";
283283
}
284-
die "Corrupt GTF. Start coordinate cannot be greater than stop coordinate at line $. . Can't continue.\n" if($start > $stop);
284+
die "ERROR: Corrupt GTF. Start coordinate cannot be greater than stop coordinate at line $. . Can't continue.\n" if($start > $stop);
285285
unless(exists $transcript_seen{$GTFtranscript_id}){
286286
$read_count++;
287287
$transcript_seen{$GTFtranscript_id}=undef;
@@ -293,11 +293,11 @@ while (<GFF>){
293293
#Check for sorted input:
294294
die "ERROR: Unsorted GTF input (line $.). Must be sorted by chr, then start, then stop. Can't continue.\n" if ($chr eq $previous_chr && $start < $previous_start);
295295
if(exists $transcript_strand{$transcript_id} && $transcript_strand{$transcript_id} != $strand){
296-
die "Inconsistent strand for transcript $GTFtranscript_id in input file. Can't continue.\n";
296+
die "ERROR: Inconsistent strand for transcript $GTFtranscript_id in input file. Can't continue.\n";
297297
}
298298
$transcript_strand{$transcript_id}=$strand;
299299
if (exists $transcript_chr{$transcript_id} && $transcript_chr{$transcript_id} ne $chr){
300-
die "Inconsistent chr for transcript $GTFtranscript_id in input file. Can't continue.\n";
300+
die "ERROR: Inconsistent chr for transcript $GTFtranscript_id in input file. Can't continue.\n";
301301

302302
}
303303
$transcript_chr{$transcript_id}=$chr;
@@ -361,7 +361,7 @@ while (<GFF>){
361361
}
362362
close GFF;
363363
print STDERR "Done. Found $nr_exons exons and $read_count transcripts.\n";
364-
my $million_read_count=1000000/$read_count;
364+
my $million_read_count=1000000/($read_count+1);
365365
%transcript_seen=();
366366
%transcript_rev_index=();
367367

@@ -567,7 +567,7 @@ CONTIGS: foreach my $contig (keys %contig_to_transcripts){
567567
if($intronTr1Index > $#{$transcript_introns{$tr1}} #we've reached the last intron of tr1.
568568
&& $j < $#{$transcript_introns{$tr2}} #we've not reached the last intron of tr2.
569569
&& ${$transcript_introns{$tr1}}[0][1] == ${$transcript_introns{$tr2}}[0][1] #tr1 and tr2's respective intron chains start at the same coord
570-
&& ${$transcript_introns{$tr2}}[$j+1][1] >= ${$transcript_exons{$tr1}}[-1][2] - $exonOverhangTolerance){ #tr1's last exon does not overhang too much inside tr2's next intron
570+
&& ${$transcript_introns{$tr2}}[$j+1][1] > ${$transcript_exons{$tr1}}[-1][2] - $exonOverhangTolerance){ #tr1's last exon does not overhang too much inside tr2's next intron
571571
#Transfer remaining exons of tr2 to tr1, if they're compatible
572572
$lastTr1IntronMatchedToTr2=$i;
573573
print STDERR "DEBUG: reached last tr1 intron\n" if $debug;
@@ -1057,7 +1057,7 @@ sub endDistances{
10571057
last;
10581058
}
10591059
elsif ($containedLeftStart < ${${$transcript_exons{$container}}[$i]}[1]){
1060-
die "Program died due to a bug (in endDistances subroutine: $containedLeftStart < ${${$transcript_exons{$container}}[$i]}[1])\nsorry. Please contact author.\n";
1060+
die "ERROR: Program died due to a bug (in endDistances subroutine: $containedLeftStart < ${${$transcript_exons{$container}}[$i]}[1])\nsorry. Please contact author.\n";
10611061
}
10621062
}
10631063

@@ -1077,7 +1077,7 @@ sub endDistances{
10771077
last;
10781078
}
10791079
elsif ($containedRightEnd > ${${$transcript_exons{$container}}[$i]}[2]){
1080-
die "Program died due to a bug (in endDistances subroutine: $containedRightEnd > ${${$transcript_exons{$container}}[$i]}[2])\n, sorry. Please contact author.\n";
1080+
die "ERROR: Program died due to a bug (in endDistances subroutine: $containedRightEnd > ${${$transcript_exons{$container}}[$i]}[2])\n, sorry. Please contact author.\n";
10811081
}
10821082
}
10831083
#print STDERR "\tFinal distToLeft: $distToLeft\n";
@@ -1099,10 +1099,10 @@ sub checkIntronExonOverlap{
10991099
my $firstTrAIntronMatchedToTrB=$_[2];
11001100
my $lastTrAIntronMatchedToTrB=$_[3];
11011101
unless (defined $firstTrAIntronMatchedToTrB){
1102-
die "firstTrAIntronMatchedToTrB is undefined for $transcript_index{$trA} / $transcript_index{$trB}\n";
1102+
die "ERROR: firstTrAIntronMatchedToTrB is undefined for $transcript_index{$trA} / $transcript_index{$trB}\n";
11031103
}
11041104
unless (defined $lastTrAIntronMatchedToTrB){
1105-
die "lastTrAIntronMatchedToTrB is undefined for $transcript_index{$trA} / $transcript_index{$trB}\n";
1105+
die "ERROR: lastTrAIntronMatchedToTrB is undefined for $transcript_index{$trA} / $transcript_index{$trB}\n";
11061106

11071107
}
11081108
my @exons2=(${$transcript_exons{$trB}}[0],${$transcript_exons{$trB}}[-1]); #only terminal exons

0 commit comments

Comments
 (0)