From ddcd9564248b38d9894286d59a12e59b58d08d01 Mon Sep 17 00:00:00 2001 From: eead-csic-compbio Date: Tue, 7 Nov 2023 11:34:35 +0100 Subject: [PATCH] annotate_cluster.pl -b -P prints total missense mutations instead of SNPS --- CHANGES.txt | 2 ++ annotate_cluster.pl | 54 ++++++++++++++++++++++++++++++++++++--------- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index ea9aa85..cea350e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -413,3 +413,5 @@ 30062023: added parse_pangenome_matrix.pl -R, which modifies -P to control %PAV only for -B 23102023: registered https://bio.tools/get_homologues 03112023: phyTools::same_sequence_order checks for internal STOP codons in translated sequences +07112023: annotate_cluster.pl fails gracefully when no sequences are read +07112023: annotate_cluster.pl -b -P prints total missense mutations instead of SNPS diff --git a/annotate_cluster.pl b/annotate_cluster.pl index 89859bb..278789d 100755 --- a/annotate_cluster.pl +++ b/annotate_cluster.pl @@ -88,12 +88,13 @@ print "-P sequences are peptides (optional, uses BLASTP instead of BLASTN)\n"; print "-r reference sequence FASTA (optional, aligns cluster sequences to this external seq)\n"; print "-D match Pfam domains (optional, annotates longest seq, nucleotides on 6 frames)\n"; - print "-u print unaligned sequences (optional, flips seqs, handy for multiple alignments, skips option below\n"; - print "-b blunt alignment borders (optional, also annotates SNPs and parsimony-informative sites)\n"; + print "-u print unaligned sequences (optional, flips seqs, handy for multiple alignments, skips option below)\n"; + print "-b blunt alignment borders (optional, also annotates SNPs/missense mutations and parsimony-informative sites)\n"; print "-A file with taxon names of group A (optional, identifies private variants of group A vs 'rest')\n"; print "-B file with taxon names of group B (optional, requires -A, group B is used as 'rest')\n"; print "-c collapse overlapping fragments (optional, example -c 40 for overlaps >= 40 residues, requires -o,\n"; - print " this is useful to merge fragmented de-novo transcripts)\n\n$warning"; + print " this is useful to merge fragmented de-novo transcripts)\n\n"; + print $warning; exit(0); } @@ -251,6 +252,12 @@ } } +if(!defined($cluster_ref->[0])) +{ + printf(STDERR "# total sequences: 0\n# exit\n"); + exit(0); +} + printf(STDERR "\n# total sequences: %d taxa: %d\n", $#{$cluster_ref}+1,scalar(keys(%intaxa))); printf(STDERR "# longest sequence: %d (%s)\n", @@ -562,20 +569,45 @@ ## extract SNPs, parsimony-informative sites, private variants and trim alignment if required my ($align_ref,$nSNPS,$npars,$npriv,$SNPs,$pars,$priv,$missA,$missB) = check_variants_FASTA_alignment($filenamea,!$INP_nucleotides,$INP_blunt,\@listA,\@listB); - -printf(STDERR "# aligned sequences: %d width: %d\n\n", - $#{$align_ref}+1,length($align_ref->[0][SEQ])); + +if(!defined($align_ref->[0])) +{ + printf(STDERR "# aligned sequences: 0\n# exit\n"); + exit(0); +} +else +{ + printf(STDERR "# aligned sequences: %d width: %d\n\n", + $#{$align_ref}+1,length($align_ref->[0][SEQ])); +} if($INP_includeA) { - printf(STDERR "# alignment sites: SNP=%d parsimony-informative=%d private=%d unaligned A=%d B=%d (%s)\n", - $nSNPS,$npars,$npriv,$missA,$missB,$INP_clusterfile); - printf(STDERR "# private sites=%s\n\n",$priv); + if($INP_nucleotides) + { + printf(STDERR "# alignment sites: SNP=%d parsimony-informative=%d private=%d unaligned A=%d B=%d (%s)\n", + $nSNPS,$npars,$npriv,$missA,$missB,$INP_clusterfile); + printf(STDERR "# private sites=%s\n\n",$priv); + } + else + { + printf(STDERR "# alignment sites: missense=%d parsimony-informative=%d private=%d unaligned A=%d B=%d (%s)\n", + $nSNPS,$npars,$npriv,$missA,$missB,$INP_clusterfile); + printf(STDERR "# private sites=%s\n\n",$priv); + } } else { - printf(STDERR "# alignment sites: SNP=%d parsimony-informative=%d (%s)\n\n", - $nSNPS,$npars,$INP_clusterfile); + if($INP_nucleotides) + { + printf(STDERR "# alignment sites: SNP=%d parsimony-informative=%d (%s)\n\n", + $nSNPS,$npars,$INP_clusterfile); + } + else + { + printf(STDERR "# alignment sites: missense=%d parsimony-informative=%d (%s)\n\n", + $nSNPS,$npars,$INP_clusterfile); + } }