diff --git a/Modules/QualityDistribution.cs b/Modules/QualityDistribution.cs new file mode 100644 index 0000000..666cc47 --- /dev/null +++ b/Modules/QualityDistribution.cs @@ -0,0 +1,66 @@ +using System; +using System.Linq; + +namespace Ovation.FasterQC.Net +{ + public class QualityDistribution : IQcModule + { + private const byte ILLUMINA_BASE_ADJUSTMENT = 33; + + private readonly ulong[] qualities = new ulong[128]; + + private byte lowestScore = byte.MaxValue; + + private byte highestScore = byte.MinValue; + + public string Name => "qualityDistribution"; + + public string Description => "Calculates the distribution of quality scores"; + + public bool IsEnabledForAll => true; + + public object Data + { + get + { + return new + { + lowestScore = lowestScore - ILLUMINA_BASE_ADJUSTMENT, + highestScore = highestScore - ILLUMINA_BASE_ADJUSTMENT, + + lowestScoreUnadjusted = lowestScore, + highestScoreUnadjusted = highestScore, + + qualities = qualities.Skip(lowestScore).Take(highestScore - lowestScore + 1), + }; + } + } + + public void ProcessSequence(Sequence sequence) + { + var sequenceLength = sequence.Read.Length; + var quals = sequence.Quality; + + for (var i = 0; i < sequenceLength; i++) + { + var qual = quals[i]; + + lowestScore = Math.Min(lowestScore, quals[i]); + highestScore = Math.Max(highestScore, quals[i]); + + qualities[qual]++; + } + } + + public void Reset() + { + for (var p = 0; p < qualities.Length; p++) + { + qualities[p] = 0; + } + + lowestScore = byte.MaxValue; + highestScore = byte.MinValue; + } + } +} diff --git a/R/fasterqc.R b/R/fasterqc.R index 1de40d1..acfd357 100644 --- a/R/fasterqc.R +++ b/R/fasterqc.R @@ -31,7 +31,7 @@ multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) { # ncol: Number of columns of plots # nrow: Number of rows needed, calculated from # of cols layout <- matrix(seq(1, cols * ceiling(num_plots / cols)), - ncol = cols, nrow = ceiling(num_plots / cols) + ncol = cols, nrow = ceiling(num_plots / cols) ) } @@ -56,9 +56,8 @@ multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) { } setwd("/Users/michaeljon/src/ovation/fasterqc.net/tmp") -sample <- "in3257_2_S1_sorted" +sample <- "in3257_2_S1.sorted" -# load the data json <- jsonlite::fromJSON(readLines(paste(getwd(), "/", sample, ".json", sep = ""))) @@ -162,6 +161,28 @@ png(paste(sample, ".base-qual-dist.png", sep = ""), width = 800, height = 600) print(plt) dev.off() +# plot quality distributions +quality_dist <- data.frame( + qs = json$qualityDistribution$qualities +) + +plt <- ggplot(quality_dist, aes(x = seq_along(qs))) + + geom_col(aes(y = qs), fill = "#7faf7f") + + scale_y_continuous(labels = scales::comma, trans = "log10") + + labs( + color = "", + title = "Quality distribution", + subtitle = paste("Sample ", sample, sep = ""), + x = "Quality (phred)", + y = "# of bases (log)", + caption = "Species: SARS-CoV-2" + ) + + theme_ipsum() + +png(paste(sample, ".qual-dist.png", sep = ""), width = 800, height = 600) +print(plt) +dev.off() + # plot quality distributions quality_distribution <- data.frame( qual = json$meanQualityDistribution$distribution, @@ -281,4 +302,4 @@ plt <- ggplot(histogram, aes(x = lengths)) + png(paste(sample, ".aligned.png", sep = ""), width = 800, height = 600) print(plt) -dev.off() \ No newline at end of file +dev.off()