Skip to content

Commit

Permalink
adding simple q distribution calc and chart
Browse files Browse the repository at this point in the history
  • Loading branch information
michaeljon committed May 19, 2022
1 parent 627326f commit 64d1b39
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 4 deletions.
66 changes: 66 additions & 0 deletions Modules/QualityDistribution.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
using System;
using System.Linq;

namespace Ovation.FasterQC.Net
{
public class QualityDistribution : IQcModule
{
private const byte ILLUMINA_BASE_ADJUSTMENT = 33;

private readonly ulong[] qualities = new ulong[128];

private byte lowestScore = byte.MaxValue;

private byte highestScore = byte.MinValue;

public string Name => "qualityDistribution";

public string Description => "Calculates the distribution of quality scores";

public bool IsEnabledForAll => true;

public object Data
{
get
{
return new
{
lowestScore = lowestScore - ILLUMINA_BASE_ADJUSTMENT,
highestScore = highestScore - ILLUMINA_BASE_ADJUSTMENT,

lowestScoreUnadjusted = lowestScore,
highestScoreUnadjusted = highestScore,

qualities = qualities.Skip(lowestScore).Take(highestScore - lowestScore + 1),
};
}
}

public void ProcessSequence(Sequence sequence)
{
var sequenceLength = sequence.Read.Length;
var quals = sequence.Quality;

for (var i = 0; i < sequenceLength; i++)
{
var qual = quals[i];

lowestScore = Math.Min(lowestScore, quals[i]);
highestScore = Math.Max(highestScore, quals[i]);

qualities[qual]++;
}
}

public void Reset()
{
for (var p = 0; p < qualities.Length; p++)
{
qualities[p] = 0;
}

lowestScore = byte.MaxValue;
highestScore = byte.MinValue;
}
}
}
29 changes: 25 additions & 4 deletions R/fasterqc.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(num_plots / cols)),
ncol = cols, nrow = ceiling(num_plots / cols)
ncol = cols, nrow = ceiling(num_plots / cols)
)
}

Expand All @@ -56,9 +56,8 @@ multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
}

setwd("/Users/michaeljon/src/ovation/fasterqc.net/tmp")
sample <- "in3257_2_S1_sorted"
sample <- "in3257_2_S1.sorted"

# load the data
json <-
jsonlite::fromJSON(readLines(paste(getwd(), "/", sample, ".json", sep = "")))

Expand Down Expand Up @@ -162,6 +161,28 @@ png(paste(sample, ".base-qual-dist.png", sep = ""), width = 800, height = 600)
print(plt)
dev.off()

# plot quality distributions
quality_dist <- data.frame(
qs = json$qualityDistribution$qualities
)

plt <- ggplot(quality_dist, aes(x = seq_along(qs))) +
geom_col(aes(y = qs), fill = "#7faf7f") +
scale_y_continuous(labels = scales::comma, trans = "log10") +
labs(
color = "",
title = "Quality distribution",
subtitle = paste("Sample ", sample, sep = ""),
x = "Quality (phred)",
y = "# of bases (log)",
caption = "Species: SARS-CoV-2"
) +
theme_ipsum()

png(paste(sample, ".qual-dist.png", sep = ""), width = 800, height = 600)
print(plt)
dev.off()

# plot quality distributions
quality_distribution <- data.frame(
qual = json$meanQualityDistribution$distribution,
Expand Down Expand Up @@ -281,4 +302,4 @@ plt <- ggplot(histogram, aes(x = lengths)) +

png(paste(sample, ".aligned.png", sep = ""), width = 800, height = 600)
print(plt)
dev.off()
dev.off()

0 comments on commit 64d1b39

Please sign in to comment.