-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
d252be0
commit 30b7b84
Showing
6 changed files
with
266 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
using System; | ||
using System.Linq; | ||
|
||
namespace Ovation.FasterQC.Net | ||
{ | ||
public class MultiQC : IQcModule | ||
{ | ||
private const byte ILLUMINA_BASE_ADJUSTMENT_LOWER = 33; | ||
|
||
private const byte ILLUMINA_BASE_ADJUSTMENT_UPPER = 126; | ||
|
||
private int maxReadLength = 0; | ||
|
||
private ulong sequenceCount = 0; | ||
|
||
private ulong[][] qualityScoresAtPosition = Array.Empty<ulong[]>(); | ||
|
||
private uint lowestMean = uint.MaxValue; | ||
|
||
private uint highestMean = uint.MinValue; | ||
|
||
private ulong[] nCountsAtPosition = Array.Empty<ulong>(); | ||
|
||
private readonly ulong[] gcPercentageCounts = new ulong[101]; | ||
|
||
private readonly ulong[] meanPhredCounts = new ulong[128]; | ||
|
||
public string Name => "multiqc"; | ||
|
||
public string Description => "Generates limited / Ovation-branded output compatible with MultiQC"; | ||
|
||
public bool IsEnabledForAll => true; | ||
|
||
public ReaderType SupportedReaders => ReaderType.AllReaders; | ||
|
||
public object Data | ||
{ | ||
get | ||
{ | ||
var meanPhreds = new ulong[maxReadLength][]; | ||
for (var pos = 0; pos < maxReadLength; pos++) | ||
{ | ||
meanPhreds[pos] = new ulong[128]; | ||
for (var qual = 0UL; qual < 128; qual++) | ||
{ | ||
meanPhreds[pos][qual] = qualityScoresAtPosition[pos][qual] * qual; | ||
} | ||
} | ||
|
||
var phreds = meanPhredCounts | ||
.Skip(ILLUMINA_BASE_ADJUSTMENT_LOWER) | ||
.Take(ILLUMINA_BASE_ADJUSTMENT_UPPER - ILLUMINA_BASE_ADJUSTMENT_LOWER + 1) | ||
.ToArray(); | ||
|
||
return new | ||
{ | ||
meanPhredByPosition = meanPhreds | ||
.Select((p, idx) => new { idx, val = (int)(p.Sum(q => (decimal)q) / sequenceCount) }) | ||
.ToDictionary(i => i.idx + 1, i => i.val), | ||
|
||
gcPercentageCounts = gcPercentageCounts | ||
.Select((gcc, idx) => new { idx, gcc }) | ||
.ToDictionary(i => i.idx, i => i.gcc), | ||
|
||
gcPercentagePercentage = gcPercentageCounts | ||
.Select((gcc, idx) => new { idx, val = Math.Round((float)gcc / (float)sequenceCount * 100.0, 3) }) | ||
.ToDictionary(i => i.idx, i => i.val), | ||
|
||
nCountsAtPosition = nCountsAtPosition | ||
.Select((nc, idx) => new { idx, nc }) | ||
.ToDictionary(i => i.idx + 1, i => i.nc), | ||
|
||
nPercentageAtPosition = nCountsAtPosition | ||
.Select((nc, idx) => new { idx, val = Math.Round((float)nc / (float)sequenceCount * 100.0, 3) }) | ||
.ToDictionary(i => i.idx + 1, i => i.val), | ||
|
||
meanPhredCounts = meanPhredCounts | ||
.Skip(ILLUMINA_BASE_ADJUSTMENT_LOWER) | ||
.Take(ILLUMINA_BASE_ADJUSTMENT_UPPER - ILLUMINA_BASE_ADJUSTMENT_LOWER + 1) | ||
.Select((val, idx) => new { idx, val }) | ||
.ToDictionary(i => ILLUMINA_BASE_ADJUSTMENT_LOWER + i.idx, i => i.val), | ||
|
||
meanPhredPercentage = meanPhredCounts | ||
.Skip(ILLUMINA_BASE_ADJUSTMENT_LOWER) | ||
.Take(ILLUMINA_BASE_ADJUSTMENT_UPPER - ILLUMINA_BASE_ADJUSTMENT_LOWER + 1) | ||
.Select((pc, idx) => new { idx, val = Math.Round((float)pc / (float)sequenceCount * 100.0, 3) }) | ||
.ToDictionary(i => ILLUMINA_BASE_ADJUSTMENT_LOWER + i.idx, i => i.val), | ||
|
||
phreds = phreds | ||
.Select((val, idx) => new { phred = Convert.ToChar(ILLUMINA_BASE_ADJUSTMENT_LOWER + idx), count = val }) | ||
.ToDictionary(i => i.phred, i => i.count) | ||
}; | ||
} | ||
} | ||
|
||
public void ProcessSequence(Sequence sequence) | ||
{ | ||
sequenceCount++; | ||
|
||
var sequenceLength = sequence.Read.Length; | ||
|
||
// see if we need to resize our arrays | ||
if (sequenceLength > maxReadLength) | ||
{ | ||
maxReadLength = sequenceLength; | ||
|
||
Array.Resize(ref qualityScoresAtPosition, maxReadLength); | ||
for (var n = 0; n < maxReadLength; n++) | ||
{ | ||
if (qualityScoresAtPosition[n] == null) | ||
{ | ||
qualityScoresAtPosition[n] = new ulong[128]; | ||
} | ||
} | ||
Array.Resize(ref nCountsAtPosition, maxReadLength); | ||
} | ||
|
||
// calculate mean quality score | ||
var sum = 0; | ||
var count = 0; | ||
|
||
var qual = sequence.Quality; | ||
for (var i = 0; i < sequenceLength; i++) | ||
{ | ||
byte q = qual[i]; | ||
|
||
qualityScoresAtPosition[i][q]++; | ||
sum += q; | ||
count++; | ||
} | ||
|
||
var mean = (uint)((float)sum / (float)count); | ||
meanPhredCounts[mean]++; | ||
|
||
lowestMean = Math.Min(lowestMean, mean); | ||
highestMean = Math.Max(highestMean, mean); | ||
|
||
// calculate the gc percentage and ncount of the read | ||
var gcCount = 0; | ||
var read = sequence.Read; | ||
for (var i = 0; i < sequenceLength; i++) | ||
{ | ||
switch (read[i]) | ||
{ | ||
case (byte)'G': | ||
case (byte)'C': | ||
gcCount++; | ||
break; | ||
|
||
case (byte)'N': | ||
nCountsAtPosition[i]++; | ||
break; | ||
|
||
default: | ||
break; | ||
} | ||
} | ||
|
||
var gcPercentage = (int)((float)gcCount / (float)sequenceLength * 100.0); | ||
gcPercentageCounts[gcPercentage]++; | ||
} | ||
|
||
public void Reset() | ||
{ | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
|
||
namespace Ovation.FasterQC.Net.Utils | ||
{ | ||
public static class Statistics | ||
{ | ||
/// <summary> | ||
/// Partitions the given list around a pivot element such that all elements on left of pivot are <= pivot | ||
/// and the ones at thr right are > pivot. This method can be used for sorting, N-order statistics such as | ||
/// as median finding algorithms. | ||
/// Pivot is selected ranodmly if random number generator is supplied else its selected as last element in the list. | ||
/// Reference: Introduction to Algorithms 3rd Edition, Corman et al, pp 171 | ||
/// </summary> | ||
private static int Partition<T>(this IList<T> list, int start, int end, Random? rnd = null) where T : IComparable<T> | ||
{ | ||
if (rnd != null) | ||
list.Swap(end, rnd.Next(start, end + 1)); | ||
|
||
var pivot = list[end]; | ||
var lastLow = start - 1; | ||
|
||
for (var i = start; i < end; i++) | ||
{ | ||
if (list[i].CompareTo(pivot) <= 0) | ||
list.Swap(i, ++lastLow); | ||
} | ||
|
||
list.Swap(end, ++lastLow); | ||
|
||
return lastLow; | ||
} | ||
|
||
/// <summary> | ||
/// Returns Nth smallest element from the list. Here n starts from 0 so that n=0 returns minimum, n=1 returns 2nd smallest element etc. | ||
/// Note: specified list would be mutated in the process. | ||
/// Reference: Introduction to Algorithms 3rd Edition, Corman et al, pp 216 | ||
/// </summary> | ||
public static T NthOrderStatistic<T>(this IList<T> list, int n, Random? rnd = null) where T : IComparable<T> | ||
{ | ||
return NthOrderStatistic(list, n, 0, list.Count - 1, rnd); | ||
} | ||
|
||
private static T NthOrderStatistic<T>(this IList<T> list, int n, int start, int end, Random? rnd) where T : IComparable<T> | ||
{ | ||
while (true) | ||
{ | ||
var pivotIndex = list.Partition(start, end, rnd); | ||
if (pivotIndex == n) | ||
return list[pivotIndex]; | ||
|
||
if (n < pivotIndex) | ||
end = pivotIndex - 1; | ||
else | ||
start = pivotIndex + 1; | ||
} | ||
} | ||
|
||
public static void Swap<T>(this IList<T> list, int i, int j) | ||
{ | ||
// This check is not required but Partition | ||
// function may make many calls so it's for perf reason | ||
if (i == j) | ||
return; | ||
|
||
(list[j], list[i]) = (list[i], list[j]); | ||
} | ||
|
||
/// <summary> | ||
/// Note: specified list would be mutated in the process. | ||
/// </summary> | ||
public static T Median<T>(this IList<T> list) where T : IComparable<T> | ||
{ | ||
return list.NthOrderStatistic((list.Count - 1) / 2); | ||
} | ||
|
||
public static double Median<T>(this IEnumerable<T> sequence, Func<T, double> getValue) | ||
{ | ||
var list = sequence.Select(getValue).ToList(); | ||
var mid = (list.Count - 1) / 2; | ||
|
||
return list.NthOrderStatistic(mid); | ||
} | ||
} | ||
} |