diff --git a/.vscode/launch.json b/.vscode/launch.json
index 2b1df33..2487a3f 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -13,16 +13,14 @@
"program": "${workspaceFolder}/bin/Debug/net6.0/Ovation.FasterQC.Net.dll",
"args": [
"-d",
- "-l",
- "10",
"-f",
"bam",
"-i",
- "./tmp/in3257_2_S1.sorted.bam",
+ "/Volumes/Untitled/experiment/OUT/bob/6111_S7_L001/6111_S7_L001.aligned.bam",
"-o",
- "./tmp/bob.json",
+ "/Volumes/Untitled/experiment/OUT/bob/bob.json",
"-m",
- "BasicStatistics"
+ "MultiQC"
],
"cwd": "${workspaceFolder}",
// For more information about the 'console' field, see https://aka.ms/VSCode-CS-LaunchJson-Console
diff --git a/Models/ReadFlag.cs b/Models/ReadFlag.cs
index effa7c3..ecbb016 100644
--- a/Models/ReadFlag.cs
+++ b/Models/ReadFlag.cs
@@ -40,14 +40,14 @@ public enum ReadFlag : ushort
///
/// the first segment in the template (is read1)
///
- FirstSegment = 64,
+ Read1 = 64,
///
/// the last segment in the template (is read2)
///
- LastSegment = 128,
+ Read2 = 128,
- EmbeddedSegment = FirstSegment | LastSegment,
+ EmbeddedSegment = Read1 | Read2,
///
/// not primary alignment
diff --git a/Modules/AlignmentStatistics.cs b/Modules/AlignmentStatistics.cs
index 923ef15..cc0fd25 100644
--- a/Modules/AlignmentStatistics.cs
+++ b/Modules/AlignmentStatistics.cs
@@ -24,9 +24,9 @@ public class AlignmentStatistics : IQcModule
private ulong nextSegmentReverseComplemented;
- private ulong firstSegment;
+ private ulong readOne;
- private ulong lastSegment;
+ private ulong readTwo;
private ulong embeddedSegment;
@@ -84,8 +84,8 @@ public void ProcessSequence(Sequence sequence)
if ((sequence.ReadFlag & ReadFlag.ReverseComplemented) != 0) reverseComplemented++;
if ((sequence.ReadFlag & ReadFlag.NextSegmentReverseComplemented) != 0) nextSegmentReverseComplemented++;
- if ((sequence.ReadFlag & ReadFlag.FirstSegment) != 0) firstSegment++;
- if ((sequence.ReadFlag & ReadFlag.LastSegment) != 0) lastSegment++;
+ if ((sequence.ReadFlag & ReadFlag.Read1) != 0) readOne++;
+ if ((sequence.ReadFlag & ReadFlag.Read2) != 0) readTwo++;
if ((sequence.ReadFlag & (ReadFlag.EmbeddedSegment)) == ReadFlag.EmbeddedSegment) embeddedSegment++;
if ((sequence.ReadFlag & (ReadFlag.EmbeddedSegment)) == 0) unknownIndex++;
@@ -132,8 +132,8 @@ public object Data
nextSegmentUnmapped,
reverseComplemented,
nextSegmentReverseComplemented,
- firstSegment,
- lastSegment,
+ readOne,
+ readTwo,
embeddedSegment,
unknownIndex,
primaryAlignment,
diff --git a/Modules/MultiQC.cs b/Modules/MultiQC.cs
new file mode 100644
index 0000000..90977f0
--- /dev/null
+++ b/Modules/MultiQC.cs
@@ -0,0 +1,167 @@
+using System;
+using System.Linq;
+
+namespace Ovation.FasterQC.Net
+{
+ public class MultiQC : IQcModule
+ {
+ private const byte ILLUMINA_BASE_ADJUSTMENT_LOWER = 33;
+
+ private const byte ILLUMINA_BASE_ADJUSTMENT_UPPER = 126;
+
+ private int maxReadLength = 0;
+
+ private ulong sequenceCount = 0;
+
+ private ulong[][] qualityScoresAtPosition = Array.Empty();
+
+ private uint lowestMean = uint.MaxValue;
+
+ private uint highestMean = uint.MinValue;
+
+ private ulong[] nCountsAtPosition = Array.Empty();
+
+ private readonly ulong[] gcPercentageCounts = new ulong[101];
+
+ private readonly ulong[] meanPhredCounts = new ulong[128];
+
+ public string Name => "multiqc";
+
+ public string Description => "Generates limited / Ovation-branded output compatible with MultiQC";
+
+ public bool IsEnabledForAll => true;
+
+ public ReaderType SupportedReaders => ReaderType.AllReaders;
+
+ public object Data
+ {
+ get
+ {
+ var meanPhreds = new ulong[maxReadLength][];
+ for (var pos = 0; pos < maxReadLength; pos++)
+ {
+ meanPhreds[pos] = new ulong[128];
+ for (var qual = 0UL; qual < 128; qual++)
+ {
+ meanPhreds[pos][qual] = qualityScoresAtPosition[pos][qual] * qual;
+ }
+ }
+
+ var phreds = meanPhredCounts
+ .Skip(ILLUMINA_BASE_ADJUSTMENT_LOWER)
+ .Take(ILLUMINA_BASE_ADJUSTMENT_UPPER - ILLUMINA_BASE_ADJUSTMENT_LOWER + 1)
+ .ToArray();
+
+ return new
+ {
+ meanPhredByPosition = meanPhreds
+ .Select((p, idx) => new { idx, val = (int)(p.Sum(q => (decimal)q) / sequenceCount) })
+ .ToDictionary(i => i.idx + 1, i => i.val),
+
+ gcPercentageCounts = gcPercentageCounts
+ .Select((gcc, idx) => new { idx, gcc })
+ .ToDictionary(i => i.idx, i => i.gcc),
+
+ gcPercentagePercentage = gcPercentageCounts
+ .Select((gcc, idx) => new { idx, val = Math.Round((float)gcc / (float)sequenceCount * 100.0, 3) })
+ .ToDictionary(i => i.idx, i => i.val),
+
+ nCountsAtPosition = nCountsAtPosition
+ .Select((nc, idx) => new { idx, nc })
+ .ToDictionary(i => i.idx + 1, i => i.nc),
+
+ nPercentageAtPosition = nCountsAtPosition
+ .Select((nc, idx) => new { idx, val = Math.Round((float)nc / (float)sequenceCount * 100.0, 3) })
+ .ToDictionary(i => i.idx + 1, i => i.val),
+
+ meanPhredCounts = meanPhredCounts
+ .Skip(ILLUMINA_BASE_ADJUSTMENT_LOWER)
+ .Take(ILLUMINA_BASE_ADJUSTMENT_UPPER - ILLUMINA_BASE_ADJUSTMENT_LOWER + 1)
+ .Select((val, idx) => new { idx, val })
+ .ToDictionary(i => ILLUMINA_BASE_ADJUSTMENT_LOWER + i.idx, i => i.val),
+
+ meanPhredPercentage = meanPhredCounts
+ .Skip(ILLUMINA_BASE_ADJUSTMENT_LOWER)
+ .Take(ILLUMINA_BASE_ADJUSTMENT_UPPER - ILLUMINA_BASE_ADJUSTMENT_LOWER + 1)
+ .Select((pc, idx) => new { idx, val = Math.Round((float)pc / (float)sequenceCount * 100.0, 3) })
+ .ToDictionary(i => ILLUMINA_BASE_ADJUSTMENT_LOWER + i.idx, i => i.val),
+
+ phreds = phreds
+ .Select((val, idx) => new { phred = Convert.ToChar(ILLUMINA_BASE_ADJUSTMENT_LOWER + idx), count = val })
+ .ToDictionary(i => i.phred, i => i.count)
+ };
+ }
+ }
+
+ public void ProcessSequence(Sequence sequence)
+ {
+ sequenceCount++;
+
+ var sequenceLength = sequence.Read.Length;
+
+ // see if we need to resize our arrays
+ if (sequenceLength > maxReadLength)
+ {
+ maxReadLength = sequenceLength;
+
+ Array.Resize(ref qualityScoresAtPosition, maxReadLength);
+ for (var n = 0; n < maxReadLength; n++)
+ {
+ if (qualityScoresAtPosition[n] == null)
+ {
+ qualityScoresAtPosition[n] = new ulong[128];
+ }
+ }
+ Array.Resize(ref nCountsAtPosition, maxReadLength);
+ }
+
+ // calculate mean quality score
+ var sum = 0;
+ var count = 0;
+
+ var qual = sequence.Quality;
+ for (var i = 0; i < sequenceLength; i++)
+ {
+ byte q = qual[i];
+
+ qualityScoresAtPosition[i][q]++;
+ sum += q;
+ count++;
+ }
+
+ var mean = (uint)((float)sum / (float)count);
+ meanPhredCounts[mean]++;
+
+ lowestMean = Math.Min(lowestMean, mean);
+ highestMean = Math.Max(highestMean, mean);
+
+ // calculate the gc percentage and ncount of the read
+ var gcCount = 0;
+ var read = sequence.Read;
+ for (var i = 0; i < sequenceLength; i++)
+ {
+ switch (read[i])
+ {
+ case (byte)'G':
+ case (byte)'C':
+ gcCount++;
+ break;
+
+ case (byte)'N':
+ nCountsAtPosition[i]++;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ var gcPercentage = (int)((float)gcCount / (float)sequenceLength * 100.0);
+ gcPercentageCounts[gcPercentage]++;
+ }
+
+ public void Reset()
+ {
+ }
+ }
+}
diff --git a/Readers/BamReader.cs b/Readers/BamReader.cs
index e7dfb78..ffe30c4 100644
--- a/Readers/BamReader.cs
+++ b/Readers/BamReader.cs
@@ -12,7 +12,7 @@ public class BamReader : AbstractReader
private bool disposedValue;
public BamReader(string bam) :
- base(bam, false)
+ base(bam, true)
{
binaryReader = new BinaryReader(bufferedStream);
diff --git a/Utils/Statistics.cs b/Utils/Statistics.cs
new file mode 100644
index 0000000..e457124
--- /dev/null
+++ b/Utils/Statistics.cs
@@ -0,0 +1,86 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Ovation.FasterQC.Net.Utils
+{
+ public static class Statistics
+ {
+ ///
+ /// Partitions the given list around a pivot element such that all elements on left of pivot are <= pivot
+ /// and the ones at thr right are > pivot. This method can be used for sorting, N-order statistics such as
+ /// as median finding algorithms.
+ /// Pivot is selected ranodmly if random number generator is supplied else its selected as last element in the list.
+ /// Reference: Introduction to Algorithms 3rd Edition, Corman et al, pp 171
+ ///
+ private static int Partition(this IList list, int start, int end, Random? rnd = null) where T : IComparable
+ {
+ if (rnd != null)
+ list.Swap(end, rnd.Next(start, end + 1));
+
+ var pivot = list[end];
+ var lastLow = start - 1;
+
+ for (var i = start; i < end; i++)
+ {
+ if (list[i].CompareTo(pivot) <= 0)
+ list.Swap(i, ++lastLow);
+ }
+
+ list.Swap(end, ++lastLow);
+
+ return lastLow;
+ }
+
+ ///
+ /// Returns Nth smallest element from the list. Here n starts from 0 so that n=0 returns minimum, n=1 returns 2nd smallest element etc.
+ /// Note: specified list would be mutated in the process.
+ /// Reference: Introduction to Algorithms 3rd Edition, Corman et al, pp 216
+ ///
+ public static T NthOrderStatistic(this IList list, int n, Random? rnd = null) where T : IComparable
+ {
+ return NthOrderStatistic(list, n, 0, list.Count - 1, rnd);
+ }
+
+ private static T NthOrderStatistic(this IList list, int n, int start, int end, Random? rnd) where T : IComparable
+ {
+ while (true)
+ {
+ var pivotIndex = list.Partition(start, end, rnd);
+ if (pivotIndex == n)
+ return list[pivotIndex];
+
+ if (n < pivotIndex)
+ end = pivotIndex - 1;
+ else
+ start = pivotIndex + 1;
+ }
+ }
+
+ public static void Swap(this IList list, int i, int j)
+ {
+ // This check is not required but Partition
+ // function may make many calls so it's for perf reason
+ if (i == j)
+ return;
+
+ (list[j], list[i]) = (list[i], list[j]);
+ }
+
+ ///
+ /// Note: specified list would be mutated in the process.
+ ///
+ public static T Median(this IList list) where T : IComparable
+ {
+ return list.NthOrderStatistic((list.Count - 1) / 2);
+ }
+
+ public static double Median(this IEnumerable sequence, Func getValue)
+ {
+ var list = sequence.Select(getValue).ToList();
+ var mid = (list.Count - 1) / 2;
+
+ return list.NthOrderStatistic(mid);
+ }
+ }
+}
\ No newline at end of file