Skip to content

Commit

Permalink
adding multiqc-like output
Browse files Browse the repository at this point in the history
  • Loading branch information
michaeljon committed Jul 7, 2022
1 parent d252be0 commit 30b7b84
Show file tree
Hide file tree
Showing 6 changed files with 266 additions and 15 deletions.
8 changes: 3 additions & 5 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,14 @@
"program": "${workspaceFolder}/bin/Debug/net6.0/Ovation.FasterQC.Net.dll",
"args": [
"-d",
"-l",
"10",
"-f",
"bam",
"-i",
"./tmp/in3257_2_S1.sorted.bam",
"/Volumes/Untitled/experiment/OUT/bob/6111_S7_L001/6111_S7_L001.aligned.bam",
"-o",
"./tmp/bob.json",
"/Volumes/Untitled/experiment/OUT/bob/bob.json",
"-m",
"BasicStatistics"
"MultiQC"
],
"cwd": "${workspaceFolder}",
// For more information about the 'console' field, see https://aka.ms/VSCode-CS-LaunchJson-Console
Expand Down
6 changes: 3 additions & 3 deletions Models/ReadFlag.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@ public enum ReadFlag : ushort
/// <summary>
/// the first segment in the template (is read1)
/// </summary>
FirstSegment = 64,
Read1 = 64,

/// <summary>
/// the last segment in the template (is read2)
/// </summary>
LastSegment = 128,
Read2 = 128,

EmbeddedSegment = FirstSegment | LastSegment,
EmbeddedSegment = Read1 | Read2,

/// <summary>
/// not primary alignment
Expand Down
12 changes: 6 additions & 6 deletions Modules/AlignmentStatistics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ public class AlignmentStatistics : IQcModule

private ulong nextSegmentReverseComplemented;

private ulong firstSegment;
private ulong readOne;

private ulong lastSegment;
private ulong readTwo;

private ulong embeddedSegment;

Expand Down Expand Up @@ -84,8 +84,8 @@ public void ProcessSequence(Sequence sequence)
if ((sequence.ReadFlag & ReadFlag.ReverseComplemented) != 0) reverseComplemented++;
if ((sequence.ReadFlag & ReadFlag.NextSegmentReverseComplemented) != 0) nextSegmentReverseComplemented++;

if ((sequence.ReadFlag & ReadFlag.FirstSegment) != 0) firstSegment++;
if ((sequence.ReadFlag & ReadFlag.LastSegment) != 0) lastSegment++;
if ((sequence.ReadFlag & ReadFlag.Read1) != 0) readOne++;
if ((sequence.ReadFlag & ReadFlag.Read2) != 0) readTwo++;
if ((sequence.ReadFlag & (ReadFlag.EmbeddedSegment)) == ReadFlag.EmbeddedSegment) embeddedSegment++;
if ((sequence.ReadFlag & (ReadFlag.EmbeddedSegment)) == 0) unknownIndex++;

Expand Down Expand Up @@ -132,8 +132,8 @@ public object Data
nextSegmentUnmapped,
reverseComplemented,
nextSegmentReverseComplemented,
firstSegment,
lastSegment,
readOne,
readTwo,
embeddedSegment,
unknownIndex,
primaryAlignment,
Expand Down
167 changes: 167 additions & 0 deletions Modules/MultiQC.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
using System;
using System.Linq;

namespace Ovation.FasterQC.Net
{
public class MultiQC : IQcModule
{
private const byte ILLUMINA_BASE_ADJUSTMENT_LOWER = 33;

private const byte ILLUMINA_BASE_ADJUSTMENT_UPPER = 126;

private int maxReadLength = 0;

private ulong sequenceCount = 0;

private ulong[][] qualityScoresAtPosition = Array.Empty<ulong[]>();

private uint lowestMean = uint.MaxValue;

private uint highestMean = uint.MinValue;

private ulong[] nCountsAtPosition = Array.Empty<ulong>();

private readonly ulong[] gcPercentageCounts = new ulong[101];

private readonly ulong[] meanPhredCounts = new ulong[128];

public string Name => "multiqc";

public string Description => "Generates limited / Ovation-branded output compatible with MultiQC";

public bool IsEnabledForAll => true;

public ReaderType SupportedReaders => ReaderType.AllReaders;

public object Data
{
get
{
var meanPhreds = new ulong[maxReadLength][];
for (var pos = 0; pos < maxReadLength; pos++)
{
meanPhreds[pos] = new ulong[128];
for (var qual = 0UL; qual < 128; qual++)
{
meanPhreds[pos][qual] = qualityScoresAtPosition[pos][qual] * qual;
}
}

var phreds = meanPhredCounts
.Skip(ILLUMINA_BASE_ADJUSTMENT_LOWER)
.Take(ILLUMINA_BASE_ADJUSTMENT_UPPER - ILLUMINA_BASE_ADJUSTMENT_LOWER + 1)
.ToArray();

return new
{
meanPhredByPosition = meanPhreds
.Select((p, idx) => new { idx, val = (int)(p.Sum(q => (decimal)q) / sequenceCount) })
.ToDictionary(i => i.idx + 1, i => i.val),

gcPercentageCounts = gcPercentageCounts
.Select((gcc, idx) => new { idx, gcc })
.ToDictionary(i => i.idx, i => i.gcc),

gcPercentagePercentage = gcPercentageCounts
.Select((gcc, idx) => new { idx, val = Math.Round((float)gcc / (float)sequenceCount * 100.0, 3) })
.ToDictionary(i => i.idx, i => i.val),

nCountsAtPosition = nCountsAtPosition
.Select((nc, idx) => new { idx, nc })
.ToDictionary(i => i.idx + 1, i => i.nc),

nPercentageAtPosition = nCountsAtPosition
.Select((nc, idx) => new { idx, val = Math.Round((float)nc / (float)sequenceCount * 100.0, 3) })
.ToDictionary(i => i.idx + 1, i => i.val),

meanPhredCounts = meanPhredCounts
.Skip(ILLUMINA_BASE_ADJUSTMENT_LOWER)
.Take(ILLUMINA_BASE_ADJUSTMENT_UPPER - ILLUMINA_BASE_ADJUSTMENT_LOWER + 1)
.Select((val, idx) => new { idx, val })
.ToDictionary(i => ILLUMINA_BASE_ADJUSTMENT_LOWER + i.idx, i => i.val),

meanPhredPercentage = meanPhredCounts
.Skip(ILLUMINA_BASE_ADJUSTMENT_LOWER)
.Take(ILLUMINA_BASE_ADJUSTMENT_UPPER - ILLUMINA_BASE_ADJUSTMENT_LOWER + 1)
.Select((pc, idx) => new { idx, val = Math.Round((float)pc / (float)sequenceCount * 100.0, 3) })
.ToDictionary(i => ILLUMINA_BASE_ADJUSTMENT_LOWER + i.idx, i => i.val),

phreds = phreds
.Select((val, idx) => new { phred = Convert.ToChar(ILLUMINA_BASE_ADJUSTMENT_LOWER + idx), count = val })
.ToDictionary(i => i.phred, i => i.count)
};
}
}

public void ProcessSequence(Sequence sequence)
{
sequenceCount++;

var sequenceLength = sequence.Read.Length;

// see if we need to resize our arrays
if (sequenceLength > maxReadLength)
{
maxReadLength = sequenceLength;

Array.Resize(ref qualityScoresAtPosition, maxReadLength);
for (var n = 0; n < maxReadLength; n++)
{
if (qualityScoresAtPosition[n] == null)
{
qualityScoresAtPosition[n] = new ulong[128];
}
}
Array.Resize(ref nCountsAtPosition, maxReadLength);
}

// calculate mean quality score
var sum = 0;
var count = 0;

var qual = sequence.Quality;
for (var i = 0; i < sequenceLength; i++)
{
byte q = qual[i];

qualityScoresAtPosition[i][q]++;
sum += q;
count++;
}

var mean = (uint)((float)sum / (float)count);
meanPhredCounts[mean]++;

lowestMean = Math.Min(lowestMean, mean);
highestMean = Math.Max(highestMean, mean);

// calculate the gc percentage and ncount of the read
var gcCount = 0;
var read = sequence.Read;
for (var i = 0; i < sequenceLength; i++)
{
switch (read[i])
{
case (byte)'G':
case (byte)'C':
gcCount++;
break;

case (byte)'N':
nCountsAtPosition[i]++;
break;

default:
break;
}
}

var gcPercentage = (int)((float)gcCount / (float)sequenceLength * 100.0);
gcPercentageCounts[gcPercentage]++;
}

public void Reset()
{
}
}
}
2 changes: 1 addition & 1 deletion Readers/BamReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public class BamReader : AbstractReader
private bool disposedValue;

public BamReader(string bam) :
base(bam, false)
base(bam, true)
{
binaryReader = new BinaryReader(bufferedStream);

Expand Down
86 changes: 86 additions & 0 deletions Utils/Statistics.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
using System;
using System.Collections.Generic;
using System.Linq;

namespace Ovation.FasterQC.Net.Utils
{
public static class Statistics
{
/// <summary>
/// Partitions the given list around a pivot element such that all elements on left of pivot are <= pivot
/// and the ones at thr right are > pivot. This method can be used for sorting, N-order statistics such as
/// as median finding algorithms.
/// Pivot is selected ranodmly if random number generator is supplied else its selected as last element in the list.
/// Reference: Introduction to Algorithms 3rd Edition, Corman et al, pp 171
/// </summary>
private static int Partition<T>(this IList<T> list, int start, int end, Random? rnd = null) where T : IComparable<T>
{
if (rnd != null)
list.Swap(end, rnd.Next(start, end + 1));

var pivot = list[end];
var lastLow = start - 1;

for (var i = start; i < end; i++)
{
if (list[i].CompareTo(pivot) <= 0)
list.Swap(i, ++lastLow);
}

list.Swap(end, ++lastLow);

return lastLow;
}

/// <summary>
/// Returns Nth smallest element from the list. Here n starts from 0 so that n=0 returns minimum, n=1 returns 2nd smallest element etc.
/// Note: specified list would be mutated in the process.
/// Reference: Introduction to Algorithms 3rd Edition, Corman et al, pp 216
/// </summary>
public static T NthOrderStatistic<T>(this IList<T> list, int n, Random? rnd = null) where T : IComparable<T>
{
return NthOrderStatistic(list, n, 0, list.Count - 1, rnd);
}

private static T NthOrderStatistic<T>(this IList<T> list, int n, int start, int end, Random? rnd) where T : IComparable<T>
{
while (true)
{
var pivotIndex = list.Partition(start, end, rnd);
if (pivotIndex == n)
return list[pivotIndex];

if (n < pivotIndex)
end = pivotIndex - 1;
else
start = pivotIndex + 1;
}
}

public static void Swap<T>(this IList<T> list, int i, int j)
{
// This check is not required but Partition
// function may make many calls so it's for perf reason
if (i == j)
return;

(list[j], list[i]) = (list[i], list[j]);
}

/// <summary>
/// Note: specified list would be mutated in the process.
/// </summary>
public static T Median<T>(this IList<T> list) where T : IComparable<T>
{
return list.NthOrderStatistic((list.Count - 1) / 2);
}

public static double Median<T>(this IEnumerable<T> sequence, Func<T, double> getValue)
{
var list = sequence.Select(getValue).ToList();
var mid = (list.Count - 1) / 2;

return list.NthOrderStatistic(mid);
}
}
}

0 comments on commit 30b7b84

Please sign in to comment.