Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/michaeljon/fasterqc.net int…
Browse files Browse the repository at this point in the history
…o more-tweaks
  • Loading branch information
stephen-riley committed May 24, 2022
2 parents f2f7e90 + 462e3e3 commit efcb7f3
Show file tree
Hide file tree
Showing 21 changed files with 842 additions and 77 deletions.
10 changes: 4 additions & 6 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,17 @@
// If you have changed target frameworks, make sure to update the program path.
"program": "${workspaceFolder}/bin/Debug/net6.0/Ovation.FasterQC.Net.dll",
"args": [
"-v",
"-d",
"-l",
"1000000",
"10",
"-f",
"sam",
"bam",
"-i",
"./tmp/in3257_2_S1.sorted.sam",
"./tmp/in3257_2_S1.sorted.bam",
"-o",
"./tmp/bob.json",
"-m",
"BasicStatistics",
"NCountsAtPosition"
"BasicStatistics"
],
"cwd": "${workspaceFolder}",
// For more information about the 'console' field, see https://aka.ms/VSCode-CS-LaunchJson-Console
Expand Down
2 changes: 2 additions & 0 deletions Interfaces/IQcModule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ public interface IQcModule

string Description { get; }

bool IsEnabledForAll { get; }

void ProcessSequence(Sequence sequence);

void Reset();
Expand Down
93 changes: 93 additions & 0 deletions Models/BamAlignment.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
using System;
using System.ComponentModel;
using System.Diagnostics.CodeAnalysis;
using System.Text;

namespace Ovation.FasterQC.Net
{
Expand Down Expand Up @@ -37,4 +40,94 @@ public class BamAlignment

public byte[] qual { get; set; } = null!;
}

[SuppressMessage("Code style", "IDE1006", Justification = "Names correspond to BAM structure field names")]
public class BamOptionalElement
{
public char[] tag { get; set; }

public char val_type { get; set; }

public object value { get; set; } = null!;

public BamOptionalElement(byte[] block, ref int offset)
{
tag = Encoding.ASCII.GetChars(block, offset, 2); offset += 2;
val_type = Encoding.ASCII.GetChars(block, offset, 1)[0]; offset += 1;

// consume the rest
switch (val_type)
{
case 'A': offset += 1; break;

// byte
case 'c': offset += 1; break;
case 'C': offset += 1; break;

// short
case 's': offset += 2; break;
case 'S': offset += 2; break;

// int
case 'i': offset += 4; break;
case 'I': offset += 4; break;

// float
case 'f': offset += 4; break;

// null-terminated string
case 'Z':
while (block[offset++] != 0) ;
break;

// null-terminated hex digit pairs
case 'H':
while (block[offset++] != 0) ;
break;

// array of stuff
case 'B':
var subtype = Encoding.ASCII.GetChars(block, offset, 1)[0]; offset += 1;
var length = BitConverter.ToUInt32(new Span<byte>(block, offset, 4)); offset += 4;

// consume the stuff
for (var element = 0; element < length; element++)
{
switch (subtype)
{
// byte
case 'c': offset += 1; break;
case 'C': offset += 1; break;

// short
case 's': offset += 2; break;
case 'S': offset += 2; break;

// int
case 'i': offset += 4; break;
case 'I': offset += 4; break;

// float
case 'f': offset += 4; break;
}
}

break;
}
}

public override string ToString()
{
var sb = new StringBuilder();

sb.Append("tag: ");
sb.Append(tag[0]);
sb.Append(tag[1]);

sb.Append(", type: ");
sb.Append(val_type);

return sb.ToString();
}
}
}
6 changes: 5 additions & 1 deletion Models/ReadFlag.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ public enum ReadFlag : ushort
/// </summary>
LastSegment = 128,

EmbeddedSegment = FirstSegment | LastSegment,

/// <summary>
/// not primary alignment
/// </summary>
Expand All @@ -65,6 +67,8 @@ public enum ReadFlag : ushort
/// <summary>
/// supplementary alignment (e.g. aligner specific, could be a portion of a split read or a tied region)
/// </summary>
SupplementaryAlignment = 2048
SupplementaryAlignment = 2048,

SecondaryAlignment = NotPrimaryAlignment | SupplementaryAlignment
}
}
130 changes: 114 additions & 16 deletions Modules/AlignmentStatistics.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@

using System.Collections.Generic;
using System.Linq;

namespace Ovation.FasterQC.Net
{
public class AlignmentStatistics : IQcModule
{
private ulong sequenceCount;

private ulong baseCount;

private ulong paired;

private ulong aligned;
Expand All @@ -19,50 +24,143 @@ public class AlignmentStatistics : IQcModule

private ulong nextSegmentReverseComplemented;

private ulong firstSegment;

private ulong lastSegment;

private ulong embeddedSegment;

private ulong unknownIndex;

private ulong primaryAlignment;

private ulong secondaryAlignment;

private ulong nonPrimaryAlignment;

private ulong failedQualityChecks;

private ulong opticalDuplicate;

private ulong alignedBases;

private readonly Dictionary<int, ReadPair> readLengthHistogram = new();

public string Name => "alignmentStatistics";

public bool IsEnabledForAll => true;

public string Description => "Calculates alignment statistics for SAM/BAM files";

public void ProcessSequence(Sequence sequence)
{
sequenceCount++;

if ((sequence.ReadFlag & ReadFlag.Paired) != 0) paired++;
if ((sequence.ReadFlag & ReadFlag.Aligned) != 0) aligned++;
baseCount += (ulong)sequence.Read.Length;

if (readLengthHistogram.ContainsKey(sequence.Read.Length) == false)
{
readLengthHistogram.Add(sequence.Read.Length, new ReadPair());
}

var readPair = readLengthHistogram[sequence.Read.Length];

// revisit the logic here based on https://samtools.github.io/hts-specs/SAMv1.pdf
if ((sequence.ReadFlag & ReadFlag.Paired) != 0)
{
paired++;
readPair.Paired++;
}
if ((sequence.ReadFlag & ReadFlag.Aligned) != 0)
{
aligned++;
alignedBases += (ulong)sequence.Read.Length;
readPair.AlignedAndPaired++;
}
if ((sequence.ReadFlag & ReadFlag.AlignedAndPaired) == ReadFlag.AlignedAndPaired) alignedAndPaired++;
if ((sequence.ReadFlag & ReadFlag.SegmentUnmapped) != 0) segmentUnmapped++;
if ((sequence.ReadFlag & ReadFlag.NextSegmentUnmapped) != 0) nextSegmentUnmapped++;
if ((sequence.ReadFlag & ReadFlag.ReverseComplemented) != 0) reverseComplemented++;
if ((sequence.ReadFlag & ReadFlag.NextSegmentReverseComplemented) != 0) nextSegmentReverseComplemented++;

if ((sequence.ReadFlag & ReadFlag.FirstSegment) != 0) firstSegment++;
if ((sequence.ReadFlag & ReadFlag.LastSegment) != 0) lastSegment++;
if ((sequence.ReadFlag & (ReadFlag.EmbeddedSegment)) == ReadFlag.EmbeddedSegment) embeddedSegment++;
if ((sequence.ReadFlag & (ReadFlag.EmbeddedSegment)) == 0) unknownIndex++;

if ((sequence.ReadFlag & ReadFlag.NotPrimaryAlignment) != 0) nonPrimaryAlignment++;
if ((sequence.ReadFlag & ReadFlag.FailedQualityChecks) != 0) failedQualityChecks++;
if ((sequence.ReadFlag & ReadFlag.OpticalDuplicate) != 0) opticalDuplicate++;
if ((sequence.ReadFlag & ReadFlag.SecondaryAlignment) == 0)
{
primaryAlignment++;
}
if ((sequence.ReadFlag & ReadFlag.SecondaryAlignment) == ReadFlag.SecondaryAlignment)
{
secondaryAlignment++;
}
}

public void Reset()
{
sequenceCount = 0;
}

public object Data => new
public object Data
{
sequenceCount,
paired,
aligned,
alignedAndPaired,
segmentUnmapped,
nextSegmentUnmapped,
reverseComplemented,
nextSegmentReverseComplemented,
nonPrimaryAlignment,
failedQualityChecks,
opticalDuplicate
};
get
{
var minReadLength = readLengthHistogram.Keys.Min();
var maxReadLength = readLengthHistogram.Keys.Max();

for (var readLength = minReadLength; readLength < maxReadLength; readLength++)
{
if (readLengthHistogram.ContainsKey(readLength) == false)
{
readLengthHistogram.Add(readLength, new ReadPair());
}
}

return new
{
sequenceCount,
paired,
aligned,
alignedAndPaired,
segmentUnmapped,
nextSegmentUnmapped,
reverseComplemented,
nextSegmentReverseComplemented,
firstSegment,
lastSegment,
embeddedSegment,
unknownIndex,
primaryAlignment,
secondaryAlignment,
nonPrimaryAlignment,
failedQualityChecks,
opticalDuplicate,
alignedBases,
averageReadLength = (double)baseCount / (double)sequenceCount,
histogram = new
{
minReadLength,
maxReadLength,
paired = readLengthHistogram
.OrderBy(k => k.Key)
.Select((k, v) => k.Value.Paired),
unpaired = readLengthHistogram
.OrderBy(k => k.Key)
.Select((k, v) => k.Value.AlignedAndPaired)
}
};
}
}

class ReadPair
{
public ulong Paired { get; set; }

public ulong AlignedAndPaired { get; set; }
}
}
}
6 changes: 5 additions & 1 deletion Modules/BasicStatistics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ namespace Ovation.FasterQC.Net
{
public class BasicStatistics : IQcModule
{
private const byte ILLUMINA_BASE_ADJUSTMENT = 33;

private ulong sequenceCount;

private int minimumReadLength = int.MaxValue;
Expand All @@ -24,6 +26,8 @@ public class BasicStatistics : IQcModule

public string Description => "Calculates basic quality statistics";

public bool IsEnabledForAll => true;

public void ProcessSequence(Sequence sequence)
{
var sequenceLength = sequence.Read.Length;
Expand Down Expand Up @@ -85,7 +89,7 @@ public void Reset()
nCount,
xCount,

minimumQuality,
minimumQuality = Math.Min(minimumQuality - ILLUMINA_BASE_ADJUSTMENT, 0),

gcContent = Math.Round((double)(cCount + gCount) / (double)totalBases * 100.0, 3),

Expand Down
2 changes: 2 additions & 0 deletions Modules/KmerContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ public class KMerContent : IQcModule

public string Description => "Computes 4-mer counts across all sequences";

public bool IsEnabledForAll => true;

public object Data
{
get
Expand Down
Loading

0 comments on commit efcb7f3

Please sign in to comment.