Skip to content

Commit

Permalink
Merge pull request #13 from michaeljon/issue-12
Browse files Browse the repository at this point in the history
Making module factory dynamic
  • Loading branch information
michaeljon authored May 19, 2022
2 parents 8ab1a9b + 9db0c64 commit ec7dcd3
Show file tree
Hide file tree
Showing 14 changed files with 227 additions and 41 deletions.
2 changes: 2 additions & 0 deletions Interfaces/IQcModule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ public interface IQcModule

string Description { get; }

bool IsEnabledForAll { get; }

void ProcessSequence(Sequence sequence);

void Reset();
Expand Down
6 changes: 5 additions & 1 deletion Models/ReadFlag.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ public enum ReadFlag : ushort
/// </summary>
LastSegment = 128,

EmbeddedSegment = FirstSegment | LastSegment,

/// <summary>
/// not primary alignment
/// </summary>
Expand All @@ -65,6 +67,8 @@ public enum ReadFlag : ushort
/// <summary>
/// supplementary alignment (e.g. aligner specific, could be a portion of a split read or a tied region)
/// </summary>
SupplementaryAlignment = 2048
SupplementaryAlignment = 2048,

SecondaryAlignment = NotPrimaryAlignment | SupplementaryAlignment
}
}
130 changes: 114 additions & 16 deletions Modules/AlignmentStatistics.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@

using System.Collections.Generic;
using System.Linq;

namespace Ovation.FasterQC.Net
{
public class AlignmentStatistics : IQcModule
{
private ulong sequenceCount;

private ulong baseCount;

private ulong paired;

private ulong aligned;
Expand All @@ -19,50 +24,143 @@ public class AlignmentStatistics : IQcModule

private ulong nextSegmentReverseComplemented;

private ulong firstSegment;

private ulong lastSegment;

private ulong embeddedSegment;

private ulong unknownIndex;

private ulong primaryAlignment;

private ulong secondaryAlignment;

private ulong nonPrimaryAlignment;

private ulong failedQualityChecks;

private ulong opticalDuplicate;

private ulong alignedBases;

private readonly Dictionary<int, ReadPair> readLengthHistogram = new();

public string Name => "alignmentStatistics";

public bool IsEnabledForAll => true;

public string Description => "Calculates alignment statistics for SAM/BAM files";

/// <summary>
/// Updates the running alignment counters and the per-read-length histogram
/// from the flags and read length of a single sequence.
/// </summary>
/// <param name="sequence">The sequence whose <c>ReadFlag</c> and <c>Read</c> length are tallied.</param>
public void ProcessSequence(Sequence sequence)
{
    var flags = sequence.ReadFlag;
    var readLength = sequence.Read.Length;

    sequenceCount++;
    baseCount += (ulong)readLength;

    // Single dictionary lookup; the bucket is created the first time a read length is seen.
    if (!readLengthHistogram.TryGetValue(readLength, out var readPair))
    {
        readPair = new ReadPair();
        readLengthHistogram.Add(readLength, readPair);
    }

    // revisit the logic here based on https://samtools.github.io/hts-specs/SAMv1.pdf
    // Each flag is tallied exactly once per sequence.
    if ((flags & ReadFlag.Paired) != 0)
    {
        paired++;
        readPair.Paired++;
    }

    if ((flags & ReadFlag.Aligned) != 0)
    {
        aligned++;
        alignedBases += (ulong)readLength;

        // NOTE(review): the Aligned flag feeds ReadPair.AlignedAndPaired, which Data
        // reports as the "unpaired" series -- confirm this is the intended meaning.
        readPair.AlignedAndPaired++;
    }

    if ((flags & ReadFlag.AlignedAndPaired) == ReadFlag.AlignedAndPaired) alignedAndPaired++;
    if ((flags & ReadFlag.SegmentUnmapped) != 0) segmentUnmapped++;
    if ((flags & ReadFlag.NextSegmentUnmapped) != 0) nextSegmentUnmapped++;
    if ((flags & ReadFlag.ReverseComplemented) != 0) reverseComplemented++;
    if ((flags & ReadFlag.NextSegmentReverseComplemented) != 0) nextSegmentReverseComplemented++;

    // EmbeddedSegment is FirstSegment | LastSegment: both bits set counts as embedded,
    // neither bit set counts as an unknown index.
    if ((flags & ReadFlag.FirstSegment) != 0) firstSegment++;
    if ((flags & ReadFlag.LastSegment) != 0) lastSegment++;
    if ((flags & ReadFlag.EmbeddedSegment) == ReadFlag.EmbeddedSegment) embeddedSegment++;
    if ((flags & ReadFlag.EmbeddedSegment) == 0) unknownIndex++;

    if ((flags & ReadFlag.NotPrimaryAlignment) != 0) nonPrimaryAlignment++;
    if ((flags & ReadFlag.FailedQualityChecks) != 0) failedQualityChecks++;
    if ((flags & ReadFlag.OpticalDuplicate) != 0) opticalDuplicate++;

    // SecondaryAlignment is NotPrimaryAlignment | SupplementaryAlignment: a read is
    // counted as primary when neither bit is set and as secondary when both are set.
    if ((flags & ReadFlag.SecondaryAlignment) == 0)
    {
        primaryAlignment++;
    }

    if ((flags & ReadFlag.SecondaryAlignment) == ReadFlag.SecondaryAlignment)
    {
        secondaryAlignment++;
    }
}

/// <summary>
/// Clears every accumulated counter and the read-length histogram so the
/// module starts the next run from a clean state.
/// </summary>
public void Reset()
{
    sequenceCount = 0;
    baseCount = 0;
    alignedBases = 0;

    paired = 0;
    aligned = 0;
    alignedAndPaired = 0;
    segmentUnmapped = 0;
    nextSegmentUnmapped = 0;
    reverseComplemented = 0;
    nextSegmentReverseComplemented = 0;

    firstSegment = 0;
    lastSegment = 0;
    embeddedSegment = 0;
    unknownIndex = 0;

    primaryAlignment = 0;
    secondaryAlignment = 0;
    nonPrimaryAlignment = 0;
    failedQualityChecks = 0;
    opticalDuplicate = 0;

    // The histogram is per-run state as well; without this, buckets from a
    // previous run would leak into the next report.
    readLengthHistogram.Clear();
}

/// <summary>
/// Snapshot of the accumulated alignment statistics: the individual flag
/// counters, the average read length, and a read-length histogram whose
/// series cover every length from the shortest to the longest read seen
/// (lengths with no reads are reported as zero).
/// </summary>
/// <remarks>
/// Reading this property does not modify the module's state. When no
/// sequence has been processed the histogram series are empty, the
/// min/max read lengths are 0 and the average read length is 0.
/// </remarks>
public object Data
{
    get
    {
        // Min()/Max() throw InvalidOperationException on an empty sequence,
        // so guard the "nothing processed yet" case explicitly.
        var hasReads = readLengthHistogram.Count > 0;
        var minReadLength = hasReads ? readLengthHistogram.Keys.Min() : 0;
        var maxReadLength = hasReads ? readLengthHistogram.Keys.Max() : 0;

        var pairedByLength = new List<ulong>();
        var unpairedByLength = new List<ulong>();

        if (hasReads)
        {
            // Walk the full length range so gaps show up as zero entries,
            // without inserting placeholder buckets into the histogram itself.
            for (var readLength = minReadLength; readLength <= maxReadLength; readLength++)
            {
                readLengthHistogram.TryGetValue(readLength, out var bucket);
                pairedByLength.Add(bucket?.Paired ?? 0UL);

                // NOTE(review): the "unpaired" series is fed from
                // ReadPair.AlignedAndPaired -- confirm the intended meaning.
                unpairedByLength.Add(bucket?.AlignedAndPaired ?? 0UL);
            }
        }

        return new
        {
            sequenceCount,
            paired,
            aligned,
            alignedAndPaired,
            segmentUnmapped,
            nextSegmentUnmapped,
            reverseComplemented,
            nextSegmentReverseComplemented,
            firstSegment,
            lastSegment,
            embeddedSegment,
            unknownIndex,
            primaryAlignment,
            secondaryAlignment,
            nonPrimaryAlignment,
            failedQualityChecks,
            opticalDuplicate,
            alignedBases,
            // Avoid NaN/Infinity when no sequences have been counted.
            averageReadLength = sequenceCount == 0 ? 0.0 : (double)baseCount / (double)sequenceCount,
            histogram = new
            {
                minReadLength,
                maxReadLength,
                paired = pairedByLength,
                unpaired = unpairedByLength
            }
        };
    }
}

/// <summary>
/// Per-read-length tallies stored in <c>readLengthHistogram</c>; one instance
/// is kept for each distinct read length.
/// </summary>
class ReadPair
{
/// <summary>
/// Number of sequences of this read length that had <c>ReadFlag.Paired</c> set.
/// </summary>
public ulong Paired { get; set; }

/// <summary>
/// Number of sequences of this read length that had <c>ReadFlag.Aligned</c> set
/// (incremented in <c>ProcessSequence</c>); <c>Data</c> reports it as the
/// <c>unpaired</c> histogram series.
/// NOTE(review): the property name suggests aligned-and-paired reads -- confirm
/// which meaning is intended.
/// </summary>
public ulong AlignedAndPaired { get; set; }
}
}
}
2 changes: 2 additions & 0 deletions Modules/BasicStatistics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ public class BasicStatistics : IQcModule

public string Description => "Calculates basic quality statistics";

public bool IsEnabledForAll => true;

public void ProcessSequence(Sequence sequence)
{
var sequenceLength = sequence.Read.Length;
Expand Down
2 changes: 2 additions & 0 deletions Modules/KmerContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ public class KMerContent : IQcModule

public string Description => "Computes 4-mer counts across all sequences";

public bool IsEnabledForAll => true;

public object Data
{
get
Expand Down
2 changes: 2 additions & 0 deletions Modules/MeanQualityDistribution.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ public class MeanQualityDistribution : IQcModule

public string Description => "Calculates the quality distribution across all sequences";

public bool IsEnabledForAll => true;

public object Data
{
get
Expand Down
53 changes: 38 additions & 15 deletions Modules/ModuleFactory.cs
Original file line number Diff line number Diff line change
@@ -1,35 +1,58 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
using Ovation.FasterQC.Net.Utils;

namespace Ovation.FasterQC.Net.Modules
{
public static class ModuleFactory
{
// Backing store for ModuleMap; stays null until the first access.
private static Dictionary<string, IQcModule>? moduleMap;

/// <summary>
/// Map from module type name to a module instance, built on first access by
/// reflecting over the executing assembly for concrete, namespaced types
/// that implement <see cref="IQcModule"/>.
/// </summary>
/// <remarks>
/// Each discovered type is instantiated through its parameterless
/// constructor. The map is only cached once every module has been created,
/// so a failure during discovery does not leave a partially populated map
/// behind for later callers.
/// </remarks>
public static Dictionary<string, IQcModule> ModuleMap
{
    get
    {
        if (moduleMap is null)
        {
            var discovered = new Dictionary<string, IQcModule>();

            // Abstract classes and interfaces cannot be instantiated, so they
            // are filtered out before Activator.CreateInstance is called.
            var moduleTypes = Assembly.GetExecutingAssembly()
                .GetTypes()
                .Where(t => string.IsNullOrEmpty(t.Namespace) == false
                    && t.IsAbstract == false
                    && typeof(IQcModule).IsAssignableFrom(t));

            foreach (var moduleType in moduleTypes)
            {
                if (Activator.CreateInstance(moduleType) is IQcModule module)
                {
                    discovered.Add(moduleType.Name, module);
                }
            }

            moduleMap = discovered;
        }

        return moduleMap;
    }
}

/// <summary>
/// Resolves the modules to run for the given command-line settings.
/// </summary>
/// <param name="settings">
/// Options whose <c>ModuleNames</c> selects the modules. When the list is
/// empty or its first entry is <c>"all"</c>, every module whose
/// <c>IsEnabledForAll</c> is true is selected and <c>ModuleNames</c> is
/// overwritten with the selected module names.
/// </param>
/// <returns>
/// The selected module instances. For an explicit name list the lookup is
/// deferred, and a name that is not in <see cref="ModuleMap"/> raises
/// <see cref="KeyNotFoundException"/> when the result is enumerated.
/// </returns>
public static IEnumerable<IQcModule> Create(CliOptions settings)
{
    if (settings.ModuleNames.Any() == false || settings.ModuleNames.First() == "all")
    {
        var moduleNames = new List<string>();
        var modules = new List<IQcModule>();

        // Always go through ModuleMap (never the nullable backing field) so
        // the map is guaranteed to be initialized before it is read.
        foreach (var module in ModuleMap.Where(m => m.Value.IsEnabledForAll))
        {
            moduleNames.Add(module.Key);
            modules.Add(module.Value);
        }

        settings.ModuleNames = moduleNames;
        return modules;
    }

    return settings.ModuleNames.Select(n => ModuleMap[n]);
}
}
Expand Down
3 changes: 3 additions & 0 deletions Modules/NCountsAtPosition.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,15 @@ namespace Ovation.FasterQC.Net
public class NCountsAtPosition : IQcModule
{
private int[] nCounts = Array.Empty<int>();

private int[] notNCounts = Array.Empty<int>();

public string Name => "nPercentages";

public string Description => "Calculates N counts at position along sequence";

public bool IsEnabledForAll => true;

public object Data
{
get
Expand Down
3 changes: 3 additions & 0 deletions Modules/PerPositionQuality.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@ public class PerPositionQuality : IQcModule
private QualityMetric[] qualities = Array.Empty<QualityMetric>();

private int minimumReadLength = int.MaxValue;

private int maximumReadLength = int.MinValue;

public string Name => "perPositionQuality";

public string Description => "Calculates the per-position quality metrics";

public bool IsEnabledForAll => true;

public object Data
{
get
Expand Down
5 changes: 5 additions & 0 deletions Modules/PerPositionSequenceContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,20 @@ namespace Ovation.FasterQC.Net
public class PerPositionSequenceContent : IQcModule
{
private ulong[] aCounts = Array.Empty<ulong>();

private ulong[] cCounts = Array.Empty<ulong>();

private ulong[] tCounts = Array.Empty<ulong>();

private ulong[] gCounts = Array.Empty<ulong>();
private ulong sequenceCount;

public string Name => "baseCounts";

public string Description => "Calculates ATCG counts at position along sequence";

public bool IsEnabledForAll => true;

public object Data
{
get
Expand Down
3 changes: 3 additions & 0 deletions Modules/PerSequenceGcContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ namespace Ovation.FasterQC.Net
public class PerSequenceGcContent : IQcModule
{
private ulong[] gcCounts = Array.Empty<ulong>();

private ulong sequenceCount;

public string Name => "gcDistribution";

public string Description => "Distribution of GC content percentages";

public bool IsEnabledForAll => true;

public object Data => gcCounts.Select(a => Math.Round((double)a / (double)sequenceCount * 100.0, 3));

public void ProcessSequence(Sequence sequence)
Expand Down
5 changes: 5 additions & 0 deletions Modules/QualityDistributionByBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ public class QualityDistributionByBase : IQcModule
private const byte ILLUMINA_BASE_ADJUSTMENT = 33;

private readonly ulong[] aQuality = new ulong[128];

private readonly ulong[] cQuality = new ulong[128];

private readonly ulong[] tQuality = new ulong[128];

private readonly ulong[] gQuality = new ulong[128];

private byte lowestScore = byte.MaxValue;
Expand All @@ -20,6 +23,8 @@ public class QualityDistributionByBase : IQcModule

public string Description => "Calculates the quality distribution across all sequences";

public bool IsEnabledForAll => true;

public object Data
{
get
Expand Down
Loading

0 comments on commit ec7dcd3

Please sign in to comment.