Skip to content

Commit

Permalink
making module factory dynamic
Browse files Browse the repository at this point in the history
  • Loading branch information
michaeljon committed May 18, 2022
1 parent 8ab1a9b commit 6f79480
Show file tree
Hide file tree
Showing 13 changed files with 120 additions and 23 deletions.
2 changes: 2 additions & 0 deletions Interfaces/IQcModule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ public interface IQcModule

string Description { get; }

/// <summary>
/// Whether this module is selected when no module names are supplied or the
/// first supplied name is "all"; ModuleFactory.Create filters on this value
/// in that case. Modules requested by name are looked up without consulting it.
/// </summary>
bool IsEnabledForAll { get; }

void ProcessSequence(Sequence sequence);

void Reset();
Expand Down
48 changes: 44 additions & 4 deletions Modules/AlignmentStatistics.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@

using System.Collections.Generic;
using System.Linq;

namespace Ovation.FasterQC.Net
{
public class AlignmentStatistics : IQcModule
{
private ulong sequenceCount;

private ulong baseCount;

private ulong paired;

private ulong aligned;
Expand All @@ -25,16 +30,39 @@ public class AlignmentStatistics : IQcModule

private ulong opticalDuplicate;

private ulong alignedBases;

private readonly Dictionary<int, ReadPair> readLengthHistogram = new();

public string Name => "alignmentStatistics";

public bool IsEnabledForAll => true;

public string Description => "Calculates alignment statistics for SAM/BAM files";

public void ProcessSequence(Sequence sequence)
{
sequenceCount++;

if ((sequence.ReadFlag & ReadFlag.Paired) != 0) paired++;
if ((sequence.ReadFlag & ReadFlag.Aligned) != 0) aligned++;
baseCount += (ulong)sequence.Read.Length;

if (readLengthHistogram.ContainsKey(sequence.Read.Length) == false)
{
readLengthHistogram.Add(sequence.Read.Length, new ReadPair());
}

var readPair = readLengthHistogram[sequence.Read.Length];

if ((sequence.ReadFlag & ReadFlag.Paired) != 0)
{
paired++;
readPair.Paired++;
}
if ((sequence.ReadFlag & ReadFlag.Aligned) != 0)
{
aligned++;
alignedBases += (ulong)sequence.Read.Length;
readPair.AlignedAndPaired++;
}
if ((sequence.ReadFlag & ReadFlag.AlignedAndPaired) == ReadFlag.AlignedAndPaired) alignedAndPaired++;
if ((sequence.ReadFlag & ReadFlag.SegmentUnmapped) != 0) segmentUnmapped++;
if ((sequence.ReadFlag & ReadFlag.NextSegmentUnmapped) != 0) nextSegmentUnmapped++;
Expand Down Expand Up @@ -62,7 +90,19 @@ public void Reset()
nextSegmentReverseComplemented,
nonPrimaryAlignment,
failedQualityChecks,
opticalDuplicate
opticalDuplicate,
alignedBases,
averageReadLength = (double)baseCount / (double)sequenceCount,
histogram = readLengthHistogram
.Select((k, v) => new ulong[] { (ulong)k.Key, k.Value.Paired, k.Value.AlignedAndPaired })
.OrderBy(a => a[0])
};

/// <summary>
/// Per-read-length counters; instances are the values of readLengthHistogram,
/// which ProcessSequence keys by sequence.Read.Length.
/// </summary>
class ReadPair
{
/// <summary>
/// Incremented by ProcessSequence for every read whose flags include ReadFlag.Aligned.
/// NOTE(review): the increment is not gated on ReadFlag.AlignedAndPaired despite
/// the property name - confirm whether that is intended.
/// </summary>
public ulong AlignedAndPaired { get; set; }

/// <summary>
/// Incremented by ProcessSequence for every read whose flags include ReadFlag.Paired.
/// </summary>
public ulong Paired { get; set; }
}
}
}
2 changes: 2 additions & 0 deletions Modules/BasicStatistics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ public class BasicStatistics : IQcModule

public string Description => "Calculates basic quality statistics";

public bool IsEnabledForAll => true;

public void ProcessSequence(Sequence sequence)
{
var sequenceLength = sequence.Read.Length;
Expand Down
2 changes: 2 additions & 0 deletions Modules/KmerContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ public class KMerContent : IQcModule

public string Description => "Computes 4-mer counts across all sequences";

public bool IsEnabledForAll => true;

public object Data
{
get
Expand Down
2 changes: 2 additions & 0 deletions Modules/MeanQualityDistribution.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ public class MeanQualityDistribution : IQcModule

public string Description => "Calculates the quality distribution across all sequences";

public bool IsEnabledForAll => true;

public object Data
{
get
Expand Down
53 changes: 38 additions & 15 deletions Modules/ModuleFactory.cs
Original file line number Diff line number Diff line change
@@ -1,35 +1,58 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
using Ovation.FasterQC.Net.Utils;

namespace Ovation.FasterQC.Net.Modules
{
/// <summary>
/// Discovers every <see cref="IQcModule"/> implementation in the executing
/// assembly and hands out the instances selected by the command-line options.
/// </summary>
public static class ModuleFactory
{
    // Populated on first use; only assigned once discovery has fully succeeded,
    // so a failure part-way through never leaves a half-filled map cached.
    private static Dictionary<string, IQcModule>? moduleMap;

    /// <summary>
    /// Map from a module's type name (e.g. "BasicStatistics") to its single instance.
    /// </summary>
    private static Dictionary<string, IQcModule> ModuleMap => moduleMap ??= DiscoverModules();

    /// <summary>
    /// Instantiates every concrete, namespaced type in the executing assembly
    /// that implements <see cref="IQcModule"/>, keyed by the type's simple name.
    /// </summary>
    private static Dictionary<string, IQcModule> DiscoverModules()
    {
        return Assembly.GetExecutingAssembly()
            .GetTypes()
            .Where(t => string.IsNullOrEmpty(t.Namespace) == false
                // Interfaces and abstract classes cannot be instantiated.
                && t.IsAbstract == false
                // Compare against the actual interface type rather than its simple name.
                && typeof(IQcModule).IsAssignableFrom(t))
            .Select(t => (IQcModule)Activator.CreateInstance(t)!)
            .ToDictionary(module => module.GetType().Name);
    }

    /// <summary>
    /// Returns the modules requested by <paramref name="settings"/>.
    /// </summary>
    /// <param name="settings">
    /// Parsed command-line options. When <c>ModuleNames</c> is empty or its first
    /// entry is "all", it is overwritten with the names of the modules returned.
    /// </param>
    /// <returns>
    /// Every module whose <c>IsEnabledForAll</c> is true when "all" (or nothing)
    /// was requested; otherwise the modules named in <c>ModuleNames</c>, in order.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="settings"/> is null.</exception>
    /// <exception cref="KeyNotFoundException">A requested module name is not known.</exception>
    public static IEnumerable<IQcModule> Create(CliOptions settings)
    {
        ArgumentNullException.ThrowIfNull(settings);

        if (settings.ModuleNames.Any() == false || settings.ModuleNames.First() == "all")
        {
            // Snapshot once so the reported names and the returned modules stay aligned.
            var enabled = ModuleMap.Where(m => m.Value.IsEnabledForAll).ToList();

            settings.ModuleNames = enabled.Select(m => m.Key).ToList();
            return enabled.Select(m => m.Value).ToList();
        }

        // Resolve eagerly: the caller enumerates the result repeatedly, and an
        // unknown name should fail here with a message that says which one.
        var modules = new List<IQcModule>();

        foreach (var name in settings.ModuleNames)
        {
            if (ModuleMap.TryGetValue(name, out var module) == false)
            {
                throw new KeyNotFoundException(
                    $"Unknown module '{name}'. Available modules: {string.Join(", ", ModuleMap.Keys)}");
            }

            modules.Add(module);
        }

        return modules;
    }
}
Expand Down
3 changes: 3 additions & 0 deletions Modules/NCountsAtPosition.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,15 @@ namespace Ovation.FasterQC.Net
public class NCountsAtPosition : IQcModule
{
private int[] nCounts = Array.Empty<int>();

private int[] notNCounts = Array.Empty<int>();

public string Name => "nPercentages";

public string Description => "Calculates N counts at position along sequence";

public bool IsEnabledForAll => true;

public object Data
{
get
Expand Down
3 changes: 3 additions & 0 deletions Modules/PerPositionQuality.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@ public class PerPositionQuality : IQcModule
private QualityMetric[] qualities = Array.Empty<QualityMetric>();

private int minimumReadLength = int.MaxValue;

private int maximumReadLength = int.MinValue;

public string Name => "perPositionQuality";

public string Description => "Calculates the per-position quality metrics";

public bool IsEnabledForAll => true;

public object Data
{
get
Expand Down
5 changes: 5 additions & 0 deletions Modules/PerPositionSequenceContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,20 @@ namespace Ovation.FasterQC.Net
public class PerPositionSequenceContent : IQcModule
{
private ulong[] aCounts = Array.Empty<ulong>();

private ulong[] cCounts = Array.Empty<ulong>();

private ulong[] tCounts = Array.Empty<ulong>();

private ulong[] gCounts = Array.Empty<ulong>();
private ulong sequenceCount;

public string Name => "baseCounts";

public string Description => "Calculates ATCG counts at position along sequence";

public bool IsEnabledForAll => true;

public object Data
{
get
Expand Down
3 changes: 3 additions & 0 deletions Modules/PerSequenceGcContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ namespace Ovation.FasterQC.Net
public class PerSequenceGcContent : IQcModule
{
private ulong[] gcCounts = Array.Empty<ulong>();

private ulong sequenceCount;

public string Name => "gcDistribution";

public string Description => "Distribution of GC content percentages";

public bool IsEnabledForAll => true;

/// <summary>
/// Lazily projects each entry of gcCounts to its share of sequenceCount,
/// expressed as a percentage rounded to three decimal places.
/// </summary>
public object Data => gcCounts.Select(count =>
{
    double fraction = (double)count / (double)sequenceCount;
    return Math.Round(fraction * 100.0, 3);
});

public void ProcessSequence(Sequence sequence)
Expand Down
5 changes: 5 additions & 0 deletions Modules/QualityDistributionByBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ public class QualityDistributionByBase : IQcModule
private const byte ILLUMINA_BASE_ADJUSTMENT = 33;

private readonly ulong[] aQuality = new ulong[128];

private readonly ulong[] cQuality = new ulong[128];

private readonly ulong[] tQuality = new ulong[128];

private readonly ulong[] gQuality = new ulong[128];

private byte lowestScore = byte.MaxValue;
Expand All @@ -20,6 +23,8 @@ public class QualityDistributionByBase : IQcModule

public string Description => "Calculates the quality distribution across all sequences";

public bool IsEnabledForAll => true;

public object Data
{
get
Expand Down
3 changes: 3 additions & 0 deletions Modules/SequenceLengthDistribution.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ namespace Ovation.FasterQC.Net
public class SequenceLengthDistribution : IQcModule
{
private int minimumReadLength = int.MaxValue;

private int maximumReadLength = int.MinValue;

private readonly IDictionary<int, ulong> lengths = new Dictionary<int, ulong>();
Expand All @@ -14,6 +15,8 @@ public class SequenceLengthDistribution : IQcModule

public string Description => "Calculates the sequence length distributions";

public bool IsEnabledForAll => true;

public object Data
{
get
Expand Down
12 changes: 8 additions & 4 deletions Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ private void Run()
On(Settings.ShowProgress, () => progressBar = new TimedSequenceProgressBar(sequenceReader));
On(Settings.Verbose, () => Console.Error.WriteLine($"Processing {Settings.InputFilename}..."));

while (sequenceReader.ReadSequence(out Sequence? sequence) && sequenceReader.SequencesRead < Settings.ReadLimit)
while (sequenceReader.SequencesRead < Settings.ReadLimit && sequenceReader.ReadSequence(out Sequence? sequence))
{
ArgumentNullException.ThrowIfNull(sequence);

Expand All @@ -70,9 +70,13 @@ private void Run()

Dictionary<string, object>? results = new()
{
["_modules"] = Settings.ModuleNames,
["_inputFilename"] = Settings.InputFilename,
["_outputFilename"] = string.IsNullOrWhiteSpace(Settings.OutputFilename) ? "STDOUT" : Settings.OutputFilename,
["_metadata"] = new
{
_modules = Settings.ModuleNames,
_inputFilename = Settings.InputFilename,
_outputFilename = string.IsNullOrWhiteSpace(Settings.OutputFilename) ? "STDOUT" : Settings.OutputFilename,
_sequences = sequenceReader.SequencesRead
}
};

foreach (IQcModule? module in modules)
Expand Down

0 comments on commit 6f79480

Please sign in to comment.