Skip to content

Commit

Permalink
Merge pull request #1 from stephen-riley/main
Browse files Browse the repository at this point in the history
CLI support, progress bar w/ ETA
  • Loading branch information
michaeljon authored May 17, 2022
2 parents e54a9c6 + 7ddcd24 commit e008268
Show file tree
Hide file tree
Showing 16 changed files with 395 additions and 66 deletions.
4 changes: 3 additions & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
"preLaunchTask": "build",
// If you have changed target frameworks, make sure to update the program path.
"program": "${workspaceFolder}/bin/Debug/net6.0/Ovation.FasterQC.Net.dll",
"args": ["/Users/michaeljon/Downloads/zr6254_1.sorted.bam"],
"args": [
"-p", "-i", "/tmp/zr6254_1/zr6254_1.sorted.bam", "-o", "/tmp/bob.json", "-m", "BasicStatistics", "NCountsAtPosition"
],
"cwd": "${workspaceFolder}",
// For more information about the 'console' field, see https://aka.ms/VSCode-CS-LaunchJson-Console
"console": "internalConsole",
Expand Down
6 changes: 4 additions & 2 deletions Interfaces/ISequenceReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ namespace Ovation.FasterQC.Net
{
public interface ISequenceReader : IDisposable
{
int SequencesRead { get; }

bool ReadSequence(out Sequence sequence);

int ApproximateCompletion();
double ApproximateCompletion { get; }
}
}
}
35 changes: 35 additions & 0 deletions Modules/ModuleFactory.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
using System.Collections.Generic;
using System.Linq;
using Ovation.FasterQC.Net.Utils;

namespace Ovation.FasterQC.Net.Modules
{
/// <summary>
/// Resolves the module names requested on the command line into the
/// <see cref="IQcModule"/> instances that will process each sequence.
/// </summary>
public static class ModuleFactory
{
    /// <summary>Pseudo module name that selects every registered module.</summary>
    private const string AllModules = "all";

    // One instance per module name; the same instances are handed out on every call.
    private static readonly Dictionary<string, IQcModule> moduleMap = new()
    {
        ["BasicStatistics"] = new BasicStatistics(),
        ["KMerContent"] = new KMerContent(),
        ["NCountsAtPosition"] = new NCountsAtPosition(),
        ["PerPositionSequenceContent"] = new PerPositionSequenceContent(),
        ["PerSequenceGcContent"] = new PerSequenceGcContent(),
        ["QualityDistributionByBase"] = new QualityDistributionByBase(),
        ["MeanQualityDistribution"] = new MeanQualityDistribution(),
        ["SequenceLengthDistribution"] = new SequenceLengthDistribution(),
        ["PerPositionQuality"] = new PerPositionQuality(),
    };

    /// <summary>
    /// Returns the modules named in <paramref name="settings"/>.
    /// </summary>
    /// <param name="settings">
    /// Parsed command-line options. When the first requested name is <c>all</c>,
    /// <c>ModuleNames</c> is overwritten with the full list of registered names.
    /// </param>
    /// <returns>
    /// The requested modules in the order they were requested, each at most once.
    /// </returns>
    /// <exception cref="System.ArgumentException">
    /// No module names were supplied, or at least one name is not registered.
    /// </exception>
    public static IEnumerable<IQcModule> Create(CliOptions settings)
    {
        // Snapshot the names once so the option value is not enumerated repeatedly.
        var requested = settings.ModuleNames?.ToList() ?? new List<string>();

        if (requested.Count == 0)
        {
            throw new System.ArgumentException(
                $"No modules requested. Available modules: {string.Join(", ", moduleMap.Keys)}, or '{AllModules}'.",
                nameof(settings));
        }

        if (requested[0] == AllModules)
        {
            settings.ModuleNames = moduleMap.Keys;
            return moduleMap.Values;
        }

        // Fail up front with a readable message instead of a KeyNotFoundException
        // raised lazily in the middle of processing.
        var unknown = requested.Where(n => !moduleMap.ContainsKey(n)).Distinct().ToList();
        if (unknown.Count > 0)
        {
            throw new System.ArgumentException(
                $"Unknown module(s): {string.Join(", ", unknown)}. Available modules: {string.Join(", ", moduleMap.Keys)}, or '{AllModules}'.",
                nameof(settings));
        }

        // Distinct: a repeated name would hand back the same shared instance twice,
        // so every sequence would be processed twice by that module.
        // ToList: the caller iterates the result once per sequence, so materialize
        // it rather than re-running the lookup on every enumeration.
        return requested.Distinct().Select(n => moduleMap[n]).ToList();
    }
}
}
11 changes: 5 additions & 6 deletions Modules/QualityDistributionByBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@ public class QualityDistributionByBase : IQcModule
{
private const byte ILLUMINA_BASE_ADJUSTMENT = 33;

private ulong[] aQuality = new ulong[128];
private ulong[] cQuality = new ulong[128];
private ulong[] tQuality = new ulong[128];
private ulong[] gQuality = new ulong[128];

private readonly ulong[] aQuality = new ulong[128];
private readonly ulong[] cQuality = new ulong[128];
private readonly ulong[] tQuality = new ulong[128];
private readonly ulong[] gQuality = new ulong[128];

private byte lowestScore = byte.MaxValue;

Expand Down Expand Up @@ -83,4 +82,4 @@ public void Reset()
highestScore = byte.MinValue;
}
}
}
}
5 changes: 5 additions & 0 deletions Ovation.FasterQC.Net.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,9 @@
<TargetFramework>net6.0</TargetFramework>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="CommandLineParser" Version="2.8.0" />
<PackageReference Include="ShellProgressBar" Version="5.1.0" />
</ItemGroup>

</Project>
69 changes: 49 additions & 20 deletions Program.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text.Json;
using CommandLine;
using Ovation.FasterQC.Net.Modules;
using Ovation.FasterQC.Net.Readers;
using Ovation.FasterQC.Net.Utils;
using static Ovation.FasterQC.Net.Utils.CliOptions;

namespace Ovation.FasterQC.Net
{
Expand All @@ -13,24 +19,28 @@ class Program
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
};

private static readonly List<IQcModule> modules = new()
{
// new BasicStatistics(),
// new KMerContent(),
// new NCountsAtPosition(),
// new PerPositionSequenceContent(),
// new PerSequenceGcContent(),
new QualityDistributionByBase(),
// new MeanQualityDistribution(),
// new SequenceLengthDistribution(),
// new PerPositionQuality()
};
private TimedSequenceProgressBar progressBar;

static void Main(string[] args)
{
using var sequenceReader = new BamReader(args[0]);
Parser.Default.ParseArguments<CliOptions>(args)
.WithParsed(o =>
{
o.Validate();
Settings = o;
new Program().Run();
});
}

private void Run()
{
using var sequenceReader = ReaderFactory.Create(Settings);
var modules = ModuleFactory.Create(Settings);

var sequencesProcessed = 0;
Console.Error.WriteLine($"Running modules:\n {string.Join("\n ", Settings.ModuleNames)}");

On(Settings.ShowProgress, () => progressBar = new TimedSequenceProgressBar(sequenceReader));
On(Settings.Verbose, () => Console.Error.WriteLine($"Processing {Settings.InputFilename}..."));

while (sequenceReader.ReadSequence(out Sequence sequence))
{
Expand All @@ -39,20 +49,39 @@ static void Main(string[] args)
module.ProcessSequence(sequence);
}

if (++sequencesProcessed % 100000 == 0)
On(Settings.ShowProgress, () => progressBar.Update());
On(Settings.Verbose, () =>
{
Console.Error.WriteLine($"{sequencesProcessed} sequences completed ~{sequenceReader.ApproximateCompletion()}%");
}
if (sequenceReader.SequencesRead % UpdatePeriod == 0)
{
Console.Error.WriteLine($"{sequenceReader.SequencesRead.WithSsiUnits()} sequences completed ({sequenceReader.ApproximateCompletion:0.0}%)");
}
});
}

Console.Error.WriteLine($"{sequencesProcessed} sequences processed");
var results = new Dictionary<string, object>()
{
["_modules"] = Settings.ModuleNames,
["_inputFilename"] = Settings.InputFilename,
["_outputFilename"] = string.IsNullOrWhiteSpace(Settings.OutputFilename) ? "STDOUT" : Settings.OutputFilename,
};

var results = new Dictionary<string, object>();
foreach (var module in modules)
{
results[module.Name] = module.Data;
}
Console.WriteLine(JsonSerializer.Serialize(results, options));

On(Settings.ShowProgress, () => progressBar.Update(force: true));
On(Settings.Verbose, () => Console.Error.WriteLine($"{sequenceReader.SequencesRead.WithSsiUnits()} sequences completed ({sequenceReader.ApproximateCompletion:0.0}%)"));

if (string.IsNullOrWhiteSpace(Settings.OutputFilename))
{
Console.WriteLine(JsonSerializer.Serialize(results, options));
}
else
{
File.WriteAllText(Settings.OutputFilename, JsonSerializer.Serialize(results, options));
}
}
}
}
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# FasterQC.net

A C# version of [FastQC](https://github.com/s-andrews/FastQC)

## Usage

```
> dotnet run -- --help
Ovation.FasterQC.Net 1.0.0
Copyright (C) 2022 Ovation.FasterQC.Net
-v, --verbose Set output to verbose messages.
--debug Show diagnostic output. Can only use with --verbose.
-p, --progress Show progress bar. Cannot use with --verbose.
-i, --input Required. Input filename.
-o, --output Output filename. Defaults to STDOUT.
-b, --bam Assume BAM format.
-f, --fastq Assume FASTQ format.
-z, --zipped Assume input file is gzipped.
-m, --modules Required. Space-separated list of modules to run, or 'all'.
--help Display this help screen.
--version Display version information.
```
45 changes: 24 additions & 21 deletions Readers/BamReader.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
// #define DEBUG_OUTPUT
using System;
using System.IO;
using System.IO.Compression;
#if DEBUG_OUTPUT
using System.Text;
#endif
using static Ovation.FasterQC.Net.Utils.CliOptions;

namespace Ovation.FasterQC.Net
{
Expand All @@ -20,6 +18,10 @@ public class BamReader : ISequenceReader

private bool disposedValue;

private int sequencesRead = 0;

public int SequencesRead => sequencesRead;

public BamReader(string bam)
{
var bufferSize = 128 * 1024;
Expand All @@ -45,6 +47,7 @@ public bool ReadSequence(out Sequence sequence)
var bamAlignment = ReadSequence();

sequence = new Sequence(bamAlignment);
sequencesRead++;
return true;
}
catch (EndOfStreamException)
Expand All @@ -54,10 +57,8 @@ public bool ReadSequence(out Sequence sequence)
}
}

public int ApproximateCompletion()
{
return (int)((double)inputStream.Position / (double)inputStream.Length * 100.0);
}
/// <summary>
/// Approximate percentage of the input consumed so far, computed from the
/// position and length of <c>inputStream</c>.
/// </summary>
/// <remarks>
/// A zero-length input reports 100 rather than the NaN that 0 / 0 would produce.
/// </remarks>
public double ApproximateCompletion =>
inputStream.Length > 0
? 100.0 * inputStream.Position / inputStream.Length
: 100.0;

private void ConsumeHeader()
{
Expand All @@ -66,13 +67,14 @@ private void ConsumeHeader()
var text = binaryReader.ReadBytes((int)l_text);
var n_ref = binaryReader.ReadUInt32();

#if DEBUG_OUTPUT
Console.Error.WriteLine($"magic: {(char)magic[0]}{(char)magic[1]}{(char)magic[2]}");
Console.Error.WriteLine($"l_text: {l_text}");
Console.Error.WriteLine($"text: ");
Console.Error.WriteLine(new string(Encoding.ASCII.GetChars(text)));
Console.Error.WriteLine($"n_ref: {n_ref}");
#endif
On(Settings.Debug, () =>
{
Console.Error.WriteLine($"magic: {(char)magic[0]}{(char)magic[1]}{(char)magic[2]}");
Console.Error.WriteLine($"l_text: {l_text}");
Console.Error.WriteLine($"text: ");
Console.Error.WriteLine(new string(Encoding.ASCII.GetChars(text)));
Console.Error.WriteLine($"n_ref: {n_ref}");
});

for (var refSeq = 0; refSeq < n_ref; refSeq++)
{
Expand All @@ -83,12 +85,13 @@ private void ConsumeHeader()
var name = binaryReader.ReadBytes((int)l_name - 1); binaryReader.ReadByte();
var l_ref = binaryReader.ReadUInt32();

#if DEBUG_OUTPUT
Console.Error.WriteLine($"refSeq: {refSeq}");
Console.Error.WriteLine($"l_name: {l_name}");
Console.Error.WriteLine($"name: {new string(Encoding.ASCII.GetChars(name))}");
Console.Error.WriteLine($"l_ref: {l_ref}");
#endif
On(Settings.Debug, () =>
{
Console.Error.WriteLine($"refSeq: {refSeq}");
Console.Error.WriteLine($"l_name: {l_name}");
Console.Error.WriteLine($"name: {new string(Encoding.ASCII.GetChars(name))}");
Console.Error.WriteLine($"l_ref: {l_ref}");
});
}
}

Expand Down Expand Up @@ -186,4 +189,4 @@ public void Dispose()
GC.SuppressFinalize(this);
}
}
}
}
20 changes: 12 additions & 8 deletions Readers/FastqLineReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.IO;
using System.IO.Compression;
using System.Text;
using static Ovation.FasterQC.Net.Utils.CliOptions;

namespace Ovation.FasterQC.Net
{
Expand All @@ -17,6 +18,13 @@ public class FastqLineReader : ISequenceReader

private bool disposedValue;

private int sequencesRead = 0;

public int SequencesRead => sequencesRead;

/// <summary>
/// Approximate percentage of the input consumed so far, computed from the
/// position and length of <c>inputStream</c>.
/// </summary>
/// <remarks>
/// A zero-length input reports 100 rather than the NaN that 0 / 0 would produce.
/// </remarks>
public double ApproximateCompletion =>
inputStream.Length > 0
? 100.0 * inputStream.Position / inputStream.Length
: 100.0;

public FastqLineReader(string fastq, bool gzipped = true)
{
var bufferSize = 128 * 1024;
Expand Down Expand Up @@ -48,7 +56,7 @@ public bool ReadSequence(out Sequence sequence)
{
if (streamReader.EndOfStream == true)
{
Console.Error.WriteLine("End of stream");
On(Settings.Verbose, () => Console.Error.WriteLine("End of stream"));
sequence = null;
return false;
}
Expand All @@ -59,21 +67,17 @@ public bool ReadSequence(out Sequence sequence)
var quality = Encoding.ASCII.GetBytes(streamReader.ReadLine());

sequence = new Sequence(identifier, read, blank, quality);
sequencesRead++;
return true;
}
catch (EndOfStreamException)
{
Console.Error.WriteLine("End of stream");
On(Settings.Verbose, () => Console.Error.WriteLine("End of stream"));
sequence = null;
return false;
}
}

public int ApproximateCompletion()
{
return (int)((double)inputStream.Position / (double)inputStream.Length * 100.0);
}

protected virtual void Dispose(bool disposing)
{
if (!disposedValue)
Expand All @@ -96,4 +100,4 @@ public void Dispose()
GC.SuppressFinalize(this);
}
}
}
}
Loading

0 comments on commit e008268

Please sign in to comment.