Skip to content

Commit

Permalink
Merge branch 'main' into issue-6
Browse files Browse the repository at this point in the history
  • Loading branch information
michaeljon authored May 18, 2022
2 parents ac22a74 + 9cf6fed commit 7722be6
Show file tree
Hide file tree
Showing 14 changed files with 304 additions and 72 deletions.
15 changes: 13 additions & 2 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,23 @@
// If you have changed target frameworks, make sure to update the program path.
"program": "${workspaceFolder}/bin/Debug/net6.0/Ovation.FasterQC.Net.dll",
"args": [
"-p", "-i", "/tmp/zr6254_1/zr6254_1.sorted.bam", "-o", "/tmp/bob.json", "-m", "BasicStatistics", "NCountsAtPosition"
"-v",
"-d",
"-f",
"sam",
"-i",
"./tmp/in3257_2_S1.sorted.sam",
"-o",
"./tmp/bob.json",
"-m",
"BasicStatistics",
"NCountsAtPosition"
],
"cwd": "${workspaceFolder}",
// For more information about the 'console' field, see https://aka.ms/VSCode-CS-LaunchJson-Console
"console": "internalConsole",
"stopAtEntry": false
"stopAtEntry": false,
"requireExactSource": false
},
{
"name": ".NET Core Attach",
Expand Down
2 changes: 1 addition & 1 deletion Interfaces/ISequenceReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ public interface ISequenceReader : IDisposable
{
int SequencesRead { get; }

bool ReadSequence(out Sequence sequence);
bool ReadSequence(out Sequence? sequence);

double ApproximateCompletion { get; }
}
Expand Down
15 changes: 8 additions & 7 deletions Models/BamAlignment.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
using System.Diagnostics.CodeAnalysis;

namespace Ovation.FasterQC.Net
{
#pragma warning disable IDE1006
[SuppressMessage("Code style", "IDE1006", Justification = "Names correspond to BAM structure field names")]
public class BamAlignment
{
public uint block_size { get; set; }
Expand All @@ -27,13 +29,12 @@ public class BamAlignment

public int tlen { get; set; }

public byte[] read_name { get; set; }
public byte[] read_name { get; set; } = null!;

public uint[] cigar { get; set; }
public uint[] cigar { get; set; } = null!;

public byte[] seq { get; set; }
public byte[] seq { get; set; } = null!;

public byte[] qual { get; set; }
public byte[] qual { get; set; } = null!;
}
#pragma warning restore IDE1006
}
}
68 changes: 68 additions & 0 deletions Models/ReadFlag.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
using System;

namespace Ovation.FasterQC.Net
{
/// <summary>
/// Bitwise FLAG field of a SAM/BAM alignment record. Each member is a single
/// bit, so values can be combined and tested with the usual bitwise operators
/// or <see cref="Enum.HasFlag(Enum)"/>.
/// </summary>
[Flags]
public enum ReadFlag : ushort
{
    /// <summary>
    /// no flag bits set (gives a raw flag value of 0 a readable name)
    /// </summary>
    None = 0x0,

    /// <summary>
    /// template having multiple segments in sequencing (read is paired)
    /// </summary>
    Paired = 0x1,

    /// <summary>
    /// each segment properly aligned according to the aligner (read mapped in proper pair)
    /// </summary>
    Aligned = 0x2,

    /// <summary>
    /// segment unmapped (read unmapped)
    /// </summary>
    SegmentUnmapped = 0x4,

    /// <summary>
    /// next segment in the template unmapped (mate unmapped)
    /// </summary>
    NextSegmentUnmapped = 0x8,

    /// <summary>
    /// SEQ being reverse complemented (read on the reverse strand)
    /// </summary>
    ReverseComplemented = 0x10,

    /// <summary>
    /// SEQ of the next segment in the template being reverse complemented (mate on the reverse strand)
    /// </summary>
    NextSegmentReverseComplemented = 0x20,

    /// <summary>
    /// the first segment in the template (is read1)
    /// </summary>
    FirstSegment = 0x40,

    /// <summary>
    /// the last segment in the template (is read2)
    /// </summary>
    LastSegment = 0x80,

    /// <summary>
    /// not primary alignment (secondary alignment)
    /// </summary>
    NotPrimaryAlignment = 0x100,

    /// <summary>
    /// alignment fails quality checks (e.g. platform/vendor quality controls)
    /// </summary>
    FailedQualityChecks = 0x200,

    /// <summary>
    /// PCR or optical duplicate
    /// </summary>
    OpticalDuplicate = 0x400,

    /// <summary>
    /// supplementary alignment (e.g. aligner specific, could be a portion of a split read or a tied region)
    /// </summary>
    SupplementaryAlignment = 0x800
}
}
10 changes: 8 additions & 2 deletions Models/Sequence.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ namespace Ovation.FasterQC.Net
{
public class Sequence
{
public ReadFlag ReadFlag { get; }

public byte[] Identifier { get; }

public byte[] Read { get; }
Expand All @@ -21,8 +23,9 @@ public Sequence(byte[] lines, int[] endOfLines)
Quality = new ReadOnlyMemory<byte>(lines, endOfLines[2], endOfLines[3] - endOfLines[2]).ToArray();
}

public Sequence(byte[] identifer, byte[] read, byte[] blank, byte[] quality)
public Sequence(ushort readFlag, byte[] identifer, byte[] read, byte[] blank, byte[] quality)
{
ReadFlag = (ReadFlag)readFlag;
Identifier = new ReadOnlyMemory<byte>(identifer).ToArray();
Read = new ReadOnlyMemory<byte>(read).ToArray();
Blank = new ReadOnlyMemory<byte>(blank).ToArray();
Expand All @@ -31,15 +34,18 @@ public Sequence(byte[] identifer, byte[] read, byte[] blank, byte[] quality)

public Sequence(BamAlignment bamAlignment)
{
ReadFlag = (ReadFlag)bamAlignment.flag;
Identifier = new ReadOnlyMemory<byte>(bamAlignment.read_name).ToArray();
Read = new ReadOnlyMemory<byte>(bamAlignment.seq).ToArray();
Quality = new ReadOnlyMemory<byte>(bamAlignment.qual).ToArray();
Blank = Array.Empty<byte>();
}

public override string ToString()
{
var sb = new StringBuilder("sequence: \n");

sb.AppendLine(ReadFlag.ToString());
sb.AppendLine(new string(Encoding.ASCII.GetChars(Identifier)));
sb.AppendLine(new string(Encoding.ASCII.GetChars(Read)));
sb.AppendLine(new string(Encoding.ASCII.GetChars(Blank)));
Expand All @@ -48,4 +54,4 @@ public override string ToString()
return sb.ToString();
}
}
}
}
1 change: 1 addition & 0 deletions Ovation.FasterQC.Net.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
Expand Down
30 changes: 20 additions & 10 deletions Program.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.Json;
using CommandLine;
using Ovation.FasterQC.Net.Modules;
Expand All @@ -19,17 +20,24 @@ class Program
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
};

private TimedSequenceProgressBar progressBar;
private TimedSequenceProgressBar? progressBar;

static void Main(string[] args)
{
Parser.Default.ParseArguments<CliOptions>(args)
.WithParsed(o =>
var parser = new Parser(config =>
{
o.Validate();
Settings = o;
new Program().Run();
});
config.AutoHelp = true;
config.AutoVersion = true;
config.CaseInsensitiveEnumValues = true;
}
);

parser.ParseArguments<CliOptions>(args)
.WithParsed(o =>
{
Settings = o;
new Program().Run();
});
}

private void Run()
Expand All @@ -42,14 +50,16 @@ private void Run()
On(Settings.ShowProgress, () => progressBar = new TimedSequenceProgressBar(sequenceReader));
On(Settings.Verbose, () => Console.Error.WriteLine($"Processing {Settings.InputFilename}..."));

while (sequenceReader.ReadSequence(out Sequence sequence))
while (sequenceReader.ReadSequence(out Sequence? sequence))
{
ArgumentNullException.ThrowIfNull(sequence);

foreach (var module in modules)
{
module.ProcessSequence(sequence);
}

On(Settings.ShowProgress, () => progressBar.Update());
On(Settings.ShowProgress, () => progressBar?.Update());
On(Settings.Verbose, () =>
{
if (sequenceReader.SequencesRead % UpdatePeriod == 0)
Expand All @@ -71,7 +81,7 @@ private void Run()
results[module.Name] = module.Data;
}

On(Settings.ShowProgress, () => progressBar.Update(force: true));
On(Settings.ShowProgress, () => progressBar?.Update(force: true));
On(Settings.Verbose, () => Console.Error.WriteLine($"{sequenceReader.SequencesRead.WithSsiUnits()} sequences completed ({sequenceReader.ApproximateCompletion:0.0}%)"));

if (string.IsNullOrWhiteSpace(Settings.OutputFilename))
Expand Down
5 changes: 3 additions & 2 deletions Readers/BamReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public BamReader(string bam)
ConsumeHeader();
}

public bool ReadSequence(out Sequence sequence)
public bool ReadSequence(out Sequence? sequence)
{
try
{
Expand Down Expand Up @@ -106,9 +106,10 @@ private BamAlignment ReadSequence()
var bamAlignment = new BamAlignment
{
block_size = block_size,
refID = BitConverter.ToInt32(block, offset)
};

bamAlignment.refID = BitConverter.ToInt32(block, offset); offset += 4;
offset += 4;
bamAlignment.pos = BitConverter.ToInt32(block, offset) + 1; offset += 4;
bamAlignment.l_read_name = block[offset]; offset += 1;
bamAlignment.mapq = block[offset]; offset += 1;
Expand Down
14 changes: 7 additions & 7 deletions Readers/FastqLineReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public class FastqLineReader : ISequenceReader
{
private readonly FileStream inputStream;

private readonly GZipStream gzipStream;
private readonly GZipStream? gzipStream;

private readonly BufferedStream bufferedStream;

Expand Down Expand Up @@ -50,7 +50,7 @@ public FastqLineReader(string fastq, bool gzipped = true)
}
}

public bool ReadSequence(out Sequence sequence)
public bool ReadSequence(out Sequence? sequence)
{
try
{
Expand All @@ -61,12 +61,12 @@ public bool ReadSequence(out Sequence sequence)
return false;
}

var identifier = Encoding.ASCII.GetBytes(streamReader.ReadLine());
var read = Encoding.ASCII.GetBytes(streamReader.ReadLine());
var blank = Encoding.ASCII.GetBytes(streamReader.ReadLine());
var quality = Encoding.ASCII.GetBytes(streamReader.ReadLine());
var identifier = Encoding.ASCII.GetBytes(streamReader.ReadLine() ?? "");
var read = Encoding.ASCII.GetBytes(streamReader.ReadLine() ?? "");
var blank = Encoding.ASCII.GetBytes(streamReader.ReadLine() ?? "");
var quality = Encoding.ASCII.GetBytes(streamReader.ReadLine() ?? "");

sequence = new Sequence(identifier, read, blank, quality);
sequence = new Sequence(0, identifier, read, blank, quality);
sequencesRead++;
return true;
}
Expand Down
4 changes: 2 additions & 2 deletions Readers/FastqReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ public class FastqReader : ISequenceReader
{
private readonly FileStream inputStream;

private readonly GZipStream gzipStream;
private readonly GZipStream? gzipStream;

private readonly BufferedStream bufferedStream;

Expand Down Expand Up @@ -49,7 +49,7 @@ public FastqReader(string fastq, bool gzipped = true)
}
}

public bool ReadSequence(out Sequence sequence)
public bool ReadSequence(out Sequence? sequence)
{
// this is clearly dangerous, instead read a large chunk of the file
// and then walk through it returning only the consumed portion while
Expand Down
15 changes: 12 additions & 3 deletions Readers/ReaderFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,19 @@ public static class ReaderFactory
{
public static ISequenceReader Create(CliOptions settings)
{
return settings switch
return settings.Format switch
{
{ Fastq: true } => new FastqLineReader(settings.InputFilename, true),
{ Bam: true } => new BamReader(settings.InputFilename),
ReaderType.Fastq => new FastqReader(settings.InputFilename, false),
ReaderType.FastqGz => new FastqReader(settings.InputFilename, true),

ReaderType.FastqLine => new FastqLineReader(settings.InputFilename, false),
ReaderType.FastqLineGz => new FastqLineReader(settings.InputFilename, true),

ReaderType.Sam => new SamReader(settings.InputFilename, false),
ReaderType.SamGz => new SamReader(settings.InputFilename, true),

ReaderType.Bam => new BamReader(settings.InputFilename),

_ => throw new InvalidOperationException($"could not determine file type of {settings.InputFilename}")
};
}
Expand Down
19 changes: 19 additions & 0 deletions Readers/ReaderType.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
namespace Ovation.FasterQC.Net
{
/// <summary>
/// Input formats the reader factory can dispatch on. The "Gz" variants are the
/// ones the factory constructs with its boolean second argument set to true.
/// </summary>
public enum ReaderType
{
    /// <summary>FASTQ input handled by FastqReader, not gzipped.</summary>
    Fastq = 0,

    /// <summary>FASTQ input handled by FastqReader, gzipped.</summary>
    FastqGz = 1,

    /// <summary>FASTQ input handled by FastqLineReader, not gzipped.</summary>
    FastqLine = 2,

    /// <summary>FASTQ input handled by FastqLineReader, gzipped.</summary>
    FastqLineGz = 3,

    // NOTE(review): SamReader's second constructor argument is assumed to be
    // the same "gzipped" switch the FASTQ readers take - confirm in SamReader.

    /// <summary>SAM input handled by SamReader (second argument false).</summary>
    Sam = 4,

    /// <summary>SAM input handled by SamReader (second argument true).</summary>
    SamGz = 5,

    /// <summary>BAM input handled by BamReader.</summary>
    Bam = 6
}
}
Loading

0 comments on commit 7722be6

Please sign in to comment.