-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #10 from michaeljon/issue-8
issue-8: adding samreader
- Loading branch information
Showing
9 changed files
with
272 additions
and
45 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
using System; | ||
|
||
namespace Ovation.FasterQC.Net | ||
{ | ||
/// <summary>
/// Bit flags carried in the FLAG column of a SAM alignment record.
/// Each member is a single bit; a record's flag value is the bitwise OR
/// of every member that applies to it.
/// </summary>
[Flags]
public enum ReadFlag : ushort
{
    /// <summary>
    /// 0x1: the template has multiple segments in sequencing (the read is paired).
    /// </summary>
    Paired = 0x001,

    /// <summary>
    /// 0x2: each segment is properly aligned according to the aligner
    /// (the read is mapped in a proper pair).
    /// </summary>
    Aligned = 0x002,

    /// <summary>
    /// 0x4: this segment is unmapped.
    /// </summary>
    SegmentUnmapped = 0x004,

    /// <summary>
    /// 0x8: the next segment in the template is unmapped.
    /// </summary>
    NextSegmentUnmapped = 0x008,

    /// <summary>
    /// 0x10: SEQ is stored reverse complemented.
    /// </summary>
    ReverseComplemented = 0x010,

    /// <summary>
    /// 0x20: SEQ of the next segment in the template is reverse complemented.
    /// </summary>
    NextSegmentReverseComplemented = 0x020,

    /// <summary>
    /// 0x40: this is the first segment in the template (read 1).
    /// </summary>
    FirstSegment = 0x040,

    /// <summary>
    /// 0x80: this is the last segment in the template (read 2).
    /// </summary>
    LastSegment = 0x080,

    /// <summary>
    /// 0x100: this is not the primary alignment.
    /// </summary>
    NotPrimaryAlignment = 0x100,

    /// <summary>
    /// 0x200: the alignment fails quality checks.
    /// </summary>
    FailedQualityChecks = 0x200,

    /// <summary>
    /// 0x400: the read is a PCR or optical duplicate.
    /// </summary>
    OpticalDuplicate = 0x400,

    /// <summary>
    /// 0x800: supplementary alignment (aligner specific; for example a portion
    /// of a split read).
    /// </summary>
    SupplementaryAlignment = 0x800
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
namespace Ovation.FasterQC.Net | ||
{ | ||
/// <summary>
/// Enumerates the kinds of sequence input a reader can be selected for.
/// The numeric values follow declaration order, starting at zero.
/// </summary>
public enum ReaderType
{
    /// <summary>FASTQ input.</summary>
    Fastq = 0,

    /// <summary>FASTQ input, presumably gzip-compressed (TODO confirm against the code that selects readers).</summary>
    FastqGz = 1,

    /// <summary>FASTQ input handled by a line-oriented reader; exact semantics are not visible here (TODO confirm).</summary>
    FastqLine = 2,

    /// <summary>Line-oriented FASTQ input, presumably gzip-compressed (TODO confirm).</summary>
    FastqLineGz = 3,

    /// <summary>SAM input.</summary>
    Sam = 4,

    /// <summary>SAM input, presumably gzip-compressed (TODO confirm against the code that selects readers).</summary>
    SamGz = 5,

    /// <summary>BAM input.</summary>
    Bam = 6
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
using System; | ||
using System.Globalization; | ||
using System.IO; | ||
using System.IO.Compression; | ||
using System.Text; | ||
using static Ovation.FasterQC.Net.Utils.CliOptions; | ||
|
||
namespace Ovation.FasterQC.Net | ||
{ | ||
/// <summary>
/// Reads alignment records from a SAM text file, optionally gzip-compressed,
/// and exposes each record as a <see cref="Sequence"/>.
/// </summary>
/// <remarks>
/// Header lines (those starting with <c>@</c>) are consumed when the reader is
/// constructed. Only the QNAME (field 1), FLAG (field 2), SEQ (field 10) and
/// QUAL (field 11) columns of each record are used.
/// </remarks>
public class SamReader : ISequenceReader
{
    // A record must have at least this many tab-separated fields because
    // SEQ and QUAL are read from indexes 9 and 10.
    private const int MandatoryFieldCount = 11;

    // Buffer size shared by the file stream, the buffered stream and the text reader.
    private const int BufferSize = 128 * 1024;

    private readonly FileStream inputStream;

    private readonly GZipStream? gzipStream;

    private readonly BufferedStream bufferedStream;

    private readonly StreamReader streamReader;

    private bool disposedValue;

    private int sequencesRead = 0;

    /// <summary>
    /// Number of records successfully returned by <see cref="ReadSequence"/> so far.
    /// </summary>
    public int SequencesRead => sequencesRead;

    /// <summary>
    /// Opens <paramref name="sam"/> and skips its header section.
    /// </summary>
    /// <param name="sam">Path of the SAM file to open.</param>
    /// <param name="gzipped">
    /// <c>true</c> (the default) to decompress the file with gzip while reading;
    /// <c>false</c> to read it as plain text.
    /// </param>
    public SamReader(string sam, bool gzipped = true)
    {
        var fileStreamOptions = new FileStreamOptions
        {
            Mode = FileMode.Open,
            BufferSize = BufferSize,
        };

        inputStream = File.Open(sam, fileStreamOptions);

        try
        {
            Stream source = inputStream;
            if (gzipped)
            {
                gzipStream = new GZipStream(inputStream, CompressionMode.Decompress);
                source = gzipStream;
            }

            bufferedStream = new BufferedStream(source, BufferSize);
            streamReader = new StreamReader(bufferedStream, Encoding.ASCII, false, BufferSize);

            ConsumeHeader();
        }
        catch
        {
            // The caller never receives an instance when construction fails,
            // so nobody else can dispose the streams opened above.
            ReleaseStreams();
            throw;
        }
    }

    /// <summary>
    /// Reads and discards the leading header lines (those starting with '@'),
    /// echoing each one to standard error when debug output is enabled.
    /// </summary>
    private void ConsumeHeader()
    {
        try
        {
            while (streamReader.Peek() == '@')
            {
                var header = streamReader.ReadLine();
                On(Settings.Debug, () => Console.Error.WriteLine(header));
            }
        }
        catch (EndOfStreamException)
        {
            // swallow this, we've run out of file and a call
            // into ReadSequence will handle the EOF case
        }
    }

    /// <summary>
    /// Reads the next alignment record from the file.
    /// </summary>
    /// <param name="sequence">
    /// Receives the parsed record, or <c>null</c> when the end of the file has been reached.
    /// </param>
    /// <returns><c>true</c> when a record was read; <c>false</c> at end of file.</returns>
    /// <exception cref="FormatException">
    /// The line has fewer than eleven tab-separated fields, or its FLAG field
    /// is not a valid unsigned 16-bit integer.
    /// </exception>
    public bool ReadSequence(out Sequence? sequence)
    {
        var entry = ReadNextRecordLine();
        if (entry is null)
        {
            On(Settings.Verbose, () => Console.Error.WriteLine("End of stream"));
            sequence = null;
            return false;
        }

        // this is clearly a bad approach, we're going to be allocating a
        // ton of small strings here, probably better to read the line,
        // find the tabs ourselves, then pull the bytes out of the components
        var parts = entry.Split('\t', StringSplitOptions.TrimEntries);
        if (parts.Length < MandatoryFieldCount)
        {
            throw new FormatException(
                $"Malformed SAM record: expected at least {MandatoryFieldCount} tab-separated fields but found {parts.Length}.");
        }

        var identifier = Encoding.ASCII.GetBytes(parts[0]);
        var flag = ushort.Parse(parts[1], CultureInfo.InvariantCulture);
        var read = Encoding.ASCII.GetBytes(parts[9]);
        var blank = Array.Empty<byte>();
        var quality = Encoding.ASCII.GetBytes(parts[10]);

        sequence = new Sequence(flag, identifier, read, blank, quality);
        sequencesRead++;
        return true;
    }

    /// <summary>
    /// Returns the next line that should be parsed as an alignment record,
    /// or <c>null</c> when no more lines are available.
    /// </summary>
    private string? ReadNextRecordLine()
    {
        try
        {
            string? line;
            do
            {
                line = streamReader.ReadLine();
            }
            while (line is not null && IsSkippable(line));

            return line;
        }
        catch (EndOfStreamException)
        {
            // Treated the same as a normal end of file.
            return null;
        }
    }

    /// <summary>
    /// Tells whether a line carries no alignment record: it is blank, or it is
    /// a header line that ConsumeHeader did not see. Peek is documented to be
    /// able to return -1 for reasons other than end of data, so the header
    /// loop can stop early; a record's QNAME cannot begin with '@' in the SAM
    /// format, so skipping such lines never drops a record.
    /// </summary>
    private static bool IsSkippable(string line) =>
        string.IsNullOrWhiteSpace(line) || line[0] == '@';

    /// <summary>
    /// Percentage (0-100) of the underlying file consumed so far, computed from
    /// the file stream's position and length. Because of buffering (and
    /// compression, when enabled) this is only an approximation of how many
    /// records have been returned. A zero-length file reports 100.
    /// </summary>
    public double ApproximateCompletion
    {
        get
        {
            var length = inputStream.Length;

            // Avoid 0/0 = NaN for an empty file; there is nothing left to read.
            return length == 0 ? 100.0 : 100.0 * inputStream.Position / length;
        }
    }

    /// <summary>
    /// Disposes the reader and every stream it wraps, outermost first.
    /// Safe to call when only some of them have been created.
    /// </summary>
    private void ReleaseStreams()
    {
        streamReader?.Dispose();
        bufferedStream?.Dispose();
        gzipStream?.Dispose();
        inputStream?.Dispose();
    }

    /// <summary>
    /// Releases the streams held by this reader.
    /// </summary>
    /// <param name="disposing">
    /// <c>true</c> when called from <see cref="Dispose()"/>; managed streams
    /// are only released in that case.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (disposedValue)
        {
            return;
        }

        if (disposing)
        {
            ReleaseStreams();
        }

        disposedValue = true;
    }

    /// <summary>
    /// Closes the file and releases all associated streams.
    /// </summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }
}
} |
Oops, something went wrong.