diff --git a/.vscode/launch.json b/.vscode/launch.json
index 8e867fd..2b1df33 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -12,18 +12,17 @@
             // If you have changed target frameworks, make sure to update the program path.
             "program": "${workspaceFolder}/bin/Debug/net6.0/Ovation.FasterQC.Net.dll",
             "args": [
-                "-v",
                 "-d",
                 "-l",
-                "1000000",
+                "10",
                 "-f",
-                "sam",
+                "bam",
                 "-i",
-                "./tmp/in3257_2_S1.sorted.sam",
+                "./tmp/in3257_2_S1.sorted.bam",
                 "-o",
                 "./tmp/bob.json",
                 "-m",
-                "MeanQualityDistribution"
+                "BasicStatistics"
             ],
             "cwd": "${workspaceFolder}",
             // For more information about the 'console' field, see https://aka.ms/VSCode-CS-LaunchJson-Console
diff --git a/Models/BamAlignment.cs b/Models/BamAlignment.cs
index 28cdc0f..f87b7cb 100644
--- a/Models/BamAlignment.cs
+++ b/Models/BamAlignment.cs
@@ -1,4 +1,7 @@
+using System;
+using System.ComponentModel;
 using System.Diagnostics.CodeAnalysis;
+using System.Text;
 
 namespace Ovation.FasterQC.Net
 {
@@ -37,4 +40,128 @@ public class BamAlignment
 
         public byte[] qual { get; set; } = null!;
     }
+
+    /// <summary>
+    /// One optional field from the tail of a BAM alignment record: a two-character tag, a
+    /// one-character value type code, and the encoded value.
+    /// </summary>
+    /// <remarks>
+    /// The constructor only steps over the encoded value so that the caller's offset lands on
+    /// the next field; nothing is decoded, so <see cref="value"/> is never assigned here.
+    /// </remarks>
+    [SuppressMessage("Code style", "IDE1006", Justification = "Names correspond to BAM structure field names")]
+    public class BamOptionalElement
+    {
+        /// <summary>The two ASCII characters that open the field.</summary>
+        public char[] tag { get; set; }
+
+        /// <summary>The ASCII type code that follows the tag.</summary>
+        public char val_type { get; set; }
+
+        /// <summary>Placeholder for the decoded value; nothing in this class assigns it.</summary>
+        public object value { get; set; } = null!;
+
+        /// <summary>
+        /// Reads the tag and type code at <paramref name="offset"/>, then advances
+        /// <paramref name="offset"/> past the encoded value.
+        /// </summary>
+        /// <param name="block">Buffer holding the alignment record.</param>
+        /// <param name="offset">Index of the field's first byte; on return, the index just past the field.</param>
+        /// <exception cref="System.IO.InvalidDataException">
+        /// The type code or array subtype is not recognised, a string value has no terminating zero
+        /// byte, or an array's declared size runs past the end of <paramref name="block"/>.
+        /// </exception>
+        public BamOptionalElement(byte[] block, ref int offset)
+        {
+            tag = Encoding.ASCII.GetChars(block, offset, 2); offset += 2;
+            val_type = Encoding.ASCII.GetChars(block, offset, 1)[0]; offset += 1;
+
+            // consume the rest
+            switch (val_type)
+            {
+                // character, byte
+                case 'A':
+                case 'c':
+                case 'C':
+                    offset += 1;
+                    break;
+
+                // short
+                case 's':
+                case 'S':
+                    offset += 2;
+                    break;
+
+                // int, float
+                case 'i':
+                case 'I':
+                case 'f':
+                    offset += 4;
+                    break;
+
+                // null-terminated string, null-terminated hex digit pairs
+                case 'Z':
+                case 'H':
+                    offset = SkipPastTerminator(block, offset);
+                    break;
+
+                // array of stuff
+                case 'B':
+                    offset = SkipArray(block, offset);
+                    break;
+
+                // anything else would leave offset in the middle of a field and desynchronise the caller
+                default:
+                    throw new System.IO.InvalidDataException(
+                        $"Unknown optional field type '{val_type}' for tag {new string(tag)}");
+            }
+        }
+
+        /// <summary>Returns the tag characters and the type code as <c>tag: XX, type: T</c>.</summary>
+        public override string ToString() => $"tag: {tag[0]}{tag[1]}, type: {val_type}";
+
+        /// <summary>Returns the index just past the first zero byte at or after <paramref name="start"/>.</summary>
+        private static int SkipPastTerminator(byte[] block, int start)
+        {
+            // Array.IndexOf rejects a start index beyond the array, so only search inside the buffer
+            var terminator = start < block.Length ? Array.IndexOf(block, (byte)0, start) : -1;
+            if (terminator < 0)
+            {
+                throw new System.IO.InvalidDataException("Optional field string is not null-terminated");
+            }
+
+            return terminator + 1;
+        }
+
+        /// <summary>
+        /// Steps over an array value (subtype code, 32-bit element count, elements) that starts at
+        /// <paramref name="start"/> and returns the index just past it.
+        /// </summary>
+        private static int SkipArray(byte[] block, int start)
+        {
+            var subtype = Encoding.ASCII.GetChars(block, start, 1)[0];
+            var length = BitConverter.ToUInt32(new ReadOnlySpan<byte>(block, start + 1, 4));
+
+            var width = subtype switch
+            {
+                'c' => 1,
+                'C' => 1,
+                's' => 2,
+                'S' => 2,
+                'i' => 4,
+                'I' => 4,
+                'f' => 4,
+                _ => throw new System.IO.InvalidDataException($"Unknown optional field array subtype '{subtype}'"),
+            };
+
+            // 64-bit arithmetic: the element count is an unsigned 32-bit value and must not wrap the int offset
+            var end = start + 5L + ((long)length * width);
+            if (end > block.Length)
+            {
+                throw new System.IO.InvalidDataException("Optional field array extends past the end of the block");
+            }
+
+            return (int)end;
+        }
+    }
 }
diff --git a/Readers/BamReader.cs b/Readers/BamReader.cs
index fd548c6..6463cea 100644
--- a/Readers/BamReader.cs
+++ b/Readers/BamReader.cs
@@ -164,6 +164,12 @@ private BamAlignment ReadSequence()
             {
                 bamAlignment.qual = Array.Empty<byte>();
             }
+            offset += (int)bamAlignment.l_seq;
+
+            while (offset < block_size)
+            {
+                _ = new BamOptionalElement(block, ref offset);
+            }
 
             return bamAlignment;
         }