Skip to content

Commit

Permalink
Pulling a more realistic BAM from an outside source
Browse files (browse the repository at this point in the history)
  • Loading branch information
tefirman committed Dec 16, 2024
1 parent abd0e1e commit a5f64bd
Showing 1 changed file with 41 additions and 37 deletions.
78 changes: 41 additions & 37 deletions .github/workflows/variant-calling-test-run.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:

- name: Setup Test Data
run: |
# mkdir -p test/data
mkdir -p test/data
cd test/data
# Download reference chromosome 20 from NCBI
Expand Down Expand Up @@ -152,13 +152,8 @@ jobs:
{
"PanelBwaGatk4Annovar.sample_batch": [
{
"sample_name": "TESTBAM1",
"bam_file": "test/data/TESTBAM1.unmapped.bam",
"bed_file": "test/data/test.bed"
},
{
"sample_name": "TESTBAM2",
"bam_file": "test/data/TESTBAM2.unmapped.bam",
"sample_name": "test_sample",
"bam_file": "test/data/test.unmapped.bam",
"bed_file": "test/data/test.bed"
}
],
Expand Down Expand Up @@ -203,38 +198,47 @@ jobs:
gatk ValidateVariants -V mills_1000G.vcf.gz -R ref.fasta
gatk ValidateVariants -V known_indels.vcf.gz -R ref.fasta
# # Create a test BAM with unmapped reads
# cat << EOF > test.sam
# @HD VN:1.6 SO:queryname
# @SQ SN:chr20 LN:63025520
# @RG ID:test SM:test_sample PL:ILLUMINA LB:lib1 PU:unit1
# read1 77 * 0 0 * * 0 0 $(get_ref_base 1000100 16) FFFFFFFFFFFFFFFF RG:Z:test
# read1 141 * 0 0 * * 0 0 $(get_ref_base 1000200 16) FFFFFFFFFFFFFFFF RG:Z:test
# read2 77 * 0 0 * * 0 0 $(get_ref_base 1000300 16) FFFFFFFFFFFFFFFF RG:Z:test
# read2 141 * 0 0 * * 0 0 $(get_ref_base 1000400 16) FFFFFFFFFFFFFFFF RG:Z:test
# read3 77 * 0 0 * * 0 0 $(get_ref_base 1000500 16) FFFFFFFFFFFFFFFF RG:Z:test
# read3 141 * 0 0 * * 0 0 $(get_ref_base 1000600 16) FFFFFFFFFFFFFFFF RG:Z:test
# EOF
# Navigating back to original directory
cd ../../
- name: Generate test BAM
run: |
# Download NA12878 data
wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/NA12878/Garvan_NA12878_HG001_HiSeq_Exome/project.NIST_NIST7035_H7AP8ADXX_TAAGGCGA_1_NA12878.bwa.markDuplicates.bam
# # Convert SAM to BAM and proper sorting
# samtools view -b test.sam > test.unsorted.bam
# samtools sort -n test.unsorted.bam > test.unmapped.bam
# Generate BAM index
samtools index project.NIST_NIST7035_H7AP8ADXX_TAAGGCGA_1_NA12878.bwa.markDuplicates.bam
# # Debug: Check the BAM
# echo "=== Checking final BAM structure ==="
# samtools view -H test.unmapped.bam
# echo "=== First few reads ==="
# samtools view test.unmapped.bam | head -n 2
# Extract region and ensure proper pairing
samtools view -h -f 0x2 -s 0.1 project.NIST_NIST7035_H7AP8ADXX_TAAGGCGA_1_NA12878.bwa.markDuplicates.bam chr20:1000000-1100000 | \
samtools sort -n - | \
samtools fastq -1 test/data/r1.fq -2 test/data/r2.fq -0 /dev/null -s /dev/null -n
# # Clean up intermediate files
# rm test.sam test.unsorted.bam
# # Validate BAM file
# echo "=== Validating BAM file ==="
# gatk ValidateSamFile -I test.unmapped.bam
# Navigating back to original directory
cd ../../
# Convert back to unmapped BAM with proper read groups
gatk FastqToSam \
-F1 test/data/r1.fq \
-F2 test/data/r2.fq \
-O test/data/test.unmapped.bam \
-SM NA12878 \
-RG test \
-PL ILLUMINA \
-LB lib1 \
-PU unit1
# Debug: Check the BAM
echo "=== Checking final BAM structure ==="
samtools view -H test/data/test.unmapped.bam
echo "=== First few reads ==="
samtools view test/data/test.unmapped.bam | head -n 2
# Clean up intermediate files
rm test/data/r1.fq test/data/r2.fq \
project.NIST_NIST7035_H7AP8ADXX_TAAGGCGA_1_NA12878.bwa.markDuplicates.bam \
project.NIST_NIST7035_H7AP8ADXX_TAAGGCGA_1_NA12878.bwa.markDuplicates.bam.bai
# Validate BAM file
echo "=== Validating BAM file ==="
gatk ValidateSamFile -I test/data/test.unmapped.bam
- name: Run test workflow
run: |
Expand Down

0 comments on commit a5f64bd

Please sign in to comment.