diff --git a/pixi.toml b/pixi.toml index c7720a4..5393012 100644 --- a/pixi.toml +++ b/pixi.toml @@ -21,6 +21,7 @@ bwa = ">=0.7.17" skani = ">=0.2.2" fastani = ">=1.3" extern = "*" +x-mapper = "*" # x86_64-specific dependencies [target.linux-64.dependencies] diff --git a/src/bam_generator.rs b/src/bam_generator.rs index e428af9..f476453 100644 --- a/src/bam_generator.rs +++ b/src/bam_generator.rs @@ -53,6 +53,7 @@ pub enum MappingProgram { MINIMAP2_HIFI, MINIMAP2_NO_PRESET, STROBEALIGN, + X_MAPPER, } pub struct BamFileNamedReader { @@ -415,7 +416,10 @@ pub fn generate_named_bam_readers_from_reads( // Required because of https://github.com/wwood/CoverM/issues/58 let minimap2_log_file_index = match mapping_program { - MappingProgram::BWA_MEM | MappingProgram::BWA_MEM2 | MappingProgram::STROBEALIGN => None, + MappingProgram::BWA_MEM + | MappingProgram::BWA_MEM2 + | MappingProgram::STROBEALIGN + | MappingProgram::X_MAPPER => None, // Required because of https://github.com/lh3/minimap2/issues/527 MappingProgram::MINIMAP2_SR | MappingProgram::MINIMAP2_ONT @@ -864,6 +868,32 @@ pub fn build_mapping_command( read2_path: Option<&str>, mapping_options: Option<&str>, ) -> String { + if matches!(mapping_program, MappingProgram::X_MAPPER) { + let read_params = match read_format { + ReadFormat::Coupled => format!( + "--paired-queries '{}' '{}'", + read1_path, + read2_path.unwrap() + ), + ReadFormat::Single => format!("--queries '{}'", read1_path), + ReadFormat::Interleaved => { + panic!("Interleaved reads are not supported by X-Mapper") + } + }; + let mapping_options = mapping_options + .filter(|opts| !opts.is_empty()) + .map(|opts| format!(" {opts}")) + .unwrap_or_default(); + return format!( + "x-mapper{} --num-threads {} --reference '{}' {} --out-sam - | samtools calmd -S - '{}'", + mapping_options, + threads, + reference.index_path(), + read_params, + reference.index_path() + ); + } + let read_params1 = match mapping_program { // minimap2 auto-detects interleaved based on read names MappingProgram::MINIMAP2_SR @@ -879,6 +909,7 @@ pub fn build_mapping_command( ReadFormat::Interleaved => "--interleaved", ReadFormat::Coupled | ReadFormat::Single => "", }, + MappingProgram::X_MAPPER => unreachable!(), }; let read_params2 = match read_format { @@ -893,6 +924,7 @@ pub fn build_mapping_command( MappingProgram::BWA_MEM => "bwa mem".to_string(), MappingProgram::BWA_MEM2 => "bwa-mem2 mem".to_string(), MappingProgram::STROBEALIGN => "strobealign".to_string(), + MappingProgram::X_MAPPER => unreachable!(), _ => { let split_prefix = tempfile::Builder::new() .prefix("coverm-minimap2-split-index") @@ -912,7 +944,8 @@ pub fn build_mapping_command( match mapping_program { MappingProgram::BWA_MEM | MappingProgram::BWA_MEM2 - | MappingProgram::STROBEALIGN => unreachable!(), + | MappingProgram::STROBEALIGN + | MappingProgram::X_MAPPER => unreachable!(), MappingProgram::MINIMAP2_SR => "-x sr", MappingProgram::MINIMAP2_ONT => "-x map-ont", MappingProgram::MINIMAP2_HIFI => "-x map-hifi", diff --git a/src/bin/coverm.rs b/src/bin/coverm.rs index 76fe0ca..e6c07d4 100644 --- a/src/bin/coverm.rs +++ b/src/bin/coverm.rs @@ -820,6 +820,12 @@ fn setup_mapping_index( )) } } + MappingProgram::X_MAPPER => { + info!("Not pre-generating x-mapper index"); + Box::new(coverm::mapping_index_maintenance::VanillaIndexStruct::new( + reference_wise_params.reference, + )) + } } } @@ -879,6 +885,7 @@ fn parse_mapping_program(m: &clap::ArgMatches) -> MappingProgram { Some("minimap2-hifi") => MappingProgram::MINIMAP2_HIFI, Some("minimap2-no-preset") => MappingProgram::MINIMAP2_NO_PRESET, Some("strobealign") => MappingProgram::STROBEALIGN, + Some("x-mapper") => MappingProgram::X_MAPPER, None => DEFAULT_MAPPING_SOFTWARE_ENUM, _ => panic!( "Unexpected definition for --mapper: {:?}", @@ -902,6 +909,9 @@ fn parse_mapping_program(m: &clap::ArgMatches) -> MappingProgram { MappingProgram::STROBEALIGN => { external_command_checker::check_for_strobealign(); } + MappingProgram::X_MAPPER => { + external_command_checker::check_for_x_mapper(); + } } mapping_program } diff --git a/src/cli.rs b/src/cli.rs index a3839e2..e4fe5ae 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -20,6 +20,7 @@ const MAPPING_SOFTWARE_LIST: &[&str] = &[ "minimap2-hifi", "minimap2-no-preset", "strobealign", + "x-mapper", ]; const DEFAULT_MAPPING_SOFTWARE: &str = "strobealign"; @@ -86,11 +87,15 @@ fn add_mapping_options(manual: Manual) -> Manual { &format!("minimap2 with '{}' option", &monospace_roff("-x map-hifi")) ], &[ - &monospace_roff("minimap2-no-preset"), - &format!("minimap2 with no '{}' option", &monospace_roff("-x")) - ], - ]) - ))) + &monospace_roff("minimap2-no-preset"), + &format!("minimap2 with no '{}' option", &monospace_roff("-x")) + ], + &[ + &monospace_roff("x-mapper"), + "x-mapper using default parameters" + ], + ]) + ))) .option(Opt::new("PARAMS").long("--minimap2-params").help(&format!( "Extra parameters to provide to minimap2, \ both indexing command (if used) and for \ @@ -113,6 +118,12 @@ fn add_mapping_options(manual: Manual) -> Manual { "Extra parameters to provide to strobealign. Note \ that usage of this parameter has security \ implications if untrusted input is specified. \ + [default: none]", + )) + .option(Opt::new("PARAMS").long("--x-mapper-params").help( + "Extra parameters to provide to x-mapper. Note \ + that usage of this parameter has security \ + implications if untrusted input is specified. \ [default: none]", )) .flag(Flag::new().long("--strobealign-use-index").help( @@ -1233,6 +1244,13 @@ Ben J. Woodcroft .allow_hyphen_values(true) .requires("reference"), ) + .arg( + Arg::new("x-mapper-params") + .long("x-mapper-params") + .long("x-mapper-parameters") + .allow_hyphen_values(true) + .requires("reference"), + ) .arg( Arg::new("strobealign-use-index") .long("strobealign-use-index") @@ -1768,6 +1786,13 @@ Ben J. Woodcroft .allow_hyphen_values(true) .requires("reference"), ) + .arg( + Arg::new("x-mapper-params") + .long("x-mapper-params") + .long("x-mapper-parameters") + .allow_hyphen_values(true) + .requires("reference"), + ) .arg( Arg::new("strobealign-use-index") .long("strobealign-use-index") @@ -2146,6 +2171,13 @@ Ben J. Woodcroft .allow_hyphen_values(true) .requires("reference"), ) + .arg( + Arg::new("x-mapper-params") + .long("x-mapper-params") + .long("x-mapper-parameters") + .allow_hyphen_values(true) + .requires("reference"), + ) .arg( Arg::new("strobealign-use-index") .long("strobealign-use-index") diff --git a/src/external_command_checker.rs b/src/external_command_checker.rs index 54976ce..a541432 100644 --- a/src/external_command_checker.rs +++ b/src/external_command_checker.rs @@ -30,3 +30,8 @@ pub fn check_for_strobealign() { default_version_check("strobealign", "0.11.0", false, None) .expect("Failed to find sufficient version of strobealign"); } + +pub fn check_for_x_mapper() { + check_for_external_command_presence_with_which("x-mapper") + .expect("Failed to find installed x-mapper"); +} diff --git a/src/mapping_index_maintenance.rs b/src/mapping_index_maintenance.rs index 6b590f9..a375d9a 100644 --- a/src/mapping_index_maintenance.rs +++ b/src/mapping_index_maintenance.rs @@ -67,6 +67,7 @@ impl TemporaryIndexStruct { | MappingProgram::MINIMAP2_HIFI | MappingProgram::MINIMAP2_NO_PRESET => std::process::Command::new("minimap2"), MappingProgram::STROBEALIGN => std::process::Command::new("strobealign"), + MappingProgram::X_MAPPER => unreachable!(), }; match &mapping_program { MappingProgram::BWA_MEM | MappingProgram::BWA_MEM2 => { @@ -96,7 +97,8 @@ impl TemporaryIndexStruct { MappingProgram::MINIMAP2_NO_PRESET | MappingProgram::BWA_MEM | MappingProgram::BWA_MEM2 - | MappingProgram::STROBEALIGN => {} + | MappingProgram::STROBEALIGN + | MappingProgram::X_MAPPER => {} }; if let Some(t) = num_threads { cmd.arg("-t").arg(format!("{t}")); @@ -106,6 +108,9 @@ impl TemporaryIndexStruct { MappingProgram::STROBEALIGN => { warn!("STROBEALIGN pre-indexing is not supported currently, so skipping index generation."); } + MappingProgram::X_MAPPER => { + warn!("X_MAPPER pre-indexing is not supported currently, so skipping index generation."); + } }; if let Some(params) = index_creation_options { for s in params.split_whitespace() { @@ -201,7 +206,8 @@ pub fn check_reference_existence(reference_path: &str, mapping_program: &Mapping | MappingProgram::MINIMAP2_HIFI | MappingProgram::MINIMAP2_PB | MappingProgram::MINIMAP2_NO_PRESET - | MappingProgram::STROBEALIGN => {} + | MappingProgram::STROBEALIGN + | MappingProgram::X_MAPPER => {} }; if !ref_path.exists() { diff --git a/src/mapping_parameters.rs b/src/mapping_parameters.rs index 596d744..5c850f2 100644 --- a/src/mapping_parameters.rs +++ b/src/mapping_parameters.rs @@ -111,6 +111,14 @@ impl<'a> MappingParameters<'a> { process::exit(1); } } + MappingProgram::X_MAPPER => { + if !interleaved.is_empty() { + error!( + "Interleaved read input specified to be mapped with x-mapper which is not supported." + ); + process::exit(1); + } + } _ => {} } @@ -122,6 +130,7 @@ impl<'a> MappingParameters<'a> { | MappingProgram::MINIMAP2_PB | MappingProgram::MINIMAP2_NO_PRESET => "minimap2-params", MappingProgram::STROBEALIGN => "strobealign-params", + MappingProgram::X_MAPPER => "x-mapper-params", }; let mapping_options = match m.contains_id(mapping_parameters_arg) { true => { diff --git a/tests/test_cmdline.rs b/tests/test_cmdline.rs index 50752ab..908790c 100644 --- a/tests/test_cmdline.rs +++ b/tests/test_cmdline.rs @@ -1,4 +1,5 @@ extern crate assert_cli; +extern crate coverm; extern crate rust_htslib; extern crate tempfile; @@ -12,7 +13,10 @@ mod tests { use std; use std::io::Read; use std::io::Write; + use std::path::PathBuf; + use std::process::Command; use std::str; + use tempfile::tempdir; fn assert_equal_table(expected: &str, observed: &str) -> bool { // assert the first lines are the same @@ -3757,6 +3761,244 @@ genome6~random_sequence_length_11003 0 0 0 } assert_eq!(filtered_count, 0); } + + #[test] + fn test_build_mapping_command_xmapper_single() { + use coverm::bam_generator::{build_mapping_command, MappingProgram}; + use coverm::mapping_index_maintenance::MappingIndex; + use coverm::mapping_parameters::ReadFormat; + + struct DummyIndex { + path: String, + } + impl MappingIndex for DummyIndex { + fn index_path(&self) -> &String { + &self.path + } + } + + let index = DummyIndex { + path: "ref.fa".to_string(), + }; + let cmd = build_mapping_command( + MappingProgram::X_MAPPER, + ReadFormat::Single, + 4, + "reads.fq", + &index, + None, + None, + ); + assert_eq!(cmd.split_whitespace().next().unwrap(), "x-mapper"); + assert!(cmd.contains("--reference 'ref.fa'")); + assert!(cmd.contains("--queries 'reads.fq'")); + assert!(cmd.contains("--out-sam -")); + } + + #[test] + fn test_build_mapping_command_xmapper_paired() { + use coverm::bam_generator::{build_mapping_command, MappingProgram}; + use coverm::mapping_index_maintenance::MappingIndex; + use coverm::mapping_parameters::ReadFormat; + + struct DummyIndex { + path: String, + } + impl MappingIndex for DummyIndex { + fn index_path(&self) -> &String { + &self.path + } + } + + let index = DummyIndex { + path: "ref.fa".to_string(), + }; + let cmd = build_mapping_command( + MappingProgram::X_MAPPER, + ReadFormat::Coupled, + 8, + "r1.fq", + &index, + Some("r2.fq"), + None, + ); + assert!(cmd.contains("--paired-queries 'r1.fq' 'r2.fq'")); + } + + #[test] + fn test_coverm_contig_x_mapper() { + let reference = PathBuf::from("tests/data/7seqs.fna") + .canonicalize() + .unwrap(); + let r1 = PathBuf::from("tests/data/7seqs.reads_for_7.1.fq") + .canonicalize() + .unwrap(); + let r2 = PathBuf::from("tests/data/7seqs.reads_for_7.2.fq") + .canonicalize() + .unwrap(); + + let output = Command::new(env!("CARGO_BIN_EXE_coverm")) + .args([ + "contig", + "-r", + reference.to_str().unwrap(), + "-1", + r1.to_str().unwrap(), + "-2", + r2.to_str().unwrap(), + "-p", + "x-mapper", + "--threads", + "1", + "--output-format", + "dense", + ]) + .output() + .expect("failed to run coverm contig with x-mapper"); + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + assert!(stdout.starts_with("Contig")); + assert!(stdout.contains("genome1~random_sequence_length_11000")); + assert!(stdout.contains("genome5~seq2")); + } + + #[test] + fn test_coverm_contig_x_mapper_min_identity() { + let reference = PathBuf::from("tests/data/7seqs.fna") + .canonicalize() + .unwrap(); + let r1 = PathBuf::from("tests/data/7seqs.reads_for_7.1.fq") + .canonicalize() + .unwrap(); + let r2 = PathBuf::from("tests/data/7seqs.reads_for_7.2.fq") + .canonicalize() + .unwrap(); + + let output = Command::new(env!("CARGO_BIN_EXE_coverm")) + .args([ + "contig", + "-r", + reference.to_str().unwrap(), + "-1", + r1.to_str().unwrap(), + "-2", + r2.to_str().unwrap(), + "-p", + "x-mapper", + "--threads", + "1", + "--output-format", + "dense", + "--min-read-percent-identity", + "99", + ]) + .output() + .expect("failed to run coverm contig with x-mapper min identity"); + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + assert!(stdout.starts_with("Contig")); + assert!(stdout.contains("genome1~random_sequence_length_11000")); + assert!(stdout.contains("genome5~seq2")); + let stderr = String::from_utf8(output.stderr).unwrap(); + assert!(stderr.contains("Using min-read-percent-identity 99%")); + } + + #[test] + fn test_coverm_genome_x_mapper() { + let reference = PathBuf::from("tests/data/7seqs.fna") + .canonicalize() + .unwrap(); + let r1 = PathBuf::from("tests/data/7seqs.reads_for_7.1.fq") + .canonicalize() + .unwrap(); + let r2 = PathBuf::from("tests/data/7seqs.reads_for_7.2.fq") + .canonicalize() + .unwrap(); + let definition = PathBuf::from("tests/data/7seqs.definition") + .canonicalize() + .unwrap(); + + let output = Command::new(env!("CARGO_BIN_EXE_coverm")) + .args([ + "genome", + "--genome-definition", + definition.to_str().unwrap(), + "-r", + reference.to_str().unwrap(), + "-1", + r1.to_str().unwrap(), + "-2", + r2.to_str().unwrap(), + "-p", + "x-mapper", + "--threads", + "1", + ]) + .output() + .expect("failed to run coverm genome with x-mapper"); + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + assert!(stdout.starts_with("Genome")); + assert!(stdout.contains("genome2")); + assert!(stdout.contains("genome5")); + } + + #[test] + fn test_coverm_make_x_mapper() { + let tmp = tempdir().unwrap(); + let out_dir = tmp.path().join("out"); + std::fs::create_dir(&out_dir).unwrap(); + + let reference = PathBuf::from("tests/data/7seqs.fna") + .canonicalize() + .unwrap(); + let r1 = PathBuf::from("tests/data/7seqs.reads_for_7.1.fq") + .canonicalize() + .unwrap(); + let r2 = PathBuf::from("tests/data/7seqs.reads_for_7.2.fq") + .canonicalize() + .unwrap(); + + let status = Command::new(env!("CARGO_BIN_EXE_coverm")) + .current_dir(tmp.path()) + .args([ + "make", + "-r", + reference.to_str().unwrap(), + "-1", + r1.to_str().unwrap(), + "-2", + r2.to_str().unwrap(), + "-o", + out_dir.to_str().unwrap(), + "-p", + "x-mapper", + ]) + .status() + .expect("failed to run coverm make"); + assert!(status.success()); + + let bam_files: Vec = std::fs::read_dir(&out_dir) + .unwrap() + .map(|e| e.unwrap().path()) + .filter(|p| p.extension().map_or(false, |e| e == "bam")) + .collect(); + assert_eq!(bam_files.len(), 1); + + let output = Command::new("samtools") + .arg("view") + .arg("-c") + .arg(&bam_files[0]) + .output() + .expect("samtools view failed"); + assert!(output.status.success()); + let count: u64 = std::str::from_utf8(&output.stdout) + .unwrap() + .trim() + .parse() + .unwrap(); + assert!(count > 0); + } } // TODO: Add mismatching bases test