diff --git a/alignment/alignment.rs b/alignment/alignment.rs deleted file mode 100644 index 0b91007..0000000 --- a/alignment/alignment.rs +++ /dev/null @@ -1,398 +0,0 @@ -use crate::alphabet::{Alphabet, Scoring}; -use crate::aminoacid::*; -use itertools::Itertools; -use std::fmt::Write; - -/// An alignment of two reads. -#[derive(Debug, Clone)] -pub struct Alignment { - /// The score of this alignment - pub score: isize, - /// The path or steps taken for the alignment - pub path: Vec, - /// The position in the first sequence where the alignment starts - pub start_a: usize, - /// The position in the second sequence where the alignment starts - pub start_b: usize, - /// The first sequence - pub seq_a: Vec, - /// The second sequence - pub seq_b: Vec, -} - -impl Alignment { - fn short(&self) -> String { - self.path.iter().map(Piece::short).join("") - } - - fn aligned(&self) -> String { - let blocks: Vec = " ▁▂▃▄▅▆▇█".chars().collect(); - let blocks_neg: Vec = " ▔▔▔▀▀▀▀█".chars().collect(); - let mut str_a = String::new(); - let mut str_b = String::new(); - let mut str_blocks = String::new(); - let mut str_blocks_neg = String::new(); - let mut loc_a = self.start_a; - let mut loc_b = self.start_b; - - for piece in &self.path { - let l = std::cmp::max(piece.step_b, piece.step_a); - if piece.step_a == 0 { - let _ = write!(str_a, "{:- 0 { - " ".to_string() - } else { - #[allow(clippy::cast_sign_loss)] // Checked above - blocks_neg[-piece.local_score as usize].to_string() - }, - l as usize - ) - ); - - loc_a += piece.step_a as usize; - loc_b += piece.step_b as usize; - } - - format!("{}\n{}\n{}\n{}", str_a, str_b, str_blocks, str_blocks_neg) - } - - /// Generate a summary of this alignment for printing to the command line - pub fn summary(&self) -> String { - format!( - "score: {}\npath: {}\nstart: ({}, {})\naligned:\n{}", - self.score, - self.short(), - self.start_a, - self.start_b, - self.aligned() - ) - } - - /// The total number of residues matched on the first sequence - pub fn len_a(&self) -> usize { - self.path.iter().map(|p| p.step_a as usize).sum() - } - - /// The total number of residues matched on the second sequence - pub fn len_b(&self) -> usize { - self.path.iter().map(|p| p.step_b as usize).sum() - } -} - -/// A piece in an alignment, determining what step was taken in the alignment and how this impacted the score -#[derive(Clone, Default, Debug)] -pub struct Piece { - /// The total score of the path up till now - pub score: isize, - /// The local contribution to the score of this piece - pub local_score: i8, - /// The number of steps on the first sequence - pub step_a: u8, - /// The number of steps on the second sequence - pub step_b: u8, -} - -impl Piece { - /// Create a new alignment piece - pub const fn new(score: isize, local_score: i8, step_a: u8, step_b: u8) -> Self { - Self { - score, - local_score, - step_a, - step_b, - } - } -} - -impl Piece { - /// Display this piece very compactly - pub fn short(&self) -> String { - match (self.step_a, self.step_b) { - (0, 1) => "I".to_string(), - (1, 0) => "D".to_string(), - (1, 1) => "M".to_string(), - (a, b) => format!("S[{},{}]", b, a), - } - } -} - -/// The type of alignment to perform -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum Type { - /// Global alignment, which tries to find the best alignment to link both sequences fully to each other, like the Needleman Wunsch algorithm - Global, - /// Local alignment, which tries to find the best patch of both sequences to align to each other, this could lead to trailing ends on both sides of both sequences, like the Smith Waterman - Local, - /// Hybrid alignment, the second sequence will be fully aligned to the first sequence, this could lead to trailing ends on the first sequence but not on the second. - GlobalForB, -} - -impl Type { - const fn global(self) -> bool { - !matches!(self, Self::Local) - } -} - -/// # Panics -/// It panics when the length of `seq_a` or `seq_b` is bigger then [`isize::MAX`]. -#[allow(clippy::too_many_lines)] -pub fn align(seq_a: &[AminoAcid], seq_b: &[AminoAcid], alphabet: &Alphabet, ty: Type) -> Alignment { - assert!(isize::try_from(seq_a.len()).is_ok()); - assert!(isize::try_from(seq_b.len()).is_ok()); - let mut matrix = vec![vec![Piece::default(); seq_b.len() + 1]; seq_a.len() + 1]; - let mut high = (0, 0, 0); - - if ty.global() { - #[allow(clippy::cast_possible_wrap)] - // b is always less than seq_b - for index_b in 0..=seq_b.len() { - matrix[0][index_b] = Piece::new( - (index_b as isize) * Scoring::GapExtendPenalty as isize, - Scoring::GapExtendPenalty as i8, - 0, - if index_b == 0 { 0 } else { 1 }, - ); - } - } - if ty == Type::Global { - #[allow(clippy::cast_possible_wrap)] - // a is always less than seq_a - for (index_a, row) in matrix.iter_mut().enumerate() { - row[0] = Piece::new( - (index_a as isize) * Scoring::GapExtendPenalty as isize, - Scoring::GapExtendPenalty as i8, - if index_a == 0 { 0 } else { 1 }, - 0, - ); - } - } - - let mut values = Vec::with_capacity(Alphabet::STEPS * Alphabet::STEPS + 2); - for index_a in 1..=seq_a.len() { - for index_b in 1..=seq_b.len() { - values.clear(); - for len_a in 0..=Alphabet::STEPS { - for len_b in 0..=Alphabet::STEPS { - if len_a == 0 && len_b != 1 - || len_a != 1 && len_b == 0 - || len_a > index_a - || len_b > index_b - { - continue; // Do not allow double gaps (just makes no sense) - } - #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] - // len_a and b are always less then Alphabet::STEPS - let score = if len_a == 0 || len_b == 0 { - Scoring::GapExtendPenalty as i8 // Defined to always be one gap - } else { - alphabet[( - &seq_a[index_a - len_a..index_a], - &seq_b[index_b - len_b..index_b], - )] - }; - if score == 0 { - continue; - } - values.push(Piece::new( - matrix[index_a - len_a][index_b - len_b].score + score as isize, - score, - len_a as u8, - len_b as u8, - )); - } - } - let value = values - .iter() - .max_by(|x, y| x.score.cmp(&y.score)) - .cloned() - .unwrap_or_default(); - if value.score >= high.0 { - high = (value.score, index_a, index_b); - } - matrix[index_a][index_b] = value; - } - } - - // loop back - if ty == Type::Global { - high = ( - matrix[seq_a.len()][seq_b.len()].score, - seq_a.len(), - seq_b.len(), - ); - } else if ty == Type::GlobalForB { - let value = (0..=seq_a.len()) - .map(|v| (v, matrix[v][seq_b.len()].score)) - .max_by(|a, b| a.1.cmp(&b.1)) - .unwrap_or_default(); - high = (value.1, value.0, seq_b.len()); - } - let mut path = Vec::new(); - let high_score = high.0; - //dbg!(&highest_score); - //dbg!(&matrix); - while !(high.1 == 0 && high.2 == 0) { - let value = matrix[high.1][high.2].clone(); - if value.step_a == 0 && value.step_b == 0 { - break; - } - high = ( - 0, - high.1 - value.step_a as usize, - high.2 - value.step_b as usize, - ); - path.push(value); - } - //dbg!(&path); - Alignment { - score: high_score, - path: path.into_iter().rev().collect(), - start_a: high.1, - start_b: high.2, - seq_a: seq_a.to_owned(), - seq_b: seq_b.to_owned(), - } -} - -#[cfg(test)] -mod tests { - use crate::alignment::{align, Type}; - use crate::alphabet::Alphabet; - use crate::aminoacid::AminoAcid::*; - - #[test] - fn equal() { - let alphabet = Alphabet::default(); - let a = vec![A, C, C, G, W]; - let b = vec![A, C, C, G, W]; - let result = align(&a, &b, &alphabet, Type::Local); - dbg!(&result); - assert_eq!(40, result.score); - assert_eq!("MMMMM", &result.short()); - } - - #[test] - fn insertion() { - let alphabet = Alphabet::default(); - let a = vec![A, C, G, W]; - let b = vec![A, C, F, G, W]; - let result = align(&a, &b, &alphabet, Type::Local); - dbg!(&result); - assert_eq!(27, result.score); - assert_eq!("MMIMM", &result.short()); - } - - #[test] - fn deletion() { - let alphabet = Alphabet::default(); - let a = vec![A, C, F, G, W]; - let b = vec![A, C, G, W]; - let result = align(&a, &b, &alphabet, Type::Local); - dbg!(&result); - assert_eq!(27, result.score); - assert_eq!("MMDMM", &result.short()); - } - - #[test] - fn iso_mass() { - let alphabet = Alphabet::default(); - let a = vec![A, F, G, G, W]; - let b = vec![A, F, N, W]; - let result = align(&a, &b, &alphabet, Type::Local); - dbg!(&result); - dbg!(result.short()); - assert_eq!(29, result.score); - assert_eq!("MMS[1,2]M", &result.short()); - } - - #[test] - fn switched() { - let alphabet = Alphabet::default(); - let a = vec![A, F, G, G, W]; - let b = vec![A, G, F, G, W]; - let result = align(&a, &b, &alphabet, Type::Local); - dbg!(&result); - dbg!(result.short()); - assert_eq!(28, result.score); - assert_eq!("MS[2,2]MM", &result.short()); - } - - #[test] - fn local() { - let alphabet = Alphabet::default(); - let a = vec![A, F, G, G, E, W]; - let b = vec![F, G, G, D]; - let result = align(&a, &b, &alphabet, Type::Local); - dbg!(&result); - dbg!(result.short()); - assert_eq!(24, result.score); - assert_eq!("MMM", &result.short()); - } - - #[test] - fn global() { - let alphabet = Alphabet::default(); - let a = vec![A, F, G, G, E, W]; - let b = vec![F, G, G, D]; - let result = align(&a, &b, &alphabet, Type::Global); - dbg!(&result); - println!("{}", result.summary()); - assert_eq!(13, result.score); - assert_eq!("DMMMDM", &result.short()); - assert_eq!(0, result.start_a, "A global alignment should start at 0"); - } - - #[test] - fn global_for_b() { - let alphabet = Alphabet::default(); - let a = vec![A, F, G, G, E, W]; - let b = vec![F, G, G, D]; - let result = align(&a, &b, &alphabet, Type::GlobalForB); - dbg!(&result); - dbg!(result.short()); - assert_eq!(23, result.score); - assert_eq!("MMMM", &result.short()); - assert_eq!(0, result.start_b, "A global alignment should start at 0"); - } -} diff --git a/alignment/alphabet.rs b/alignment/alphabet.rs deleted file mode 100644 index 6b58ea0..0000000 --- a/alignment/alphabet.rs +++ /dev/null @@ -1,283 +0,0 @@ -use crate::aminoacid::AminoAcid; -use crate::aminoacid::AminoAcid::*; -use itertools::Itertools; - -/// An alphabet to determine the score of two amino acid sets -pub struct Alphabet { - array: Vec>, -} - -impl std::ops::Index<(&[AminoAcid], &[AminoAcid])> for Alphabet { - type Output = i8; - fn index(&self, index: (&[AminoAcid], &[AminoAcid])) -> &Self::Output { - &self.array[get_index(index.0)][get_index(index.1)] - } -} - -fn get_index_ref(set: &[&AminoAcid]) -> usize { - set.iter() - .fold(0, |acc, item| acc * AminoAcid::MAX + **item as usize) -} - -fn get_index(set: &[AminoAcid]) -> usize { - set.iter() - .fold(0, |acc, item| acc * AminoAcid::MAX + *item as usize) -} - -impl Alphabet { - /// The number of steps to trace back, if updated a lot of other code has to be updated as well - pub const STEPS: usize = 3; -} - -#[repr(i8)] -#[derive(Clone, Default, Debug)] -pub enum Scoring { - /// The score for identity, should be the highest score of the bunch - Identity = 8, - /// The score for a mismatch - #[default] - Mismatch = -1, - /// The score for an iso mass set, eg Q<>AG - IsoMass = 5, - /// The score for a modification - Modification = 3, - /// The score for a switched set, defined as this value times the size of the set (eg AG scores 4 with GA) - Switched = 2, - /// The score for scoring a gap, should be less than `MISMATCH` - GapStartPenalty = -5, - GapExtendPenalty = -3, -} - -#[allow(clippy::too_many_lines)] -impl Default for Alphabet { - fn default() -> Self { - macro_rules! sets { - ($($($($id:ident),+);+)|+) => { - vec![ - $(vec![ - $(vec![$($id),+],)+ - ],)+ - ] - }; - } - - #[allow(clippy::cast_possible_truncation)] - // STEPS is always within bounds for u32 - let mut alphabet = Self { - array: vec![ - vec![0; (AminoAcid::MAX + 1).pow(Self::STEPS as u32)]; - (AminoAcid::MAX + 1).pow(Self::STEPS as u32) - ], - }; - - for x in 0..=AminoAcid::MAX { - for y in 0..=AminoAcid::MAX { - alphabet.array[x][y] = if x == y { - Scoring::Identity as i8 - } else { - Scoring::Mismatch as i8 - }; - } - } - let iso_mass = sets!( - I; L| - N; G,G| - Q; A,G| - A,V; G,L; G,I| - A,N; Q,G; A,G,G| - L,S; I,S; T,V| - A,M; C,V| - N,V; A,A,A; G,G,V| - N,T; Q,S; A,G,S; G,G,T| - L,N; I,N; Q,V; A,G,V; G,G,L; G,G,I| - D,L; D,I; E,V| - Q,T; A,A,S; A,G,T| - A,Y; F,S| - L,Q; I,Q; A,A,V; A,G,L; A,G,I| - N,Q; A,N,G; Q,G,G| - K,N; G,G,K| - E,N; D,Q; A,D,G; E,G,G| - D,K; A,A,T; G,S,V| - M,N; A,A,C; G,G,M| - A,S; G,T| - A,A,L; A,A,I; G,V,V| - Q,Q; A,A,N; A,Q,G| - E,Q; A,A,D; A,E,G| - E,K; A,S,V; G,L,S; G,I,S; G,T,V| - M,Q; A,G,M; C,G,V| - A,A,Q; N,G,V - ); - - for set in iso_mass { - for set in set.iter().permutations(2) { - let a = set[0]; - let b = set[1]; - for seq_a in a.iter().permutations(a.len()) { - for seq_b in b.iter().permutations(b.len()) { - alphabet.array[get_index_ref(&seq_a)][get_index_ref(&seq_b)] = - Scoring::IsoMass as i8; - } - } - } - } - - let modifications = sets!( - //N;D| // Amidation only at N term - Q;E| // Deamidation - D;N| // Deamidation - C;T| // Disulfide bond - T;D| // Methylation - S;T| // Methylation - D;E| // Methylation - R;A,V;G,L| // Methylation - Q;A,A // Methylation - ); - - for set in modifications { - let a = &set[0]; - for seq_b in set.iter().skip(1) { - alphabet.array[get_index(a)][get_index(seq_b.as_slice())] = - Scoring::Modification as i8; - } - } - - let amino_acids = (1..=AminoAcid::MAX) - .map(|a| AminoAcid::try_from(a).unwrap()) - .collect_vec(); - for size in 2..=Self::STEPS { - for set in amino_acids - .iter() - .combinations_with_replacement(size) - .flat_map(|v| v.into_iter().permutations(size)) - { - if set.iter().all(|v| *v == set[0]) { - continue; // Do not add [A, A] or [A, A, A] etc as SWITCHED - } - #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] - // set.len() is at max equal to Self::STEPS - for switched in set.clone().into_iter().permutations(size) { - alphabet.array[get_index_ref(&set)][get_index_ref(&switched)] = - Scoring::Switched as i8 * set.len() as i8; - } - } - } - - alphabet - } -} - -#[cfg(test)] -mod tests { - use super::{Alphabet, Scoring}; - use crate::aminoacid::AminoAcid::*; - - #[test] - fn identity() { - let alphabet = Alphabet::default(); - assert_eq!( - Scoring::Identity as i8, - alphabet[([A].as_slice(), [A].as_slice())] - ); - assert_eq!(0, alphabet[([A, A].as_slice(), [A, A].as_slice())]); - assert_eq!(0, alphabet[([A, A, A].as_slice(), [A, A, A].as_slice())]); - } - - #[test] - fn similarity() { - let alphabet = Alphabet::default(); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([I].as_slice(), [L].as_slice())] - ); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([N].as_slice(), [G, G].as_slice())] - ); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([G, G].as_slice(), [N].as_slice())] - ); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([A, S].as_slice(), [G, T].as_slice())] - ); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([S, A].as_slice(), [G, T].as_slice())] - ); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([S, A].as_slice(), [T, G].as_slice())] - ); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([A, S].as_slice(), [T, G].as_slice())] - ); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([L, Q].as_slice(), [A, V, A].as_slice())] - ); - } - - #[test] - fn inequality() { - let alphabet = Alphabet::default(); - assert_eq!( - Scoring::Mismatch as i8, - alphabet[([I].as_slice(), [Q].as_slice())] - ); - assert_eq!(0, alphabet[([Q].as_slice(), [G, G].as_slice())]); - assert_eq!(0, alphabet[([A, E].as_slice(), [G, T].as_slice())]); - assert_eq!(0, alphabet[([E, Q].as_slice(), [A, V, A].as_slice())]); - } - - #[test] - fn switched() { - let alphabet = Alphabet::default(); - assert_eq!( - Scoring::Switched as i8 * 2, - alphabet[([E, Q].as_slice(), [Q, E].as_slice())] - ); - assert_eq!( - Scoring::Switched as i8 * 3, - alphabet[([D, A, C].as_slice(), [A, C, D].as_slice())] - ); - assert_eq!( - Scoring::Switched as i8 * 3, - alphabet[([C, D, A].as_slice(), [A, C, D].as_slice())] - ); - assert_eq!( - Scoring::Switched as i8 * 3, - alphabet[([A, C, D].as_slice(), [D, A, C].as_slice())] - ); - assert_eq!( - Scoring::Switched as i8 * 3, - alphabet[([C, D, A].as_slice(), [D, A, C].as_slice())] - ); - assert_eq!( - Scoring::Switched as i8 * 3, - alphabet[([A, C, D].as_slice(), [C, D, A].as_slice())] - ); - assert_eq!( - Scoring::Switched as i8 * 3, - alphabet[([D, A, C].as_slice(), [C, D, A].as_slice())] - ); - assert_eq!( - Scoring::Switched as i8 * 3, - alphabet[([V, A, A].as_slice(), [A, V, A].as_slice())] - ); - } - - #[test] - fn modification() { - let alphabet = Alphabet::default(); - assert_eq!( - Scoring::Modification as i8, - alphabet[([D].as_slice(), [N].as_slice())] - ); - assert_eq!( - Scoring::Mismatch as i8, - alphabet[([N].as_slice(), [D].as_slice())] - ); - } -} diff --git a/alignment/aminoacid.rs b/alignment/aminoacid.rs deleted file mode 100644 index 71c5ffd..0000000 --- a/alignment/aminoacid.rs +++ /dev/null @@ -1,170 +0,0 @@ -use itertools::Itertools; -use std::fmt::Display; - -/// All aminoacids -#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)] -pub enum AminoAcid { - /// Alanine - A = 1, - /// Arginine - R, - /// Asparagine - N, - /// Aspartic acid - D, - /// Cysteine - C, - /// Glutamine - Q, - /// Glutamic acid - E, - /// Glycine - G, - /// Histidine - H, - /// Isoleucine - I, - /// Leucine - L, - /// Lysine - K, - /// Methionine - M, - /// Phenylalanine - F, - /// Proline - P, - /// Serine - S, - /// Threonine - T, - /// Tryptophan - W, - /// Tyrosine - Y, - /// Valine - V, - /// Weird - B, - /// Also weird - Z, - /// Single gap - X, - /// Longer gap - Gap, -} - -impl AminoAcid { - /// The total number of normal amino acids (disregards Gap) - pub const MAX: usize = 23; -} - -impl Display for AminoAcid { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(match self { - Self::A => "A", - Self::R => "R", - Self::N => "N", - Self::D => "D", - Self::C => "C", - Self::Q => "Q", - Self::E => "E", - Self::G => "G", - Self::H => "H", - Self::I => "I", - Self::L => "L", - Self::K => "K", - Self::M => "M", - Self::F => "F", - Self::P => "P", - Self::S => "S", - Self::T => "T", - Self::W => "W", - Self::Y => "Y", - Self::V => "V", - Self::B => "B", - Self::Z => "Z", - Self::X => "X", - Self::Gap => "*", - }) - } -} - -impl TryFrom for AminoAcid { - type Error = (); - fn try_from(num: usize) -> Result { - match num { - 1 => Ok(Self::A), - 2 => Ok(Self::R), - 3 => Ok(Self::N), - 4 => Ok(Self::D), - 5 => Ok(Self::C), - 6 => Ok(Self::Q), - 7 => Ok(Self::E), - 8 => Ok(Self::G), - 9 => Ok(Self::H), - 10 => Ok(Self::I), - 11 => Ok(Self::L), - 12 => Ok(Self::K), - 13 => Ok(Self::M), - 14 => Ok(Self::F), - 15 => Ok(Self::P), - 16 => Ok(Self::S), - 17 => Ok(Self::T), - 18 => Ok(Self::W), - 19 => Ok(Self::Y), - 20 => Ok(Self::V), - 21 => Ok(Self::B), - 22 => Ok(Self::Z), - 23 => Ok(Self::X), - 24 => Ok(Self::Gap), - _ => Err(()), - } - } -} - -impl TryFrom for AminoAcid { - type Error = (); - fn try_from(value: char) -> Result { - match value { - 'A' => Ok(Self::A), - 'R' => Ok(Self::R), - 'N' => Ok(Self::N), - 'D' => Ok(Self::D), - 'C' => Ok(Self::C), - 'Q' => Ok(Self::Q), - 'E' => Ok(Self::E), - 'G' => Ok(Self::G), - 'H' => Ok(Self::H), - 'I' => Ok(Self::I), - 'L' => Ok(Self::L), - 'K' => Ok(Self::K), - 'M' => Ok(Self::M), - 'F' => Ok(Self::F), - 'P' => Ok(Self::P), - 'S' => Ok(Self::S), - 'T' => Ok(Self::T), - 'W' => Ok(Self::W), - 'Y' => Ok(Self::Y), - 'V' => Ok(Self::V), - 'B' => Ok(Self::B), - 'Z' => Ok(Self::Z), - 'X' => Ok(Self::X), - '*' => Ok(Self::Gap), - _ => Err(()), - } - } -} - -/// Create an aminoacid sequence from a string, just ignores any non aminoacids characters -pub fn sequence_from_string(value: &str) -> Vec { - value - .chars() - .filter_map(|v| AminoAcid::try_from(v).ok()) - .collect() -} - -/// Generate a string from a sequence of aminoacids -pub fn sequence_to_string(value: &[AminoAcid]) -> String { - value.iter().map(std::string::ToString::to_string).join("") -} diff --git a/alignment/bin.rs b/alignment/bin.rs deleted file mode 100644 index 078b800..0000000 --- a/alignment/bin.rs +++ /dev/null @@ -1,101 +0,0 @@ -#![allow(dead_code)] -#![warn(clippy::pedantic, clippy::nursery, clippy::all)] -#![allow(clippy::enum_glob_use, clippy::wildcard_imports)] -use mass_alignment::template::Template; -use mass_alignment::*; - -fn main() { - let alphabet = Alphabet::default(); - //let template = aminoacid::sequence_from_string("XXXXXXXXXXXXXXXXXXXXYFDYWGQGTLVTVSS"); - let template = mass_alignment::sequence_from_string("EVQLVESGGGLVQPGGSLRLSCAASGFTVSSNYMSWVRQAPGKGLEWVSVIYSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARXXXXXXXXXXXXXXXXXXXX"); - let reads: Vec> = [ - //"SRWGGDGFYAMDYWGQGTLVTV", - //"DWNGFYAMDYWGQGTLVTVSS", - //"RWGGDGFYAMDYWGQGTLVTV", - //"HVPHGDGFYAMDYWGQGTLVT", - //"WRGGDGFYAMDYWGQGTLVT", - //"SRWGGDGFYAMDYWGQGTLV", - //"RWGGDGFYAMDYWGQGTLVT", - //"WRNDGFYAMDYWGQGTLVT", - //"RWGGDGFYAMDYWGQGTLV", - //"MARNDGFYAMDYWGQGTLV", - //"RWNDGFYAMDYWGQGTLV", - //"SRWGGNGFYWDYWGQGT", - //"RWNDGFYWDYWGQGT", - //"DYWGQGTLVVTSS", - //"DYWGQGTLVTVSS", - //"DYWGQGTLVTV", - //"DYWGQGTLVT", - //"WGQGTLVT", - "DLQLVESGGGLVGAKSPPGTLSAAASGFNL", - "DLQLVESGGGLVGAKSPPGTLSAAASGFNL", - "EVQLVESGGGLVQPGGSLSGAKYHSGFNL", - "EVVQLVESGGGLVQPGGSLGVLSCAASGF", - "DLQLVESGGGLVQPGGSLGVLSCAASGF", - "DLQLVESGGGLVQPGTPLYWNAASGFNL", - "DLQLVESGGGLVQPGGSLRLSCAASGF", - "QVQLVESGGGLVQPGGSLRLSCAASGF", - "EVQLVESGGGLPVQGGSLRLSCAADGF", - "EVQLVESGGGLVQPGGSLRLSCAASGF", - "EVQLVSGEGGLVQPGGSLRLSCAASGF", - "QVELVESGGGLVQPGGSLRLSCAASGF", - "TLSADTSKNTAYLQMNSLRAEDTAVY", - "RFTLSADTSKNTAYLQMNSLRAEDTA", - "QLVESGGGLVQPGGSLTHVAGAGHSGF", - "SADTSKNTAYLQMNSLRAEDTAVYY", - "LMLTDGYTRYADSVKGRFTLSADTS", - "QLVESGGGLVQPGGSLRLSCAASGF", - "QLVESGGGLVQPGGSLRLSCQTGF", - "LVESGGGLVQPNSLRLSCAASGF", - ] - .into_iter() - .map(mass_alignment::sequence_from_string) - .collect(); - - let template = Template::new( - template, - reads.iter().map(std::vec::Vec::as_slice).collect(), - &alphabet, - ); - let content = format!( - " - - - - - - - -
-
{} -
-
- -", - template.generate_html() - ); - std::fs::write("test.html", content).unwrap(); -} diff --git a/alignment/lib.rs b/alignment/lib.rs deleted file mode 100644 index 720fa4f..0000000 --- a/alignment/lib.rs +++ /dev/null @@ -1,42 +0,0 @@ -//! An algorithm based on Needleman Wunsch/Smith Waterman but extended to allow for mass based alignment. -//! The mass based part gives the option to match two sets of aminoacids with different sizes. -//! For example the set {Q} matches {AG} because these have the same mass and so are commonly misclassified -//! is de novo sequencing for peptides. Besides iso mass definitions it also handles swaps with finesse, -//! meaning that {AG} matches {GA} with a well defined score to allow for these mistakes to be fixed. The -//! last important addition is the handling of post translational modifications meaning that {Q} matches {E} -//! but not the other way around to allow for deamidation of the sample in reference to the template. -//! -//! ```rust -//! use mass_alignment::*; -//! use mass_alignment::AminoAcid::*; -//! -//! let alphabet = Alphabet::default(); -//! let template = &[A,G,Q,S,T,Q]; -//! let query = &[Q,E,S,W]; -//! let result = align(template, query, &alphabet, Type::GlobalForB); -//! println!("{}", result.summary()); -//! assert_eq!(15, result.score) -//! ``` - -#![allow(dead_code)] -#![warn(clippy::pedantic, clippy::nursery, clippy::all, missing_docs)] -#![allow( - clippy::enum_glob_use, - clippy::wildcard_imports, - clippy::must_use_candidate -)] -/// The module containing all alignment handling -mod alignment; -/// The module containing all alphabet handling -mod alphabet; -/// The module containing the definition for aminoacids -mod aminoacid; -/// The module containing the definition for templates -pub mod template; - -pub use crate::alignment::align; -pub use crate::alignment::*; -pub use crate::alphabet::Alphabet; -pub use crate::aminoacid::sequence_from_string; -pub use crate::aminoacid::sequence_to_string; -pub use crate::aminoacid::AminoAcid; diff --git a/alignment/template.rs b/alignment/template.rs deleted file mode 100644 index 80e188b..0000000 --- a/alignment/template.rs +++ /dev/null @@ -1,123 +0,0 @@ -use crate::alignment::Alignment; -use crate::alphabet::Scoring; -use crate::aminoacid::{self, AminoAcid}; -use crate::{align, Alphabet}; -use itertools::Itertools; -use std::fmt::Write; - -/// A template that is matched with many reads -pub struct Template { - /// The sequence of this template - pub sequence: Vec, - /// The reads matched to this template - pub reads: Vec, -} - -impl Template { - /// Create a new template by matching the given reads to the given template sequence - pub fn new(sequence: Vec, reads: Vec<&[AminoAcid]>, alphabet: &Alphabet) -> Self { - Self { - reads: reads - .into_iter() - .map(|v| align(&sequence, v, alphabet, crate::alignment::Type::GlobalForB)) - .collect(), - sequence, - } - } - - /// Generate HTML for a reads alignment, all styling is missing and it is only a small part of a document - pub fn generate_html(&self) -> String { - let mut insertions = vec![0; self.sequence.len()]; - for read in &self.reads { - let mut loc_a = read.start_a; - let mut insertion = 0; - - for piece in &read.path { - if piece.step_a == 0 && piece.step_b == 1 { - insertion += 1; - } else if insertion != 0 { - insertions[loc_a] = std::cmp::max(insertions[loc_a], insertion); - insertion = 0; - } else { - insertion = 0; - } - loc_a += piece.step_a as usize; - } - } - - let mut output = format!("
1....
", insertions.iter().sum::() + self.sequence.len()); - for (ins, seq) in insertions.iter().zip(&self.sequence) { - let _ = write!(output, "{:-<1$}{2}", "", ins, seq); - } - let _ = write!(output, "
"); - - for read in &self.reads { - let _ = write!( - output, - "
", - insertions[0..read.start_a].iter().sum::() + read.start_a + 1, - insertions[0..read.start_a + read.len_a()] - .iter() - .sum::() - + read.start_a - + read.len_a() - + 3 - ); - let mut loc_a = read.start_a; - let mut loc_b = read.start_b; - let mut insertion = 0; - for piece in &read.path { - if piece.step_a == 0 && piece.step_b == 1 { - insertion += 1; - } else { - let _ = write!(output, "{:-<1$}", "", insertions[loc_a] - insertion); - insertion = 0; - } - let _ = write!( - output, - "{}", - match (piece.step_a, piece.step_b) { - (0 | 1, 1) => read.seq_b[loc_b].to_string(), - (1, 0) => "-".to_string(), - #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] - // a is defined to be in range 0..=Alphabet::STEPS - (a, b) => { - let inner = if a == b { - // As a equals b it is a swap or iso length iso mass sets, add in the missing insertions (if any) - // Because a equals b the length of the sequence patch and insertion patch is always equal. - // This means that the resulting insertions makes the text nicely aligned. - read.seq_b[loc_b..loc_b + b as usize] - .iter() - .zip(&insertions[loc_a..loc_a + a as usize]) - .map(|(sb, sa)| format!("{:->1$}", sb.to_string(), sa + 1)) - .join("") - } else { - aminoacid::sequence_to_string( - &read.seq_b[loc_b..loc_b + b as usize], - ) - }; - format!( - "{}", - if a == b && piece.local_score == Scoring::Switched as i8 * a as i8 - { - " swap" - } else { - "" - }, - inner.len(), - insertions[loc_a..loc_a + a as usize].iter().sum::() - + a as usize, - inner - ) - } - } - ); - loc_a += piece.step_a as usize; - loc_b += piece.step_b as usize; - } - let _ = write!(output, "
"); - } - let _ = write!(output, "
"); - output - } -} diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index 2862c3b..bce10bd 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -2379,9 +2379,9 @@ checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" [[package]] name = "rustyms" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb1fee3148a098d6ef3973dcafbca097e952d0a0c5c2302ed01c053f8857086" +checksum = "5149c3f32626cca00d98cf4392506f310923730b9b9b6badb34d4ee9fb7f2b6d" dependencies = [ "itertools", "regex", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 650c3dd..db13040 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -8,14 +8,6 @@ repository = "https://github.com/snijderlab/annotator" edition = "2021" rust-version = "1.57" -[lib] -name = "mass_alignment" -path = "alignment/src/lib.rs" - -[[bin]] -name = "mass_alignment_bin" -path = "alignment/src/bin.rs" - # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [build-dependencies] @@ -27,7 +19,7 @@ serde_json = "1.0" serde = { version = "1.0", features = ["derive"] } tauri = { version = "1.2", features = ["dialog-open"] } pdbtbx = "0.10" -rustyms = "0.4.0" +rustyms = "0.4.1" proc_interface = { path = "../src-proc-interface" } [features] diff --git a/src-tauri/alignment/output.html b/src-tauri/alignment/output.html deleted file mode 100644 index 0db2d7d..0000000 --- a/src-tauri/alignment/output.html +++ /dev/null @@ -1,109 +0,0 @@ - - - - - - - - - -
-
-
-
1....
-
- EVQLVESGGGLVQ--PGGSL-RLSCAASGFTVSSNYMSWVRQAPGKGLEWVSVI-YSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARXXXXXXXXXXXXXXXXXXXX -
-
DLQLVESGGGLVGAKSPPGT---LSAAASGFNL
-
DLQLVESGGGLVGAKSPPGT---LSAAASGFNL
-
EVQLVESGGGLVQ--PGGSL-SGAKYHSGFNL
-
EVQLVESGGGLVQ--PGGSLGVLSCAASGF
-
DLQLVESGGGLVQ--PGGSLGVLSCAASGF
-
DLQLVESGGGLVQ--PGTPL--YWNAASGFNL
-
DLQLVESGGGLVQ--PGGSL-RLSCAASGF
-
QVQLVESGGGLVQ--PGGSL-RLSCAASGF
-
EVQLVESGGGLVQ--PGGSL-RLSCAADGF
-
EVQLVESGGGLVQ--PGGSL-RLSCAASGF
-
EVQLVSGEGGLVQ--PGGSL-RLSCAASGF
-
QVELVESGGGLVQ--PGGSL-RLSCAASGF
-
TLSADTSKNTAYLQMNSLRAEDTAVY
-
RFTLSADTSKNTAYLQMNSLRAEDTA
-
QLVESGGGLVQ--PGGSLTHVAGGHSGF
-
SADTSKNTAYLQMNSLRAEDTAVYY
-
LMLTDGYTRYADSVKGRFTLSADTS
-
QLVESGGGLVQ--PGGSL-RLSCAASGF
-
QLVESGGGLVQ--PGGSL-RLSCQTGF
-
LVESGGGLVQ--PNSL-RLSCAASGF
-
-
-
- - - - \ No newline at end of file diff --git a/src-tauri/alignment/preview.jpg b/src-tauri/alignment/preview.jpg deleted file mode 100644 index c79259e..0000000 Binary files a/src-tauri/alignment/preview.jpg and /dev/null differ diff --git a/src-tauri/alignment/readme.md b/src-tauri/alignment/readme.md deleted file mode 100644 index 9785f1a..0000000 --- a/src-tauri/alignment/readme.md +++ /dev/null @@ -1,28 +0,0 @@ -# Mass Alignment -This is an algorithm based on Smith Waterman/Needleman Wunsch for sequence alignment at the aminoacid level, but extended for the use of mass spectrometry. It has some notable features for de novo sequenced -peptides: -* Detects sets of aminoacids of the same mass, even if they differ in length. {Q} matches {AG} -* Detects swaps of aminoacids (up to length 3) {AVG} matches {GAV} -* Handles modifications. {Q} matches {E} but not the other way around (deamidation) - -It scales similarly to SW/NW, with N*M. This implementation is quite fast ~118ns * N * M. Below is a preview of an alignment based on this algorithm. The alignment generating code can also be found in this project. - -![preview of a peptide alignment](preview.jpg) - -_grey: found isomass, underline: found swap_ - -## Usage - -There is a library `mass_alignment` under `src\lib.rs`. The alignment code can be found under `src\alignment.rs`. The alphabet (lookup matrix) generation under `src\alphabet.rs`. And the HTML generation under `src\template.html`+`src\bin.rs`. - -There is a binary to generate the above preview in `src\bin.rs`. You can use this with `cargo run` this generates `test.html` in the root folder. - -## Building - -[Use cargo](https://www.rust-lang.org/tools/install) - -Commands: -* `cargo run` runs `bin.rs` -* `cargo doc --open` builds the documentation -* `cargo test` runs the unit tests -* `cargo bench` runs the benchmarks (will be saved in `target/benchmark_result.csv`) \ No newline at end of file diff --git a/src-tauri/alignment/src/alignment.rs b/src-tauri/alignment/src/alignment.rs deleted file mode 100644 index 78f315f..0000000 --- a/src-tauri/alignment/src/alignment.rs +++ /dev/null @@ -1,399 +0,0 @@ -use crate::alphabet::{Alphabet, Scoring}; -use crate::aminoacid::*; -use itertools::Itertools; -use std::fmt::Write; - -/// An alignment of two reads. -#[derive(Debug, Clone)] -pub struct Alignment { - /// The score of this alignment - pub score: isize, - /// The path or steps taken for the alignment - pub path: Vec, - /// The position in the first sequence where the alignment starts - pub start_a: usize, - /// The position in the second sequence where the alignment starts - pub start_b: usize, - /// The first sequence - pub seq_a: Vec, - /// The second sequence - pub seq_b: Vec, -} - -impl Alignment { - fn short(&self) -> String { - self.path.iter().map(Piece::short).join("") - } - - fn aligned(&self) -> String { - let blocks: Vec = " ▁▂▃▄▅▆▇█".chars().collect(); - let blocks_neg: Vec = " ▔▔▔▀▀▀▀█".chars().collect(); - let mut str_a = String::new(); - let mut str_b = String::new(); - let mut str_blocks = String::new(); - let mut str_blocks_neg = String::new(); - let mut loc_a = self.start_a; - let mut loc_b = self.start_b; - - for piece in &self.path { - let l = std::cmp::max(piece.step_b, piece.step_a); - if piece.step_a == 0 { - let _ = write!(str_a, "{:- 0 { - " ".to_string() - } else { - #[allow(clippy::cast_sign_loss)] // Checked above - blocks_neg[-piece.local_score as usize].to_string() - }, - l as usize - ) - ); - - loc_a += piece.step_a as usize; - loc_b += piece.step_b as usize; - } - - format!("{}\n{}\n{}\n{}", str_a, str_b, str_blocks, str_blocks_neg) - } - - /// Generate a summary of this alignment for printing to the command line - pub fn summary(&self) -> String { - format!( - "score: {}\npath: {}\nstart: ({}, {})\naligned:\n{}", - self.score, - self.short(), - self.start_a, - self.start_b, - self.aligned() - ) - } - - /// The total number of residues matched on the first sequence - pub fn len_a(&self) -> usize { - self.path.iter().map(|p| p.step_a as usize).sum() - } - - /// The total number of residues matched on the second sequence - pub fn len_b(&self) -> usize { - self.path.iter().map(|p| p.step_b as usize).sum() - } -} - -/// A piece in an alignment, determining what step was taken in the alignment and how this impacted the score -#[derive(Clone, Default, Debug)] -pub struct Piece { - /// The total score of the path up till now - pub score: isize, - /// The local contribution to the score of this piece - pub local_score: i8, - /// The number of steps on the first sequence - pub step_a: u8, - /// The number of steps on the second sequence - pub step_b: u8, -} - -impl Piece { - /// Create a new alignment piece - pub const fn new(score: isize, local_score: i8, step_a: u8, step_b: u8) -> Self { - Self { - score, - local_score, - step_a, - step_b, - } - } -} - -impl Piece { - /// Display this piece very compactly - pub fn short(&self) -> String { - match (self.step_a, self.step_b) { - (0, 1) => "I".to_string(), - (1, 0) => "D".to_string(), - (1, 1) => "M".to_string(), - (a, b) => format!("S[{},{}]", b, a), - } - } -} - -/// The type of alignment to perform -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum Type { - /// Global alignment, which tries to find the best alignment to link both sequences fully to each other, like the Needleman Wunsch algorithm - Global, - /// Local alignment, which tries to find the best patch of both sequences to align to each other, this could lead to trailing ends on both sides of both sequences, like the Smith Waterman - Local, - /// Hybrid alignment, the second sequence will be fully aligned to the first sequence, this could lead to trailing ends on the first sequence but not on the second. - GlobalForB, -} - -impl Type { - const fn global(self) -> bool { - !matches!(self, Self::Local) - } -} - -/// # Panics -/// It panics when the length of `seq_a` or `seq_b` is bigger then [`isize::MAX`]. -#[allow(clippy::too_many_lines)] -pub fn align(seq_a: &[AminoAcid], seq_b: &[AminoAcid], alphabet: &Alphabet, ty: Type) -> Alignment { - assert!(isize::try_from(seq_a.len()).is_ok()); - assert!(isize::try_from(seq_b.len()).is_ok()); - let mut matrix = vec![vec![Piece::default(); seq_b.len() + 1]; seq_a.len() + 1]; - let mut high = (0, 0, 0); - - if ty.global() { - #[allow(clippy::cast_possible_wrap)] - // b is always less than seq_b - for index_b in 0..=seq_b.len() { - matrix[0][index_b] = Piece::new( - (index_b as isize) * Scoring::GapExtendPenalty as isize, - Scoring::GapExtendPenalty as i8, - 0, - u8::from(index_b != 0), - ); - } - } - if ty == Type::Global { - #[allow(clippy::cast_possible_wrap)] - // a is always less than seq_a - for (index_a, row) in matrix.iter_mut().enumerate() { - row[0] = Piece::new( - (index_a as isize) * Scoring::GapExtendPenalty as isize, - Scoring::GapExtendPenalty as i8, - u8::from(index_a != 0), - 0, - ); - } - } - - let mut values = Vec::with_capacity(Alphabet::STEPS * Alphabet::STEPS + 2); - for index_a in 1..=seq_a.len() { - for index_b in 1..=seq_b.len() { - values.clear(); - for len_a in 0..=Alphabet::STEPS { - for len_b in 0..=Alphabet::STEPS { - if len_a == 0 && len_b != 1 - || len_a != 1 && len_b == 0 - || len_a > index_a - || len_b > index_b - { - continue; // Do not allow double gaps (just makes no sense) - } - #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] - // len_a and b are always less then Alphabet::STEPS - let score = if len_a == 0 || len_b == 0 { - Scoring::GapExtendPenalty as i8 // Defined to always be one gap - } else { - alphabet[( - &seq_a[index_a - len_a..index_a], - &seq_b[index_b - len_b..index_b], - )] - }; - if score == 0 { - continue; - } - #[allow(clippy::cast_possible_truncation)] - values.push(Piece::new( - matrix[index_a - len_a][index_b - len_b].score + score as isize, - score, - len_a as u8, - len_b as u8, - )); - } - } - let value = values - .iter() - .max_by(|x, y| x.score.cmp(&y.score)) - .cloned() - .unwrap_or_default(); - if value.score >= high.0 { - high = (value.score, index_a, index_b); - } - matrix[index_a][index_b] = value; - } - } - - // loop back - if ty == Type::Global { - high = ( - matrix[seq_a.len()][seq_b.len()].score, - seq_a.len(), - seq_b.len(), - ); - } else if ty == Type::GlobalForB { - let value = (0..=seq_a.len()) - .map(|v| (v, matrix[v][seq_b.len()].score)) - .max_by(|a, b| a.1.cmp(&b.1)) - .unwrap_or_default(); - high = (value.1, value.0, seq_b.len()); - } - let mut path = Vec::new(); - let high_score = high.0; - //dbg!(&highest_score); - //dbg!(&matrix); - while !(high.1 == 0 && high.2 == 0) { - let value = matrix[high.1][high.2].clone(); - if value.step_a == 0 && value.step_b == 0 { - break; - } - high = ( - 0, - high.1 - value.step_a as usize, - high.2 - value.step_b as usize, - ); - path.push(value); - } - //dbg!(&path); - Alignment { - score: high_score, - path: path.into_iter().rev().collect(), - start_a: high.1, - start_b: high.2, - seq_a: seq_a.to_owned(), - seq_b: seq_b.to_owned(), - } -} - -#[cfg(test)] -mod tests { - use crate::alignment::{align, Type}; - use crate::alphabet::Alphabet; - use crate::aminoacid::AminoAcid::*; - - #[test] - fn equal() { - let alphabet = Alphabet::default(); - let a = vec![A, C, C, G, W]; - let b = vec![A, C, C, G, W]; - let result = align(&a, &b, &alphabet, Type::Local); - dbg!(&result); - assert_eq!(40, result.score); - assert_eq!("MMMMM", &result.short()); - } - - #[test] - fn insertion() { - let alphabet = Alphabet::default(); - let a = vec![A, C, G, W]; - let b = vec![A, C, F, G, W]; - let result = align(&a, &b, &alphabet, Type::Local); - dbg!(&result); - assert_eq!(27, result.score); - assert_eq!("MMIMM", &result.short()); - } - - #[test] - fn deletion() { - let alphabet = Alphabet::default(); - let a = vec![A, C, F, G, W]; - let b = vec![A, C, G, W]; - let result = align(&a, &b, &alphabet, Type::Local); - dbg!(&result); - assert_eq!(27, result.score); - assert_eq!("MMDMM", &result.short()); - } - - #[test] - fn iso_mass() { - let alphabet = Alphabet::default(); - let a = vec![A, F, G, G, W]; - let b = vec![A, F, N, W]; - let result = align(&a, &b, &alphabet, Type::Local); - dbg!(&result); - dbg!(result.short()); - assert_eq!(29, result.score); - assert_eq!("MMS[1,2]M", &result.short()); - } - - #[test] - fn switched() { - let alphabet = Alphabet::default(); - let a = vec![A, F, G, G, W]; - let b = vec![A, G, F, G, W]; - let result = align(&a, &b, &alphabet, Type::Local); - dbg!(&result); - dbg!(result.short()); - assert_eq!(28, result.score); - assert_eq!("MS[2,2]MM", &result.short()); - } - - #[test] - fn local() { - let alphabet = Alphabet::default(); - let a = vec![A, F, G, G, E, W]; - let b = vec![F, G, G, D]; - let result = align(&a, &b, &alphabet, Type::Local); - dbg!(&result); - dbg!(result.short()); - assert_eq!(24, result.score); - assert_eq!("MMM", &result.short()); - } - - #[test] - fn global() { - let alphabet = Alphabet::default(); - let a = vec![A, F, G, G, E, W]; - let b = vec![F, G, G, D]; - let result = align(&a, &b, &alphabet, Type::Global); - dbg!(&result); - println!("{}", result.summary()); - assert_eq!(13, result.score); - assert_eq!("DMMMDM", &result.short()); - assert_eq!(0, result.start_a, "A global alignment should start at 0"); - } - - #[test] - fn global_for_b() { - let alphabet = Alphabet::default(); - let a = vec![A, F, G, G, E, W]; - let b = vec![F, G, G, D]; - let result = align(&a, &b, &alphabet, Type::GlobalForB); - dbg!(&result); - dbg!(result.short()); - assert_eq!(23, result.score); - assert_eq!("MMMM", &result.short()); - assert_eq!(0, result.start_b, "A global alignment should start at 0"); - } -} diff --git a/src-tauri/alignment/src/alphabet.rs b/src-tauri/alignment/src/alphabet.rs deleted file mode 100644 index 6b58ea0..0000000 --- a/src-tauri/alignment/src/alphabet.rs +++ /dev/null @@ -1,283 +0,0 @@ -use crate::aminoacid::AminoAcid; -use crate::aminoacid::AminoAcid::*; -use itertools::Itertools; - -/// An alphabet to determine the score of two amino acid sets -pub struct Alphabet { - array: Vec>, -} - -impl std::ops::Index<(&[AminoAcid], &[AminoAcid])> for Alphabet { - type Output = i8; - fn index(&self, index: (&[AminoAcid], &[AminoAcid])) -> &Self::Output { - &self.array[get_index(index.0)][get_index(index.1)] - } -} - -fn get_index_ref(set: &[&AminoAcid]) -> usize { - set.iter() - .fold(0, |acc, item| acc * AminoAcid::MAX + **item as usize) -} - -fn get_index(set: &[AminoAcid]) -> usize { - set.iter() - .fold(0, |acc, item| acc * AminoAcid::MAX + *item as usize) -} - -impl Alphabet { - /// The number of steps to trace back, if updated a lot of other code has to be updated as well - pub const STEPS: usize = 3; -} - -#[repr(i8)] -#[derive(Clone, Default, Debug)] -pub enum Scoring { - /// The score for identity, should be the highest score of the bunch - Identity = 8, - /// The score for a mismatch - #[default] - Mismatch = -1, - /// The score for an iso mass set, eg Q<>AG - IsoMass = 5, - /// The score for a modification - Modification = 3, - /// The score for a switched set, defined as this value times the size of the set (eg AG scores 4 with GA) - Switched = 2, - /// The score for scoring a gap, should be less than `MISMATCH` - GapStartPenalty = -5, - GapExtendPenalty = -3, -} - -#[allow(clippy::too_many_lines)] -impl Default for Alphabet { - fn default() -> Self { - macro_rules! sets { - ($($($($id:ident),+);+)|+) => { - vec![ - $(vec![ - $(vec![$($id),+],)+ - ],)+ - ] - }; - } - - #[allow(clippy::cast_possible_truncation)] - // STEPS is always within bounds for u32 - let mut alphabet = Self { - array: vec![ - vec![0; (AminoAcid::MAX + 1).pow(Self::STEPS as u32)]; - (AminoAcid::MAX + 1).pow(Self::STEPS as u32) - ], - }; - - for x in 0..=AminoAcid::MAX { - for y in 0..=AminoAcid::MAX { - alphabet.array[x][y] = if x == y { - Scoring::Identity as i8 - } else { - Scoring::Mismatch as i8 - }; - } - } - let iso_mass = sets!( - I; L| - N; G,G| - Q; A,G| - A,V; G,L; G,I| - A,N; Q,G; A,G,G| - L,S; I,S; T,V| - A,M; C,V| - N,V; A,A,A; G,G,V| - N,T; Q,S; A,G,S; G,G,T| - L,N; I,N; Q,V; A,G,V; G,G,L; G,G,I| - D,L; D,I; E,V| - Q,T; A,A,S; A,G,T| - A,Y; F,S| - L,Q; I,Q; A,A,V; A,G,L; A,G,I| - N,Q; A,N,G; Q,G,G| - K,N; G,G,K| - E,N; D,Q; A,D,G; E,G,G| - D,K; A,A,T; G,S,V| - M,N; A,A,C; G,G,M| - A,S; G,T| - A,A,L; A,A,I; G,V,V| - Q,Q; A,A,N; A,Q,G| - E,Q; A,A,D; A,E,G| - E,K; A,S,V; G,L,S; G,I,S; G,T,V| - M,Q; A,G,M; C,G,V| - A,A,Q; N,G,V - ); - - for set in iso_mass { - for set in set.iter().permutations(2) { - let a = set[0]; - let b = set[1]; - for seq_a in a.iter().permutations(a.len()) { - for seq_b in b.iter().permutations(b.len()) { - alphabet.array[get_index_ref(&seq_a)][get_index_ref(&seq_b)] = - Scoring::IsoMass as i8; - } - } - } - } - - let modifications = sets!( - //N;D| // Amidation only at N term - Q;E| // Deamidation - D;N| // Deamidation - C;T| // Disulfide bond - T;D| // Methylation - S;T| // Methylation - D;E| // Methylation - R;A,V;G,L| // Methylation - Q;A,A // Methylation - ); - - for set in modifications { - let a = &set[0]; - for seq_b in set.iter().skip(1) { - alphabet.array[get_index(a)][get_index(seq_b.as_slice())] = - Scoring::Modification as i8; - } - } - - let amino_acids = (1..=AminoAcid::MAX) - .map(|a| AminoAcid::try_from(a).unwrap()) - .collect_vec(); - for size in 2..=Self::STEPS { - for set in amino_acids - .iter() - .combinations_with_replacement(size) - .flat_map(|v| v.into_iter().permutations(size)) - { - if set.iter().all(|v| *v == set[0]) { - continue; // Do not add [A, A] or [A, A, A] etc as SWITCHED - } - #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] - // set.len() is at max equal to Self::STEPS - for switched in set.clone().into_iter().permutations(size) { - alphabet.array[get_index_ref(&set)][get_index_ref(&switched)] = - Scoring::Switched as i8 * set.len() as i8; - } - } - } - - alphabet - } -} - -#[cfg(test)] -mod tests { - use super::{Alphabet, Scoring}; - use crate::aminoacid::AminoAcid::*; - - #[test] - fn identity() { - let alphabet = Alphabet::default(); - assert_eq!( - Scoring::Identity as i8, - alphabet[([A].as_slice(), [A].as_slice())] - ); - assert_eq!(0, alphabet[([A, A].as_slice(), [A, A].as_slice())]); - assert_eq!(0, alphabet[([A, A, A].as_slice(), [A, A, A].as_slice())]); - } - - #[test] - fn similarity() { - let alphabet = Alphabet::default(); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([I].as_slice(), [L].as_slice())] - ); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([N].as_slice(), [G, G].as_slice())] - ); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([G, G].as_slice(), [N].as_slice())] - ); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([A, S].as_slice(), [G, T].as_slice())] - ); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([S, A].as_slice(), [G, T].as_slice())] - ); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([S, A].as_slice(), [T, G].as_slice())] - ); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([A, S].as_slice(), [T, G].as_slice())] - ); - assert_eq!( - Scoring::IsoMass as i8, - alphabet[([L, Q].as_slice(), [A, V, A].as_slice())] - ); - } - - #[test] - fn inequality() { - let alphabet = Alphabet::default(); - assert_eq!( - Scoring::Mismatch as i8, - alphabet[([I].as_slice(), [Q].as_slice())] - ); - assert_eq!(0, alphabet[([Q].as_slice(), [G, G].as_slice())]); - assert_eq!(0, alphabet[([A, E].as_slice(), [G, T].as_slice())]); - assert_eq!(0, alphabet[([E, Q].as_slice(), [A, V, A].as_slice())]); - } - - #[test] - fn switched() { - let alphabet = Alphabet::default(); - assert_eq!( - Scoring::Switched as i8 * 2, - alphabet[([E, Q].as_slice(), [Q, E].as_slice())] - ); - assert_eq!( - Scoring::Switched as i8 * 3, - alphabet[([D, A, C].as_slice(), [A, C, D].as_slice())] - ); - assert_eq!( - Scoring::Switched as i8 * 3, - alphabet[([C, D, A].as_slice(), [A, C, D].as_slice())] - ); - assert_eq!( - Scoring::Switched as i8 * 3, - alphabet[([A, C, D].as_slice(), [D, A, C].as_slice())] - ); - assert_eq!( - Scoring::Switched as i8 * 3, - alphabet[([C, D, A].as_slice(), [D, A, C].as_slice())] - ); - assert_eq!( - Scoring::Switched as i8 * 3, - alphabet[([A, C, D].as_slice(), [C, D, A].as_slice())] - ); - assert_eq!( - Scoring::Switched as i8 * 3, - alphabet[([D, A, C].as_slice(), [C, D, A].as_slice())] - ); - assert_eq!( - Scoring::Switched as i8 * 3, - alphabet[([V, A, A].as_slice(), [A, V, A].as_slice())] - ); - } - - #[test] - fn modification() { - let alphabet = Alphabet::default(); - assert_eq!( - Scoring::Modification as i8, - alphabet[([D].as_slice(), [N].as_slice())] - ); - assert_eq!( - Scoring::Mismatch as i8, - alphabet[([N].as_slice(), [D].as_slice())] - ); - } -} diff --git a/src-tauri/alignment/src/aminoacid.rs b/src-tauri/alignment/src/aminoacid.rs deleted file mode 100644 index 71c5ffd..0000000 --- a/src-tauri/alignment/src/aminoacid.rs +++ /dev/null @@ -1,170 +0,0 @@ -use itertools::Itertools; -use std::fmt::Display; - -/// All aminoacids -#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)] -pub enum AminoAcid { - /// Alanine - A = 1, - /// Arginine - R, - /// Asparagine - N, - /// Aspartic acid - D, - /// Cysteine - C, - /// Glutamine - Q, - /// Glutamic acid - E, - /// Glycine - G, - /// Histidine - H, - /// Isoleucine - I, - /// Leucine - L, - /// Lysine - K, - /// Methionine - M, - /// Phenylalanine - F, - /// Proline - P, - /// Serine - S, - /// Threonine - T, - /// Tryptophan - W, - /// Tyrosine - Y, - /// Valine - V, - /// Weird - B, - /// Also weird - Z, - /// Single gap - X, - /// Longer gap - Gap, -} - -impl AminoAcid { - /// The total number of normal amino acids (disregards Gap) - pub const MAX: usize = 23; -} - -impl Display for AminoAcid { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(match self { - Self::A => "A", - Self::R => "R", - Self::N => "N", - Self::D => "D", - Self::C => "C", - Self::Q => "Q", - Self::E => "E", - Self::G => "G", - Self::H => "H", - Self::I => "I", - Self::L => "L", - Self::K => "K", - Self::M => "M", - Self::F => "F", - Self::P => "P", - Self::S => "S", - Self::T => "T", - Self::W => "W", - Self::Y => "Y", - Self::V => "V", - Self::B => "B", - Self::Z => "Z", - Self::X => "X", - Self::Gap => "*", - }) - } -} - -impl TryFrom for AminoAcid { - type Error = (); - fn try_from(num: usize) -> Result { - match num { - 1 => Ok(Self::A), - 2 => Ok(Self::R), - 3 => Ok(Self::N), - 4 => Ok(Self::D), - 5 => Ok(Self::C), - 6 => Ok(Self::Q), - 7 => Ok(Self::E), - 8 => Ok(Self::G), - 9 => Ok(Self::H), - 10 => Ok(Self::I), - 11 => Ok(Self::L), - 12 => Ok(Self::K), - 13 => Ok(Self::M), - 14 => Ok(Self::F), - 15 => Ok(Self::P), - 16 => Ok(Self::S), - 17 => Ok(Self::T), - 18 => Ok(Self::W), - 19 => Ok(Self::Y), - 20 => Ok(Self::V), - 21 => Ok(Self::B), - 22 => Ok(Self::Z), - 23 => Ok(Self::X), - 24 => Ok(Self::Gap), - _ => Err(()), - } - } -} - -impl TryFrom for AminoAcid { - type Error = (); - fn try_from(value: char) -> Result { - match value { - 'A' => Ok(Self::A), - 'R' => Ok(Self::R), - 'N' => Ok(Self::N), - 'D' => Ok(Self::D), - 'C' => Ok(Self::C), - 'Q' => Ok(Self::Q), - 'E' => Ok(Self::E), - 'G' => Ok(Self::G), - 'H' => Ok(Self::H), - 'I' => Ok(Self::I), - 'L' => Ok(Self::L), - 'K' => Ok(Self::K), - 'M' => Ok(Self::M), - 'F' => Ok(Self::F), - 'P' => Ok(Self::P), - 'S' => Ok(Self::S), - 'T' => Ok(Self::T), - 'W' => Ok(Self::W), - 'Y' => Ok(Self::Y), - 'V' => Ok(Self::V), - 'B' => Ok(Self::B), - 'Z' => Ok(Self::Z), - 'X' => Ok(Self::X), - '*' => Ok(Self::Gap), - _ => Err(()), - } - } -} - -/// Create an aminoacid sequence from a string, just ignores any non aminoacids characters -pub fn sequence_from_string(value: &str) -> Vec { - value - .chars() - .filter_map(|v| AminoAcid::try_from(v).ok()) - .collect() -} - -/// Generate a string from a sequence of aminoacids -pub fn sequence_to_string(value: &[AminoAcid]) -> String { - value.iter().map(std::string::ToString::to_string).join("") -} diff --git a/src-tauri/alignment/src/bin.rs b/src-tauri/alignment/src/bin.rs deleted file mode 100644 index a75f168..0000000 --- a/src-tauri/alignment/src/bin.rs +++ /dev/null @@ -1,102 +0,0 @@ -#![allow(dead_code)] -#![warn(clippy::pedantic, clippy::nursery, clippy::all)] -#![allow(clippy::enum_glob_use, clippy::wildcard_imports)] -use mass_alignment::template::Template; -use mass_alignment::*; - -fn main() { - let alphabet = Alphabet::default(); - //let template = aminoacid::sequence_from_string("XXXXXXXXXXXXXXXXXXXXYFDYWGQGTLVTVSS"); - let template = mass_alignment::sequence_from_string("EVQLVESGGGLVQPGGSLRLSCAASGFTVSSNYMSWVRQAPGKGLEWVSVIYSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARXXXXXXXXXXXXXXXXXXXX"); - let reads: Vec> = [ - //"SRWGGDGFYAMDYWGQGTLVTV", - //"DWNGFYAMDYWGQGTLVTVSS", - //"RWGGDGFYAMDYWGQGTLVTV", - //"HVPHGDGFYAMDYWGQGTLVT", - //"WRGGDGFYAMDYWGQGTLVT", - //"SRWGGDGFYAMDYWGQGTLV", - //"RWGGDGFYAMDYWGQGTLVT", - //"WRNDGFYAMDYWGQGTLVT", - //"RWGGDGFYAMDYWGQGTLV", - //"MARNDGFYAMDYWGQGTLV", - //"RWNDGFYAMDYWGQGTLV", - //"SRWGGNGFYWDYWGQGT", - //"RWNDGFYWDYWGQGT", - //"DYWGQGTLVVTSS", - //"DYWGQGTLVTVSS", - //"DYWGQGTLVTV", - //"DYWGQGTLVT", - //"WGQGTLVT", - "DLQLVESGGGLVGAKSPPGTLSAAASGFNL", - "DLQLVESGGGLVGAKSPPGTLSAAASGFNL", - "EVQLVESGGGLVQPGGSLSGAKYHSGFNL", - "EVVQLVESGGGLVQPGGSLGVLSCAASGF", - "DLQLVESGGGLVQPGGSLGVLSCAASGF", - "DLQLVESGGGLVQPGTPLYWNAASGFNL", - "DLQLVESGGGLVQPGGSLRLSCAASGF", - "QVQLVESGGGLVQPGGSLRLSCAASGF", - "EVQLVESGGGLPVQGGSLRLSCAADGF", - "EVQLVESGGGLVQPGGSLRLSCAASGF", - "EVQLVSGEGGLVQPGGSLRLSCAASGF", - "QVELVESGGGLVQPGGSLRLSCAASGF", - "TLSADTSKNTAYLQMNSLRAEDTAVY", - "RFTLSADTSKNTAYLQMNSLRAEDTA", - "QLVESGGGLVQPGGSLTHVAGAGHSGF", - "SADTSKNTAYLQMNSLRAEDTAVYY", - "LMLTDGYTRYADSVKGRFTLSADTS", - "QLVESGGGLVQPGGSLRLSCAASGF", - "QLVESGGGLVQPGGSLRLSCQTGF", - "LVESGGGLVQPNSLRLSCAASGF", - ] - .into_iter() - .map(mass_alignment::sequence_from_string) - .collect(); - - let template = Template::new( - template, - reads.iter().map(std::vec::Vec::as_slice).collect(), - &alphabet, - Type::GlobalForB, - ); - let content = format!( - " - - - - - - - -
-
{} -
-
- -", - template.generate_html() - ); - std::fs::write("test.html", content).unwrap(); -} diff --git a/src-tauri/alignment/src/lib.rs b/src-tauri/alignment/src/lib.rs deleted file mode 100644 index 720fa4f..0000000 --- a/src-tauri/alignment/src/lib.rs +++ /dev/null @@ -1,42 +0,0 @@ -//! An algorithm based on Needleman Wunsch/Smith Waterman but extended to allow for mass based alignment. -//! The mass based part gives the option to match two sets of aminoacids with different sizes. -//! For example the set {Q} matches {AG} because these have the same mass and so are commonly misclassified -//! is de novo sequencing for peptides. Besides iso mass definitions it also handles swaps with finesse, -//! meaning that {AG} matches {GA} with a well defined score to allow for these mistakes to be fixed. The -//! last important addition is the handling of post translational modifications meaning that {Q} matches {E} -//! but not the other way around to allow for deamidation of the sample in reference to the template. -//! -//! ```rust -//! use mass_alignment::*; -//! use mass_alignment::AminoAcid::*; -//! -//! let alphabet = Alphabet::default(); -//! let template = &[A,G,Q,S,T,Q]; -//! let query = &[Q,E,S,W]; -//! let result = align(template, query, &alphabet, Type::GlobalForB); -//! println!("{}", result.summary()); -//! assert_eq!(15, result.score) -//! ``` - -#![allow(dead_code)] -#![warn(clippy::pedantic, clippy::nursery, clippy::all, missing_docs)] -#![allow( - clippy::enum_glob_use, - clippy::wildcard_imports, - clippy::must_use_candidate -)] -/// The module containing all alignment handling -mod alignment; -/// The module containing all alphabet handling -mod alphabet; -/// The module containing the definition for aminoacids -mod aminoacid; -/// The module containing the definition for templates -pub mod template; - -pub use crate::alignment::align; -pub use crate::alignment::*; -pub use crate::alphabet::Alphabet; -pub use crate::aminoacid::sequence_from_string; -pub use crate::aminoacid::sequence_to_string; -pub use crate::aminoacid::AminoAcid; diff --git a/src-tauri/alignment/src/template.rs b/src-tauri/alignment/src/template.rs deleted file mode 100644 index d94f6e3..0000000 --- a/src-tauri/alignment/src/template.rs +++ /dev/null @@ -1,128 +0,0 @@ -use crate::alignment::{self, Alignment}; -use crate::alphabet::Scoring; -use crate::aminoacid::{self, AminoAcid}; -use crate::{align, Alphabet}; -use itertools::Itertools; -use std::fmt::Write; - -/// A template that is matched with many reads -pub struct Template { - /// The sequence of this template - pub sequence: Vec, - /// The reads matched to this template - pub reads: Vec, -} - -impl Template { - /// Create a new template by matching the given reads to the given template sequence - pub fn new( - sequence: Vec, - reads: Vec<&[AminoAcid]>, - alphabet: &Alphabet, - alignment_type: alignment::Type, - ) -> Self { - Self { - reads: reads - .into_iter() - .map(|v| align(&sequence, v, alphabet, alignment_type)) - .collect(), - sequence, - } - } - - /// Generate HTML for a reads alignment, all styling is missing and it is only a small part of a document - pub fn generate_html(&self) -> String { - let mut insertions = vec![0; self.sequence.len()]; - for read in &self.reads { - let mut loc_a = read.start_a; - let mut insertion = 0; - - for piece in &read.path { - if piece.step_a == 0 && piece.step_b == 1 { - insertion += 1; - } else if insertion != 0 { - insertions[loc_a] = std::cmp::max(insertions[loc_a], insertion); - insertion = 0; - } else { - insertion = 0; - } - loc_a += piece.step_a as usize; - } - } - - let mut output = format!("
1....
", insertions.iter().sum::() + self.sequence.len()); - for (ins, seq) in insertions.iter().zip(&self.sequence) { - let _ = write!(output, "{:-<1$}{2}", "", ins, seq); - } - let _ = write!(output, "
"); - - for read in &self.reads { - let _ = write!( - output, - "
", - insertions[0..read.start_a].iter().sum::() + read.start_a + 1, - insertions[0..read.start_a + read.len_a()] - .iter() - .sum::() - + read.start_a - + read.len_a() - + 3 - ); - let mut loc_a = read.start_a; - let mut loc_b = read.start_b; - let mut insertion = 0; - for piece in &read.path { - if piece.step_a == 0 && piece.step_b == 1 { - insertion += 1; - } else { - let _ = write!(output, "{:-<1$}", "", insertions[loc_a] - insertion); - insertion = 0; - } - let _ = write!( - output, - "{}", - match (piece.step_a, piece.step_b) { - (0 | 1, 1) => read.seq_b[loc_b].to_string(), - (1, 0) => "-".to_string(), - #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] - // a is defined to be in range 0..=Alphabet::STEPS - (a, b) => { - let inner = if a == b { - // As a equals b it is a swap or iso length iso mass sets, add in the missing insertions (if any) - // Because a equals b the length of the sequence patch and insertion patch is always equal. - // This means that the resulting insertions makes the text nicely aligned. - read.seq_b[loc_b..loc_b + b as usize] - .iter() - .zip(&insertions[loc_a..loc_a + a as usize]) - .map(|(sb, sa)| format!("{:->1$}", sb.to_string(), sa + 1)) - .join("") - } else { - aminoacid::sequence_to_string( - &read.seq_b[loc_b..loc_b + b as usize], - ) - }; - format!( - "{}", - if a == b && piece.local_score == Scoring::Switched as i8 * a as i8 - { - " swap" - } else { - "" - }, - inner.len(), - insertions[loc_a..loc_a + a as usize].iter().sum::() - + a as usize, - inner - ) - } - } - ); - loc_a += piece.step_a as usize; - loc_b += piece.step_b as usize; - } - let _ = write!(output, "
"); - } - let _ = write!(output, "
"); - output - } -} diff --git a/src-tauri/build.rs b/src-tauri/build.rs index 76b12c6..2eca015 100644 --- a/src-tauri/build.rs +++ b/src-tauri/build.rs @@ -15,7 +15,7 @@ r#" - Stitch+Ox + Annotator @@ -23,112 +23,59 @@ r#" - -
- Spectra -
-

Load spectra

- - - -
-
-

Annotate

- - - - - - - - - - - - -
- Custom model -

Ion

-

Location

-

Loss

"#).unwrap(); +
+

Load spectra

+ + + +
+
+

Annotate

+ + + + + + + + + + + + +
+ Custom model +

Ion

+

Location

+

Loss

"#).unwrap(); for ion in ["a", "b", "c", "d", "v", "w", "x", "y", "z"] { write!( writer, r#" -
- - - -
- "#, +
+ + + +
+ "#, ion ) .unwrap(); @@ -136,19 +83,18 @@ LVESGGGLVQPNSLRLSCAASGF write!( writer, r#" - -
- - - -
-

-    
-
- Logs -
-

-    
+ +
+ + + +
+

+  
+
+ Logs +
+

   
diff --git a/src-tauri/src/main.rs b/src-tauri/src/main.rs index fc8d936..31cc6f0 100644 --- a/src-tauri/src/main.rs +++ b/src-tauri/src/main.rs @@ -3,99 +3,15 @@ windows_subsystem = "windows" )] -use itertools::Itertools; -use mass_alignment::{template::Template, *}; -use pdbtbx::*; use rustyms::{e, Charge, Location, Model, NeutralLoss}; use rustyms::{mz, MassOverCharge}; use state::State; -use std::collections::HashMap; use std::sync::Mutex; mod html_builder; mod render; mod state; -// Learn more about Tauri commands at https://tauri.app/v1/guides/features/command -#[tauri::command] -fn align_sequences(template: &str, reads: &str, alignment_type: &str) -> String { - let alphabet = Alphabet::default(); - let template = sequence_from_string(template); - let reads: Vec> = reads.split('\n').map(sequence_from_string).collect(); - let alignment_type = match alignment_type { - "1" => Type::Local, - "2" => Type::GlobalForB, - "3" => Type::Global, - _ => panic!("Incorrect alignment type"), - }; - - let result = Template::new( - template, - reads.iter().map(|a| a.as_slice()).collect(), - &alphabet, - alignment_type, - ); - result.generate_html() -} - -#[tauri::command] -fn load_cif(path: &str, min_length: usize, warn: bool) -> Result<(String, String), String> { - let result = open(path, StrictnessLevel::Loose); - if let Ok(file) = result { - let warnings = file.1.into_iter().map(|err| format!("{}", err)).join("\n"); - let pdb = file.0; - let mut found_unknown = HashMap::new(); - let output = pdb - .chains() - .map(|c| { - c.conformers() - .filter_map(|a| { - match AMINO_ACIDS - .iter() - .position(|err| *err == a.name()) - .and_then(|v| AMINO_ACIDS_CHAR.get(v)) - { - Some(s) => Some(s), - None => { - if warn && !IGNORE_LIST.contains(&a.name()) { - found_unknown.insert( - a.name(), - 1 + found_unknown.get(a.name()).unwrap_or(&0), - ); - }; - None - } - } - }) - .collect::() - }) - .filter(|a| a.len() >= min_length) - .join("\n"); - let warnings = warnings + "\n" + &found_unknown.into_iter().map(|name| { - format!( - "{}", - PDBError::new( - ErrorLevel::GeneralWarning, - "Unrecognised residue", - format!( - "This name was not recognised as an Amino Acid or common solvent. It was found {} time{}.", - name.1, - if name.1 != 1 { "s" } else { "" } - ), - Context::show(name.0), - ) - ) - }).join("\n"); - Ok((output, warnings)) - } else { - Err(result - .unwrap_err() - .into_iter() - .map(|a| format!("{}", a)) - .collect()) - } -} - type ModifiableState<'a> = tauri::State<'a, std::sync::Mutex>; // Learn more about Tauri commands at https://tauri.app/v1/guides/features/command @@ -173,32 +89,12 @@ fn annotate_spectrum( )) } -/// All amino acids. Includes Amber-specific naming conventions for (de-)protonated versions, CYS involved in -/// disulfide bonding and the like. -const AMINO_ACIDS: &[&str] = &[ - "ALA", "ARG", "ASH", "ASN", "ASP", "ASX", "CYS", "CYX", "GLH", "GLN", "GLU", "GLY", "HID", - "HIE", "HIM", "HIP", "HIS", "ILE", "LEU", "LYN", "LYS", "MET", "PHE", "PRO", "SER", "THR", - "TRP", "TYR", "VAL", "SEC", "PYL", -]; - -const AMINO_ACIDS_CHAR: &[char] = &[ - 'A', 'R', 'N', 'N', 'D', 'B', 'C', 'C', 'Q', 'Q', 'E', 'G', 'H', 'H', 'H', 'H', 'H', 'I', 'L', - 'K', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V', 'U', 'O', -]; - -const IGNORE_LIST: &[&str] = &["HOH", "WAT", "ADP", "DMS"]; // Common solvents I recognised - fn main() { tauri::Builder::default() .manage(Mutex::new(State { spectra: Vec::new(), })) - .invoke_handler(tauri::generate_handler![ - align_sequences, - load_cif, - load_mgf, - annotate_spectrum - ]) + .invoke_handler(tauri::generate_handler![load_mgf, annotate_spectrum]) .run(tauri::generate_context!()) .expect("error while running tauri application"); } diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index 792234f..47fd1f0 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -7,8 +7,8 @@ "withGlobalTauri": true }, "package": { - "productName": "stitch-oxide", - "version": "0.0.0" + "productName": "annotator", + "version": "0.1.0" }, "tauri": { "allowlist": { @@ -32,7 +32,7 @@ "icons/icon.icns", "icons/icon.ico" ], - "identifier": "com.stitch.stitch-oxide", + "identifier": "com.snijderlab.annotator", "longDescription": "", "macOS": { "entitlements": null, @@ -62,7 +62,7 @@ "height": 600, "resizable": true, "alwaysOnTop": false, - "title": "Stitch[+Oxide]", + "title": "Annotator", "width": 800 } ] diff --git a/src/main.js b/src/main.js index 1db9926..829999d 100644 --- a/src/main.js +++ b/src/main.js @@ -1,25 +1,5 @@ const { invoke } = window.__TAURI__.tauri; -let sequenceInputA; -let sequenceInputB; -let sequenceType; -let alignmentScore; - -async function align() { - alignmentScore.innerHTML = await invoke("align_sequences", { template: sequenceInputA.value, reads: sequenceInputB.value, alignmentType: sequenceType.value }); -} - -async function load_cif() { - try { - var result = await invoke("load_cif", { path: document.querySelector("#load-path").value, minLength: Number(document.querySelector("#load-min-length").value), warn: true }); - sequenceInputB.value = result[0]; - document.querySelector("#error-log").innerText = result[1]; - } catch (error) { - console.log(error); - document.querySelector("#error-log").innerText = error; - } -} - async function load_mgf() { try { let result = await invoke("load_mgf", { path: document.querySelector("#load-mgf-path").dataset.filepath });