diff --git a/src/process.rs b/src/process.rs index 2c45f803..e36de8a2 100644 --- a/src/process.rs +++ b/src/process.rs @@ -4,7 +4,7 @@ use crate::stemcell::{assign_background_mutations, StemCell}; use crate::subclone::{ save_variant_fraction, CloneId, Distributions, Fitness, SubClones, Variants, }; -use crate::tree::PhyloTree; +use crate::tree::{save_tree, PhyloTree}; use crate::{ProbsPerYear, MAX_SUBCLONES, TIME_AT_BIRTH}; use anyhow::{ensure, Context}; use chrono::Utc; @@ -503,20 +503,13 @@ impl Moran { &self.make_path(Stats2Save::VariantFraction, nb_cells, time)?, self.verbosity, )?; - if self.verbosity > 0 { - println!("Saving the tree"); - } - let mut pruned = self - .tree - .create_tree_without_dead_cells(&cells, self.verbosity)?; - pruned.name_leaves_with_cell_idx()?; - if self.verbosity > 0 { - println!("Name leaf nodes with idx of cells"); - } - pruned.tree.to_file( + save_tree( + &self.tree, + &cells, &self .make_path(Stats2Save::Tree, nb_cells, time)? .with_extension("txt"), + self.verbosity, )?; } @@ -557,6 +550,11 @@ impl Moran { rng, self.verbosity, ); + // update edges of tree as well + let node = self.tree.get_mut_leaf(stem_cell.id)?; + node.parent_edge = node + .parent_edge + .map(|edge| edge + stem_cell.variants.len() as f64); } } let population = self.subclones.get_cells_with_clones_idx(); diff --git a/src/proliferation.rs b/src/proliferation.rs index 16720797..4cffa09e 100644 --- a/src/proliferation.rs +++ b/src/proliferation.rs @@ -1,4 +1,4 @@ -use anyhow::Context; +use anyhow::{ensure, Context}; use rand::Rng; use rand_distr::{Bernoulli, Distribution}; @@ -94,7 +94,7 @@ impl Proliferation { .interdivision_time(time) .with_context(|| "wrong interdivision time") .unwrap()) as f64; - // the fit variant is sampled here + // the fit variant is sampled assuming a background mutation let clone_id = next_clone(subclones, proliferating_subclone, p, rng, verbosity); if verbosity > 1 { println!("proliferation at time {}", time); @@ -106,7 +106,8 @@ impl Proliferation { println!("cell {:#?} is dividing", stem_cell); } } - let mutational_distance = stem_cell.variants.len(); + // the mutations of the ancestral cell + let nb_muts_old = stem_cell.variants.len(); if let NeutralMutations::UponDivisionAndBackground = self.neutral_mutation { assign_background_mutations( &mut stem_cell, @@ -116,10 +117,8 @@ impl Proliferation { verbosity, ); } - if verbosity > 1 { - println!("Adding the cell on a new node"); - } - match self.division { + // perform the division + let cells = match self.division { Division::Asymmetric(prob) => { if prob.sample(rng) { if verbosity > 1 { @@ -131,122 +130,116 @@ impl Proliferation { rng, verbosity, ); - stem_cell - .set_last_division_time(time) - .with_context(|| "wrong time") - .unwrap(); - tree.put_cell_on_new_node( - stem_cell.id, - (stem_cell.variants.len() - mutational_distance) as f64, - verbosity, - )?; - subclones - .get_mut_clone_unchecked(clone_id) - .assign_cell(stem_cell); + let mutational_distance = (stem_cell.variants.len() - nb_muts_old) as f64; + ensure!( + tree.put_cell_on_new_node(stem_cell.id, mutational_distance, verbosity)?, + "the proliferating cell is not registered as a leaf" + ); + vec![stem_cell] } else { - assign_divisional_mutations( + let new_cell = self.symmtetric_division_helper( &mut stem_cell, - &distributions.neutral_poisson, + nb_muts_old, + tree, + distributions, rng, verbosity, - ); - stem_cell - .set_last_division_time(time) - .with_context(|| "wrong time") - .unwrap(); - tree.put_cell_on_new_node( - stem_cell.id, - (stem_cell.variants.len() - mutational_distance) as f64, - verbosity, )?; - - let mut new_cell = stem_cell.clone(); - self.last_id += 1; - new_cell.id = self.last_id; - if verbosity > 1 { - println!( - "adding the sibling cell with id {} on a new node", - self.last_id - ); - } - assign_divisional_mutations( - &mut new_cell, - &distributions.neutral_poisson, - rng, - verbosity, - ); - new_cell - .set_last_division_time(time) - .with_context(|| "wrong time") - .unwrap(); - tree.assign_sibling( - new_cell.id, - stem_cell.id, - (new_cell.variants.len() - mutational_distance) as f64, - verbosity, - )?; - - subclones - .get_mut_clone_unchecked(clone_id) - .assign_cell(stem_cell); - subclones - .get_mut_clone_unchecked(clone_id) - .assign_cell(new_cell); + vec![stem_cell, new_cell] } } Division::Symmetric => { - assign_divisional_mutations( + let new_cell = self.symmtetric_division_helper( &mut stem_cell, - &distributions.neutral_poisson, + nb_muts_old, + tree, + distributions, rng, verbosity, - ); - stem_cell - .set_last_division_time(time) - .with_context(|| "wrong time") - .unwrap(); - tree.put_cell_on_new_node( - stem_cell.id, - (stem_cell.variants.len() - mutational_distance) as f64, - verbosity, )?; - - let mut new_cell = stem_cell.clone(); - self.last_id += 1; - new_cell.id = self.last_id; - if verbosity > 1 { - println!( - "adding the sibling cell with id {} on a new node", - self.last_id - ); - } - assign_divisional_mutations( - &mut new_cell, - &distributions.neutral_poisson, - rng, - verbosity, - ); - new_cell - .set_last_division_time(time) - .with_context(|| "wrong time") - .unwrap(); - tree.assign_sibling( - new_cell.id, - stem_cell.id, - (new_cell.variants.len() - mutational_distance) as f64, - verbosity, - )?; - - subclones - .get_mut_clone_unchecked(clone_id) - .assign_cell(stem_cell); - subclones - .get_mut_clone_unchecked(clone_id) - .assign_cell(new_cell); + vec![stem_cell, new_cell] } }; + // assign new cells to subclones: if new fit variant present, move cell + // to a new subclone, otherwise reassign both cells to the subclone of + // the ancestral cell (i.e. assume fit variants are only background + // mutations) + for mut stem_cell in cells { + stem_cell + .set_last_division_time(time) + .with_context(|| "wrong time") + .unwrap(); + subclones + .get_mut_clone_unchecked(clone_id) + .assign_cell(stem_cell); + } Ok(()) } + + fn symmtetric_division_helper( + &mut self, + stem_cell: &mut StemCell, + nb_muts_old: usize, + tree: &mut PhyloTree, + distributions: &Distributions, + rng: &mut impl Rng, + verbosity: u8, + ) -> anyhow::Result { + //! Creates a new leaf node to which `stem_cell` will be assigned with + //! a distance equal to the new mutations acquired. + //! The background mutations are not simulated but assumed to be + //! acquired somewhere else, whereas the divisional ones are simulated + //! here. + //! + //! Creates also a new cell by cloning the ancestral cell (i.e. + //! `stem_cell` before assigning divisional mutations) and put this + //! cell on a leaf node whose parent is the node of the ancestral cell. + //! As a results, the new cell will be a sibling of the new `stem_cell`, + //! i.e. the cell with divisional mutations assigned to a new node. + //! + //! ## Returns + //! A new cell which is a sibling of `stem_cell` and daughter of the + //! ancestral cell shared with `stem_cell`. + // clone before assigning divisional mutations + let mut new_cell = stem_cell.clone(); + self.last_id += 1; + new_cell.id = self.last_id; + + assign_divisional_mutations(stem_cell, &distributions.neutral_poisson, rng, verbosity); + // this takes into account also background mutations + let mutational_distance = stem_cell.variants.len() - nb_muts_old; + ensure!( + tree.put_cell_on_new_node(stem_cell.id, mutational_distance as f64, verbosity)?, + "the proliferating cell is not registered as a leaf" + ); + + assign_divisional_mutations( + &mut new_cell, + &distributions.neutral_poisson, + rng, + verbosity, + ); + // nb_muts_old is the same for both cell as it comes from the + // ancestral cell + let mutational_distance = new_cell.variants.len() - nb_muts_old; + if verbosity > 1 { + println!( + "adding the sibling cell with id {} on a new node", + self.last_id + ); + } + // a new cell is not registered as a leaf node yet + ensure!( + !tree.assign_sibling( + new_cell.id, + stem_cell.id, + mutational_distance as f64, + verbosity + )?, + "the new cell is already registered as a leaf" + ); + Ok(new_cell) + } } #[derive(Clone, Debug, Default)] diff --git a/src/stemcell.rs b/src/stemcell.rs index f97f5a5a..3f60f815 100644 --- a/src/stemcell.rs +++ b/src/stemcell.rs @@ -83,12 +83,22 @@ pub fn assign_divisional_mutations( ) { let mutations = neutral_poisson.new_muts_upon_division(rng); if let Some(mutations) = mutations { - if verbosity > 2 { - println!("assigning {:#?} to cell {:#?}", mutations, stem_cell); + if verbosity > 1 { + println!( + "assigning {} divisional mutations to cell {}", + mutations.len(), + stem_cell.id + ); + if verbosity > 2 { + println!( + "assigning {:#?} dividisional mutations to cell {:#?}", + mutations, stem_cell + ); + } } mutate(stem_cell, mutations); - } else if verbosity > 2 { - println!("no mutations to assign to cell {:#?}", stem_cell); + } else if verbosity > 1 { + println!("no divisional mutations to assign to cell {}", stem_cell.id); } } diff --git a/src/tree.rs b/src/tree.rs index 89887463..929d553b 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -1,6 +1,9 @@ -use anyhow::{ensure, Context}; +use anyhow::{bail, ensure, Context}; use phylotree::tree::{Node, NodeId, Tree}; -use std::collections::HashMap; +use std::{ + collections::{HashMap, HashSet}, + path::Path, +}; use crate::stemcell::{StemCell, StemCellId}; @@ -43,64 +46,79 @@ impl PhyloTree { } } - pub fn assign_sibling( + fn add_cell_from_parent_node( &mut self, cell_id: StemCellId, - sibling_id: StemCellId, + parent_id: NodeId, edge: f64, verbosity: u8, - ) -> anyhow::Result<()> { + ) -> anyhow::Result { + //! Returns whether the cell was already present in the system as a + //! leaf node. if verbosity > 1 { println!("create new node"); } - let parent_id = self - .tree - .get(self.leaves.get(&sibling_id).unwrap())? - .parent - .unwrap(); self.last_id += 1; let mut node = Node::new(); node.id = self.last_id; - self.tree.add_child(node, parent_id, Some(edge))?; if verbosity > 1 { println!( - "assign cell with id {} to new node with id {} which is sibling of cell with id {}", - cell_id, self.last_id, sibling_id, + "create new node with id {}, with parent {} located at {} mutational distance", + node.id, parent_id, edge ); } - ensure!(self.leaves.insert(cell_id, self.last_id).is_none()); - Ok(()) + self.tree.add_child(node, parent_id, Some(edge))?; + Ok(self.leaves.insert(cell_id, self.last_id).is_some()) + // .with_context(|| "the proliferating cell is not registered as a leaf")?; } - pub fn put_cell_on_new_node( + pub fn assign_sibling( &mut self, cell_id: StemCellId, + sibling_id: StemCellId, edge: f64, verbosity: u8, - ) -> anyhow::Result<()> { - //! Update the tree with a new node and affect the cell with `cell_id` - //! to it. - if verbosity > 1 { - println!("create new node"); - } - let parent_id = self.tree.get(self.leaves.get(&cell_id).unwrap())?.id; - self.last_id += 1; - let mut node = Node::new(); - node.id = self.last_id; - self.tree.add_child(node, parent_id, Some(edge))?; - - // update leaves + ) -> anyhow::Result { + //! Returns whether the cell was already present in the system as a + //! leaf node. + let sibling = self.tree.get(self.leaves.get(&sibling_id).unwrap())?; + ensure!(sibling.is_tip()); + let parent_id = sibling.parent.unwrap(); if verbosity > 1 { println!( - "assign cell with id {} to new node with id {}", - cell_id, self.last_id + "assign cell with id {} to new node which is sibling of cell with id {}", + cell_id, sibling_id, ); } - self.leaves - .insert(cell_id, self.last_id) - .with_context(|| "the proliferating cell is not registered as a leaf")?; - Ok(()) + self.add_cell_from_parent_node(cell_id, parent_id, edge, verbosity) + } + + pub fn put_cell_on_new_node( + &mut self, + cell_id: StemCellId, + edge: f64, + verbosity: u8, + ) -> anyhow::Result { + //! Update the tree with a new node and affect the cell with `cell_id` + //! to it. + //! + //! ## Returns + //! Whether the cell to add on the new node was already present in the + //! system as a leaf node. + let parent = self + .tree + .get( + self.leaves + .get(&cell_id) + .with_context(|| "cannot get cell from leaves")?, + ) + .with_context(|| "cannot get node from the tree")?; + ensure!( + parent.is_tip(), + format!("parent node {} is not a leaf node", parent) + ); + self.add_cell_from_parent_node(cell_id, parent.id, edge, verbosity) } pub fn remove_cell_from_tree( @@ -109,13 +127,15 @@ impl PhyloTree { verbosity: u8, ) -> anyhow::Result<()> { //! Remove a cell from the tree, do not change the tree's topology as - //! we still the node where the cell was for the other alive lineages. + //! we still need the node which acts as an ancestral cell. if verbosity > 1 { println!("removing cell with id {cell_id} from the leaves"); } - self.leaves + let removed = self + .leaves .remove(&cell_id) .with_context(|| "the cell to remove is not registered as a leaf")?; + ensure!(self.tree.get(&removed)?.is_tip()); Ok(()) } @@ -133,15 +153,30 @@ impl PhyloTree { ) -> anyhow::Result { //! Use `cells` to select only the leaves of interest, dropping all the //! other leaves. - let mut cell2prune = self.leaves.clone(); - cell2prune.retain(|&k, _| !cells.iter().any(|x| x.id == k)); + // select leaves in self.leaves that are in `cells` + let mut leaves = self.leaves.clone(); + leaves.retain(|&k, _| cells.iter().any(|x| x.id == k)); + if verbosity > 0 { + println!("creating new tree with {} cells", cells.len()); + } + // get the node idx of those subset of leaves + let leaves: HashSet = leaves.into_values().collect(); + let mut pruned = self.tree.clone(); - for node_id in cell2prune.values() { - if verbosity > 1 { - println!("removing node with id {}", node_id); + while pruned + .get_leaves() + .iter() + .any(|leaf| !leaves.contains(leaf)) + { + for leaf in pruned.get_leaves() { + if !leaves.contains(&leaf) { + pruned + .prune(&leaf) + .with_context(|| format!("cannot remove node {}", leaf))?; + } } - pruned.prune(node_id)?; } + pruned.compress().with_context(|| "cannot compress")?; // pruned.rescale( // 1. / pruned @@ -154,6 +189,40 @@ impl PhyloTree { leaves: self.leaves.clone(), }) } + + pub fn get_mut_leaf(&mut self, cell_id: StemCellId) -> anyhow::Result<&mut Node> { + let node = self + .tree + .get_mut( + self.leaves + .get_mut(&cell_id) + .with_context(|| "cell not registered as leaf node")?, + ) + .with_context(|| "cannot find leaf tree node")?; + if !node.is_tip() { + bail!("node found is not a leaf node") + } + Ok(node) + } +} + +pub fn save_tree( + tree: &PhyloTree, + cells: &Vec<&StemCell>, + path: &Path, + verbosity: u8, +) -> anyhow::Result<()> { + if verbosity > 0 { + println!("Saving the tree"); + } + let mut pruned = tree.create_tree_without_dead_cells(cells, verbosity)?; + pruned.name_leaves_with_cell_idx()?; + // let pruned = tree.clone(); + if verbosity > 1 { + println!("Name leaf nodes with idx of cells"); + } + pruned.tree.to_file(path)?; + Ok(()) } #[cfg(test)] @@ -298,6 +367,21 @@ mod tests { } } + fn get_leaves_4test( + tree1: &TreeTest, + tree2: &PhyloTree, + cells: Vec<&StemCell>, + ) -> (Vec, Vec) { + let mut leaves_exp = cells + .into_iter() + .map(|cell| *tree1.phylo.leaves.get(&cell.id).unwrap()) + .collect::>(); + leaves_exp.sort_unstable(); + let mut leaves = tree2.tree.get_leaves(); + leaves.sort_unstable(); + (leaves, leaves_exp) + } + #[test] fn create_tree_without_dead_cells_keep_all() { let phylo = TreeTest::new(); @@ -317,12 +401,14 @@ mod tests { .create_tree_without_dead_cells(&cells2keep_ref, 0) .unwrap(); - let mut expected = phylo.phylo.tree; + let mut expected = phylo.phylo.tree.clone(); expected.compress().unwrap(); assert_eq!( pruned.tree.to_newick().unwrap(), expected.to_newick().unwrap() ); + let (leaves, leaves_exp) = get_leaves_4test(&phylo, &pruned, cells2keep_ref); + assert_eq!(leaves, leaves_exp); } #[test] @@ -353,6 +439,8 @@ mod tests { .unwrap(), "((A,(C,E)D)B,Z)F;" ); + let (leaves, leaves_exp) = get_leaves_4test(&phylo, &pruned, cells2keep_ref); + assert_eq!(leaves, leaves_exp); } #[test] @@ -383,6 +471,8 @@ mod tests { .unwrap(), "((A,E)B,(H,Z)G)F;" ); + let (leaves, leaves_exp) = get_leaves_4test(&phylo, &pruned, cells2keep_ref); + assert_eq!(leaves, leaves_exp); } #[test] @@ -413,5 +503,39 @@ mod tests { .unwrap(), "((H,Z)G,(C,E)D)F;" ); + let (leaves, leaves_exp) = get_leaves_4test(&phylo, &pruned, cells2keep_ref); + assert_eq!(leaves, leaves_exp); + } + + #[test] + fn create_tree_without_dead_cells_remove_a_and_c() { + let phylo = TreeTest::new(); + let cells2keep: Vec = phylo + .clone() + .cell_idx + .into_iter() + .map(StemCell::new) + .collect(); + let mut cells2keep_ref: Vec<&StemCell> = Vec::new(); + for cell in cells2keep.iter() { + // remove a and c + if cell.id != 2 && cell.id != 40 { + cells2keep_ref.push(cell); + } + } + let pruned = phylo + .phylo + .create_tree_without_dead_cells(&cells2keep_ref, 0) + .unwrap(); + + assert_eq!( + pruned + .tree + .to_formatted_newick(phylotree::tree::NewickFormat::OnlyNames) + .unwrap(), + "((H,Z)G,E)F;" + ); + let (leaves, leaves_exp) = get_leaves_4test(&phylo, &pruned, cells2keep_ref); + assert_eq!(leaves, leaves_exp); } }