REWRITE EVERYTHING!!!!!!
ericwang401 committed Mar 21, 2024
1 parent 98e73af commit e88441b
Showing 17 changed files with 451 additions and 206 deletions.
2 changes: 1 addition & 1 deletion src-tauri/Cargo.toml
@@ -17,7 +17,7 @@ tauri-build = { version = "1.5.0", features = [] }
[dependencies]
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
tauri = { version = "1.5.2", features = [ "app-all", "path-all", "shell-open", "dialog"] }
tauri = { version = "1.5.2", features = [ "dialog-open", "app-all", "path-all", "shell-open", "dialog"] }
tokio = { version = "1.32.0", features = ["full"] }
reqwest = "0.11.20"
zip-extract = "0.1.2"
61 changes: 61 additions & 0 deletions src-tauri/src/aggregator.rs
@@ -0,0 +1,61 @@
use std::io::Cursor;
use std::path::PathBuf;
use csv::ReaderBuilder;
use tokio::fs;

pub struct Calculation {
pub protein: String,
pub peptide: String,
pub neh: String,
pub charge: String,
pub mean: String,
pub n_ret_1: String,
pub mpe_0: String,
pub mpe_1: String,
pub two_sd_minus: String,
pub n_ret_2: String,
pub two_sd_plus: String,
pub n_ret_3: String,
pub samples_omitted: u64,
}

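// Merges the rows parsed from every RateConst spreadsheet into one flat list;
// each row carries the samples_omitted count of the spreadsheet it came from.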
pub async fn aggregate(spreadsheets: &Vec<(PathBuf, u64)>) -> Result<Vec<Calculation>, String> {
let mut calculations = vec![];

for spreadsheet in spreadsheets {
let mut spreadsheet_calculations = parse_calculations(spreadsheet).await?;
calculations.append(&mut spreadsheet_calculations);
}

Ok(calculations)
}

async fn parse_calculations(spreadsheet: &(PathBuf, u64)) -> Result<Vec<Calculation>, String> {
let contents = fs::read(&spreadsheet.0).await.map_err(|err| format!("Error reading calculations file: {}", err))?;
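// NOTE: ReaderBuilder defaults to has_headers = true, so the first CSV row
// is consumed as a header and never appears in rdr.records().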
let mut rdr = ReaderBuilder::new()
.from_reader(Cursor::new(contents));

let mut calculations = vec![];

for result in rdr.records() {
let record = result.map_err(|err| format!("Error reading record: {}", err))?;
let calculation = Calculation {
protein: record[0].to_string(),
peptide: record[1].trim().to_string(),
neh: record[2].to_string(),
charge: record[3].to_string(),
mean: record[4].to_string(),
n_ret_1: record[5].to_string(),
mpe_0: record[6].to_string(),
mpe_1: record[7].to_string(),
two_sd_minus: record[8].to_string(),
n_ret_2: record[9].to_string(),
two_sd_plus: record[10].to_string(),
n_ret_3: record[11].to_string(),
samples_omitted: spreadsheet.1,
};
calculations.push(calculation);
}

Ok(calculations)
}
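
Note for reviewers: with the default ReaderBuilder, record[i] indexing panics on any row with fewer than 12 fields. A minimal defensive sketch of the per-row construction (hypothetical parse_row helper, not part of this commit; unlike the code above it trims every field, not just the peptide):

// Hypothetical defensive variant of the row construction above.
// record.get(i) returns Option<&str> instead of panicking on a short row.
fn parse_row(record: &csv::StringRecord, samples_omitted: u64) -> Result<Calculation, String> {
    let field = |i: usize| -> Result<String, String> {
        record
            .get(i)
            .map(|s| s.trim().to_string())
            .ok_or_else(|| format!("row is missing column {i}"))
    };
    Ok(Calculation {
        protein: field(0)?,
        peptide: field(1)?,
        neh: field(2)?,
        charge: field(3)?,
        mean: field(4)?,
        n_ret_1: field(5)?,
        mpe_0: field(6)?,
        mpe_1: field(7)?,
        two_sd_minus: field(8)?,
        n_ret_2: field(9)?,
        two_sd_plus: field(10)?,
        n_ret_3: field(11)?,
        samples_omitted,
    })
}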
35 changes: 35 additions & 0 deletions src-tauri/src/analyzer.rs
@@ -0,0 +1,35 @@
use std::path::{Path, PathBuf};
use tokio::process::Command;
use crate::serializer::Dataset;


pub async fn analyze(deps_dir: &Path, data_dir: &Path, datasets: &Vec<Dataset>) -> Result<Vec<(PathBuf, u64)>, String> {
let mut results = vec![];

for dataset in datasets {
let result = analyze_single(deps_dir, data_dir, dataset).await?;
results.push(result);
}

Ok(results)
}

async fn analyze_single(deps_dir: &Path, data_dir: &Path, dataset: &Dataset) -> Result<(PathBuf, u64), String> {
let mut command = Command::new(deps_dir.join("SRM_Rate.exe")); // TODO: figure out lifetimes here

command.arg(dataset.heavy_water.to_str().unwrap())
.arg(dataset.spreadsheet.to_str().unwrap());

let input_file_name = dataset.spreadsheet.file_stem().unwrap().to_str().unwrap();

let output = command
.output()
.await
.map_err(|err| format!("Command couldn't run: {err}"))?;

if output.status.success() {
Ok((data_dir.join(format!("{input_file_name}.RateConst.csv")), dataset.samples_removed))
} else {
Err(format!("The command didn't complete successfully: {}", String::from_utf8_lossy(&output.stderr)))
}
}
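
Note for reviewers: analyze_single treats a zero exit status as proof that <stem>.RateConst.csv landed in data_dir. A small sketch that verifies the file actually exists before handing the path on (hypothetical expect_output helper, not part of this commit; assumes the naming convention used above):

// Hypothetical post-condition check; assumes SRM_Rate.exe writes
// "<stem>.RateConst.csv" into data_dir, as the code above does.
async fn expect_output(data_dir: &Path, input_file_name: &str) -> Result<PathBuf, String> {
    let path = data_dir.join(format!("{input_file_name}.RateConst.csv"));
    tokio::fs::metadata(&path)
        .await
        .map_err(|err| format!("SRM_Rate.exe succeeded but {} is missing: {err}", path.display()))?;
    Ok(path)
}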
135 changes: 40 additions & 95 deletions src-tauri/src/commands.rs
@@ -1,12 +1,15 @@
use crate::processor;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use tokio::io::AsyncWriteExt;
use std::io::Cursor;
use std::path::Path;

use reqwest::Client;
use tauri::api::dialog::blocking::FileDialogBuilder;
use tokio::fs;
use crate::processor::parser::parse;
use crate::aggregator::aggregate;
use crate::analyzer::analyze;

use crate::grouper::{group_by_na_columns, group_by_peptides};
use crate::parser::parse;
use crate::serializer::{serialize, serialize_calculations};

#[tauri::command]
pub async fn install_dependencies(app_handle: tauri::AppHandle) -> Result<(), String> {
@@ -38,65 +41,11 @@ pub async fn install_dependencies(app_handle: tauri::AppHandle) -> Result<(), String> {
Ok(())
}

#[derive(Serialize, Deserialize, Debug)]
pub struct File {
name: String,
path: String,
}

#[tauri::command]
pub async fn select_data(data_input_type: String) -> Result<Option<File>, String> {
match data_input_type.as_str() {
"inputData" => {
let file_path = FileDialogBuilder::new()
.add_filter("Input Data File", &vec!["csv"])
.pick_file();

if let Some(file_path) = file_path {
let file_name = file_path
.file_name()
.unwrap()
.to_string_lossy()
.into_owned();

return Ok(Some(File {
name: file_name,
path: file_path.to_string_lossy().into_owned(),
}));
}

Ok(None)
}
"heavyWaterInputData" => {
let file_path = FileDialogBuilder::new()
.add_filter("Heavy Water File", &vec!["txt"])
.pick_file();

if let Some(file_path) = file_path {
let file_name = file_path
.file_name()
.unwrap()
.to_string_lossy()
.into_owned();

return Ok(Some(File {
name: file_name,
path: file_path.to_string_lossy().into_owned(),
}));
}

Ok(None)
}
_ => Err("Invalid data input type".into()),
}
}

#[tauri::command]
pub async fn process_data(
app_handle: tauri::AppHandle,
should_remove_na_calculations: bool,
input_file_path: String,
heavy_water_file_path: String,
) -> Result<(), String> {
let data_dir = app_handle
.path_resolver()
@@ -109,42 +58,38 @@ pub async fn process_data(
.unwrap()
.join("dependencies");
let input_file_path = Path::new(&input_file_path);
let heavy_water_file_path = Path::new(&heavy_water_file_path);

dbg!(parse(input_file_path).await.unwrap());

// let output_contents = processor::handle(
// should_remove_na_calculations,
// &data_dir,
// &dependencies_dir,
// &input_file_path,
// heavy_water_file_path,
// )
// .await?;
//
// let input_file_name = input_file_path
// .file_stem()
// .unwrap()
// .to_string_lossy()
// .into_owned();
//
// let file_path = FileDialogBuilder::new()
// .set_file_name(&format!("{input_file_name}.RateConst.csv"))
// .add_filter("Output CSV File", &vec!["csv"])
// .save_file();
//
// if let Some(file_path) = file_path {
// // overwrite file if it already exists
// fs::OpenOptions::new()
// .write(true)
// .create(true)
// .open(&file_path)
// .await
// .map_err(|err| format!("Failed to write output file: {err}"))?
// .write(&output_contents)
// .await
// .map_err(|err| format!("Failed to write output file: {err}"))?;
// }

let (days, mice, labels, peptides) = parse(input_file_path).await.unwrap();

let groups = group_by_peptides(peptides);
let groups = group_by_na_columns(groups);

let datasets = serialize(
&data_dir,
days,
mice,
labels,
groups,
).await.unwrap();

let calculations = analyze(&dependencies_dir, &data_dir, &datasets).await.unwrap();
let calculations = aggregate(&calculations).await.unwrap();

let input_file_name = input_file_path
.file_stem()
.unwrap()
.to_string_lossy()
.into_owned();

let file_path = FileDialogBuilder::new()
.set_file_name(&format!("{input_file_name}.RateConst.csv"))
.add_filter("Output CSV File", &vec!["csv"])
.save_file();

if let Some(file_path) = file_path {
serialize_calculations(&file_path, &calculations).unwrap();
}


Ok(())
}
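
Note for reviewers: every stage above is unwrap()ed, so a malformed input file panics the whole command even though process_data returns Result<(), String>. A sketch of the same pipeline with errors propagated to the frontend instead (assumes the error types of parse and serialize implement Display; analyze and aggregate already return Result<_, String>, so ? works directly):

// Same pipeline as above, with panics replaced by error propagation.
let (days, mice, labels, peptides) = parse(input_file_path)
    .await
    .map_err(|err| format!("Failed to parse input file: {err}"))?;

let groups = group_by_na_columns(group_by_peptides(peptides));

let datasets = serialize(&data_dir, days, mice, labels, groups)
    .await
    .map_err(|err| format!("Failed to write intermediate datasets: {err}"))?;

let results = analyze(&dependencies_dir, &data_dir, &datasets).await?;
let calculations = aggregate(&results).await?;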
46 changes: 29 additions & 17 deletions src-tauri/src/grouper.rs
@@ -1,4 +1,5 @@
use std::collections::HashMap;

use crate::parser::Peptide;

#[derive(Debug, Clone)]
@@ -7,7 +8,7 @@ pub struct PeptideGroup {
pub na_columns: Vec<bool>,
}

pub fn group_by_peptides(peptides: Vec<Peptide>) -> (Vec<PeptideGroup>, Vec<NAGroup>) {
pub fn group_by_peptides(peptides: Vec<Peptide>) -> Vec<PeptideGroup> {
let mut sorted_peptides = peptides;
sorted_peptides.sort_by(|a, b| {
a.name.cmp(&b.name).then_with(|| {
@@ -16,7 +17,6 @@ pub fn group_by_peptides(peptides: Vec<Peptide>) -> (Vec<PeptideGroup>, Vec<NAGroup>) {
});

let mut groups: Vec<PeptideGroup> = Vec::new();
let mut separated_groups: Vec<NAGroup> = Vec::new();
let mut current_group: Vec<Peptide> = Vec::new();

for peptide in sorted_peptides {
@@ -29,11 +29,7 @@ pub fn group_by_peptides(peptides: Vec<Peptide>) -> (Vec<PeptideGroup>, Vec<NAGroup>) {
if (peptide.mass_charge_ratio - last_ratio).abs() < threshold {
current_group.push(peptide);
} else {
let peptide_group = create_peptide_group(&current_group);
separated_groups.push(NAGroup {
groups: vec![peptide_group.clone()],
na_columns: peptide_group.na_columns.clone()
});
groups.push(create_peptide_group(&current_group));
current_group = vec![peptide];
}
} else {
@@ -47,7 +43,7 @@ pub fn group_by_peptides(peptides: Vec<Peptide>) -> (Vec<PeptideGroup>, Vec<NAGroup>) {
groups.push(create_peptide_group(&current_group));
}

(groups, separated_groups)
groups
}


@@ -62,7 +58,7 @@ fn dynamic_threshold(peptides: &[Peptide]) -> f64 {
.sum::<f64>() / (peptides.len() - 1) as f64;
let std_deviation = variance.sqrt();
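// NOTE: (peptides.len() - 1) underflows for an empty slice and divides by
// zero for a single peptide (NaN threshold); callers are assumed to pass
// at least two peptides.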

std_deviation // You might want to scale this value based on your data.
std_deviation * 2.0 // You might want to scale this value based on your data.
}


@@ -85,22 +81,38 @@ fn create_peptide_group(peptides: &[Peptide]) -> PeptideGroup {

#[derive(Debug, Clone)]
pub struct NAGroup {
pub groups: Vec<PeptideGroup>,
pub peptides: Vec<Peptide>,
pub na_columns: Vec<bool>,
}

pub fn group_by_na_columns(groups: Vec<PeptideGroup>) -> Vec<NAGroup> {
let mut na_groups: HashMap<Vec<bool>, Vec<PeptideGroup>> = HashMap::new();
let mut na_groups: HashMap<(Vec<bool>, u64), Vec<Peptide>> = HashMap::new();

// Group peptide groups by similar na_columns
for group in groups {
na_groups.entry(group.na_columns.clone())
.or_insert_with(Vec::new)
.push(group);
let mut count = 1;
loop {
let key = (group.na_columns.clone(), count);
if let Some(peptide_group) = na_groups.get(&key) {
let name = group.peptides[0].name.clone();

if let Some(_) = peptide_group.iter().find(|&x| x.name == name) {
count += 1;
} else {
na_groups.entry(key).or_insert_with(Vec::new).extend(group.peptides);
break;
}
} else {
na_groups.insert(key, group.peptides);
break;
}
}
}

// Convert each group into a NAGroup with na_columns computed
na_groups.into_iter().map(|(na_columns, groups)| {
NAGroup { groups, na_columns }
na_groups.iter().map(|((na_cols, _), peptides)| {
NAGroup {
peptides: peptides.to_vec(),
na_columns: na_cols.to_vec(),
}
}).collect()
}
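
Note for reviewers: the keying on (na_columns, count) is subtle. Groups sharing an NA-column pattern merge into one bucket, unless the incoming group's first peptide name is already present in that bucket, in which case the whole group overflows into bucket count + 1. A standalone sketch of that behavior with the peptide payload reduced to a name (hypothetical values; mirrors the loop above):

use std::collections::HashMap;

fn main() {
    // (na_columns pattern, peptide names) for three incoming groups.
    let groups = vec![
        (vec![true, false], vec!["AAK"]),
        (vec![true, false], vec!["AAK"]), // duplicate name -> overflows to bucket #2
        (vec![true, false], vec!["LGR"]), // new name -> merges into bucket #1
    ];

    let mut buckets: HashMap<(Vec<bool>, u64), Vec<&str>> = HashMap::new();
    for (na_columns, names) in groups {
        let mut count = 1;
        loop {
            let key = (na_columns.clone(), count);
            if let Some(existing) = buckets.get(&key) {
                if existing.contains(&names[0]) {
                    count += 1; // name collision: try the next overflow bucket
                } else {
                    buckets.entry(key).or_insert_with(Vec::new).extend(names);
                    break;
                }
            } else {
                buckets.insert(key, names);
                break;
            }
        }
    }

    // Two buckets result for the (true, false) pattern (iteration order is
    // arbitrary): #1 -> ["AAK", "LGR"], #2 -> ["AAK"].
    for ((cols, n), names) in &buckets {
        println!("{cols:?} #{n}: {names:?}");
    }
}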
7 changes: 6 additions & 1 deletion src-tauri/src/lib.rs
@@ -1,2 +1,7 @@
pub mod commands;
pub mod processor;
pub mod processor;
pub mod parser;
pub mod grouper;
pub mod serializer;
pub mod analyzer;
pub mod aggregator;
2 changes: 1 addition & 1 deletion src-tauri/src/main.rs
@@ -6,7 +6,7 @@ use app::commands;
#[tokio::main]
async fn main() {
tauri::Builder::default()
.invoke_handler(tauri::generate_handler![commands::install_dependencies, commands::process_data, commands::select_data])
.invoke_handler(tauri::generate_handler![commands::install_dependencies, commands::process_data])
.run(tauri::generate_context!())
.expect("error while running tauri application");
}