starting to fix the new line character case

lxaw · Nov 9, 2023 · a6c5cb4 · a6c5cb4
1 parent 3faee35
commit a6c5cb4
Show file tree

Hide file tree

Showing 8 changed files with 185 additions and 87 deletions.
diff --git a/decoded.txt b/decoded.txt
@@ -0,0 +1 @@
+The Project Gutenberg eBook of Anna Karenina, by Leo Tolstoy%This eBook is for the use of anyone anywhere in the United States and most other parts of the world at no cost and with almost no restrictions whatsoever. You may copy it, give it away or re-use it under the terms of the Project Gutenberg License included with this eBook or online at www.gutenberg.org. If you are not located in the United States, you will have to check the laws of the country where you are located before using this eBook.%Title: Anna Karenina%Author: Leo Tolstoy%Release Date: July 1, 1998 [eBook #1399]w
diff --git a/example_txt/anna_karenina.txt b/example_txt/anna_karenina.txt
@@ -1,4 +1,3 @@
-
 The Project Gutenberg eBook of Anna Karenina, by Leo Tolstoy
 This eBook is for the use of anyone anywhere in the United States and most other parts of the world at no cost and with almost no restrictions whatsoever. You may copy it, give it away or re-use it under the terms of the Project Gutenberg License included with this eBook or online at www.gutenberg.org. If you are not located in the United States, you will have to check the laws of the country where you are located before using this eBook.
 Title: Anna Karenina

diff --git a/example_txt/test01.txt b/example_txt/test01.txt
@@ -0,0 +1,5 @@
+The Project Gutenberg eBook of Anna Karenina, by Leo Tolstoy
+This eBook is for the use of anyone anywhere in the United States and most other parts of the world at no cost and with almost no restrictions whatsoever. You may copy it, give it away or re-use it under the terms of the Project Gutenberg License included with this eBook or online at www.gutenberg.org. If you are not located in the United States, you will have to check the laws of the country where you are located before using this eBook.
+Title: Anna Karenina
+Author: Leo Tolstoy
+Release Date: July 1, 1998 [eBook #1399]
diff --git a/out.bin b/out.bin
diff --git a/out.binc b/out.binc
diff --git a/out.txt b/out.txt
@@ -1,4 +1,50 @@
-A 0
-B 10
-C 110
-D 111
+  111
+# 100100000
+% 0100011
+, 1000010
+- 100100001
+. 1001100
+1 11000001
+3 100100010
+8 100100011
+9 1000011
+: 0000100
+A 0000101
+B 1001101
+D 100100100
+G 01000101
+I 100100101
+J 100100110
+K 10001010
+L 0000110
+P 10001011
+R 100100111
+S 10001100
+T 1001110
+U 10001101
+Y 110000000
+[ 110000001
+] 01000100
+a 0011
+b 1001111
+c 110001
+d 100101
+e 011
+f 100000
+g 010000
+h 10100
+i 11001
+j 10001110
+k 1100001
+l 00101
+m 1000100
+n 0101
+o 1101
+p 10001111
+r 0001
+s 10101
+t 1011
+u 01001
+v 0000111
+w 00000
+y 00100
diff --git a/src/huffman.rs b/src/huffman.rs
@@ -2,7 +2,7 @@ use std::{collections::{BTreeMap,VecDeque}};
 use super::node::{Node};
 
 const SPECIAL_CHAR: char = '\0';
-
+const NEW_LINE: char = '%';
 
 fn build_huff_tree(nodes : &mut Vec<Node>) -> Node{
     // alg:
@@ -54,25 +54,6 @@ fn mark_tree(root: &mut Option<Box<Node>>,marker:&mut Vec<bool>){
         }
     }
 }
-// fn mark_tree(root: &mut Option<Box<Node>>,marker: &mut String){
-//     let mut stack = VecDeque::new();
-
-//     if let Some(node) = root {
-//         stack.push_back(node);
-
-//         while let Some(node) = stack.pop_back() {
-//             print!("{} ", node.freq);
-
-//             if let Some(right) = node.r{
-//                 stack.push_back(& mut right);
-//             }
-
-//             if let Some(left) = node.r{
-//                 stack.push_back(&mut left);
-//             }
-//         }
-//     }
-// }
 pub fn get_hash_of_tree(root: Option<Box<Node>>) -> BTreeMap<char,Vec<bool>>{
     let mut ret_hash: BTreeMap<char,Vec<bool>> = BTreeMap::new();
 
@@ -122,7 +103,7 @@ pub fn get_tree_root(msg: &String) -> Option<Box<Node>>{
     tree_head_ref
 }
 
-pub fn decode_encoded_str(encoded_msg: String, map: &BTreeMap<char,String>) -> String{
+pub fn decode_encoded_str(encoded_msg: &String, map: &BTreeMap<char,String>) -> String{
     // decode string
     let mut ret = String::new();
     let mut msg_copy = encoded_msg.clone();
@@ -147,7 +128,11 @@ fn convert_to_code_str(original_msg: &String,map: &BTreeMap<char,Vec<bool>>) ->V
     let mut ret = Vec::new();
 
     for c in original_msg.chars(){
-        ret.extend(map.get(&c).unwrap());
+        if c == '\n'{
+            ret.extend(map.get(&NEW_LINE).unwrap());
+        }else{
+            ret.extend(map.get(&c).unwrap());
+        }
     }
 
     ret
@@ -164,15 +149,27 @@ fn get_hash_char_freq(msg:String) -> BTreeMap<char,usize> {
         ...
     }
     */
+    // very important
+    // FIX THIS SO THAT WE CAN WORK WITH SPACES AND NEW LINES AND TABS
     let mut ret_hash: BTreeMap<char,usize> = BTreeMap::new();
 
     for (_i,c) in msg.chars().enumerate(){
-        if ret_hash.contains_key(&c){
-            // if contains, just add to the freq
-            ret_hash.insert(c,1+ret_hash[&c]);
+        if c == '\n'{
+            if ret_hash.contains_key(&NEW_LINE){
+                // if contains, just add to the freq
+                ret_hash.insert(NEW_LINE,1+ret_hash[&NEW_LINE]);
+            }else{
+                // first entry
+                ret_hash.insert(NEW_LINE,1);
+            }
         }else{
-            // first entry
-            ret_hash.insert(c,1);
+            if ret_hash.contains_key(&c){
+                // if contains, just add to the freq
+                ret_hash.insert(c,1+ret_hash[&c]);
+            }else{
+                // first entry
+                ret_hash.insert(c,1);
+            }
         }
     }
 

diff --git a/src/main.rs b/src/main.rs
@@ -1,12 +1,34 @@
 use std::collections::BTreeMap;
 use std::fs::File;
 use std::fs;
-use std::io::{Read, Result,Write};
+use std::io::{Read, Result,Write, BufReader, BufRead};
 use chibiTxt::huffman;
+use std::env;
 
 
-// for command line args
-use clap::{Arg,Command};
+fn get_hash_from_txt(file_name: &str) -> Result<BTreeMap<char, String> > {
+    // Create a BTreeMap to store the data
+    let mut data_map: BTreeMap<char, String> = BTreeMap::new();
+
+    if let Ok(file) = File::open(file_name) {
+        let reader = BufReader::new(file);
+
+        for (line_number, line) in reader.lines().enumerate() {
+            if let Ok(line_text) = line {
+
+                // Check if the line has the expected format
+                if let (Some(first_char), Some(rest)) = (line_text.chars().next(),line_text.get(1..)) {
+                    // Trim white space from the "rest" string
+                    let trimmed_rest = rest.trim();
+                    // Insert the data into the BTreeMap
+                    data_map.insert(first_char, trimmed_rest.to_string());
+                }
+            }
+        }
+    }
+
+    Ok(data_map)
+}
 
 
 fn read_file_to_string(filename: &str) -> Result<String> {
@@ -88,58 +110,86 @@ fn print_hash_to_file(map: &BTreeMap<char,Vec<bool>>, filename: &str) -> std::io
 }
 
 fn main() {
+    let args: Vec<String> = env::args().collect();
+
+    if args.len() < 4 {
+        println!("Usage: {} <-e/-d> <input> <output>", args[0]);
+        return;
+    }
+
+    let flag = &args[1];
+
+    match flag.as_str() {
+        "-e" => {
+
+            let input_file_name= &args[2];
+            let output_file_name= &args[3];
+            // Handle the case for the "-e" argument
+
+            // this should be actually checking for error
+            let str_content =read_file_to_string(input_file_name).unwrap();
+            // let file_size_bytes = get_file_size_bytes(input_file_name).unwrap() as f64;
+            // println!("file size prior: {}",file_size_bytes);
+
+            // create the hash 
+            let tree_root = huffman::get_tree_root(&str_content);
+            let hash_code= huffman::get_hash_of_tree(tree_root);
+
+            // encode the file
+            let str_encoded = huffman::encode_file(&str_content,&hash_code);
+
+            let my_vec =str_encoded.to_vec();
+
+            // file size before
+            // should thread this
+            let prior_file_size = get_file_size_bytes(&input_file_name).unwrap() as f64;
+
+            write_str_to_file(&output_file_name, &my_vec);
+            let after_file_size = get_file_size_bytes(&output_file_name).unwrap() as f64;
+
+            let percentage = (prior_file_size / after_file_size) * 100.0;
+
+            println!("File compression percentage: {percentage}");
+            println!("Old file size: {prior_file_size} (in bytes)");
+            println!("After file size: {after_file_size} (in bytes)");
+
+            print_hash_to_file(&hash_code,"out.txt");
+        }
+        "-d" => {
+            let input_bin_name= &args[2];
+            let input_dict_name= &args[3];
+            let output_file_name = &args[4];
+
+            // input is a binary file
+            let mut file_contents = String::new();
+
+            let mut file = File::open(input_bin_name).unwrap();
+            // Create a buffer to read data into
+            let mut buffer = [0; 1]; // Read one byte (8 bits) at a time
+
+            // Read and process the file
+            while let Ok(bytes_read) = file.read(&mut buffer) {
+                if bytes_read == 0 {
+                    break; // End of file
+                }
+
+                // Process each bit in the byte
+                for i in 0..8 {
+                    let bit = (buffer[0] & (1 << i)) >> i;
+                    file_contents.push_str(&bit.to_string());
+                }
+            }
+
+            let map = get_hash_from_txt(&input_dict_name).unwrap();
+            let file_decrypted = huffman::decode_encoded_str(&file_contents, &map);
+            let mut file = File::create(output_file_name).unwrap();
+            write!(file,"{}",file_decrypted);
+        }
+        _ => {
+            println!("Unknown argument: {}", flag);
+            println!("Usage: {} <-e/-d> <input> <output>", args[0]);
+        }
+    }
 
-    // clap
-    let matches = Command::new("chibiTxt")
-        .version("0.true")
-        .author("Lex W. <https://github.com/lxaw/chibiTxt>")
-        .about("A simple file encoder.")
-        .arg(Arg::new("input")
-            .value_name("INPUT_FILE")
-            .help("Sets the input text file name.")
-            .required(true)
-            .index(1)
-        )
-        .arg(Arg::new("output")
-            .value_name("OUTPUT_FILE")
-            .help("Sets the output binary file name.")
-            .required(true)
-            .index(2)
-        )
-        .get_matches();
-
-    let input_file_name = matches.get_one::<String>("input").unwrap();
-    let output_file_name = matches.get_one::<String>("output").unwrap();
-
-    // this should be actually checking for error
-    let str_content =read_file_to_string(input_file_name).unwrap();
-    // let file_size_bytes = get_file_size_bytes(input_file_name).unwrap() as f64;
-    // println!("file size prior: {}",file_size_bytes);
-
-    // create the hash 
-    let tree_root = huffman::get_tree_root(&str_content);
-    let hash_code= huffman::get_hash_of_tree(tree_root);
-
-    // encode the file
-    let str_encoded = huffman::encode_file(&str_content,&hash_code);
-
-    let my_vec =str_encoded.to_vec();
-
-    // file size before
-    // should thread this
-    let prior_file_size = get_file_size_bytes(&input_file_name).unwrap() as f64;
-
-    // println!("{}",huffman::decode_encoded_str(str_encoded,&hash_code));
-
-    write_str_to_file(&output_file_name, &my_vec);
-    let after_file_size = get_file_size_bytes(&output_file_name).unwrap() as f64;
-
-    let percentage = (prior_file_size / after_file_size) * 100.0;
-
-    println!("File compression percentage: {percentage}");
-    println!("Old file size: {prior_file_size} (in bytes)");
-    println!("After file size: {after_file_size} (in bytes)");
-
-    print_hash_to_file(&hash_code,"out.txt");
 
 }