Skip to content

Commit

Permalink
starting to fix the new line character case
Browse files Browse the repository at this point in the history
  • Loading branch information
lxaw committed Nov 9, 2023
1 parent 3faee35 commit a6c5cb4
Show file tree
Hide file tree
Showing 8 changed files with 185 additions and 87 deletions.
1 change: 1 addition & 0 deletions decoded.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The Project Gutenberg eBook of Anna Karenina, by Leo Tolstoy%This eBook is for the use of anyone anywhere in the United States and most other parts of the world at no cost and with almost no restrictions whatsoever. You may copy it, give it away or re-use it under the terms of the Project Gutenberg License included with this eBook or online at www.gutenberg.org. If you are not located in the United States, you will have to check the laws of the country where you are located before using this eBook.%Title: Anna Karenina%Author: Leo Tolstoy%Release Date: July 1, 1998 [eBook #1399]w
1 change: 0 additions & 1 deletion example_txt/anna_karenina.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

The Project Gutenberg eBook of Anna Karenina, by Leo Tolstoy
This eBook is for the use of anyone anywhere in the United States and most other parts of the world at no cost and with almost no restrictions whatsoever. You may copy it, give it away or re-use it under the terms of the Project Gutenberg License included with this eBook or online at www.gutenberg.org. If you are not located in the United States, you will have to check the laws of the country where you are located before using this eBook.
Title: Anna Karenina
Expand Down
5 changes: 5 additions & 0 deletions example_txt/test01.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
The Project Gutenberg eBook of Anna Karenina, by Leo Tolstoy
This eBook is for the use of anyone anywhere in the United States and most other parts of the world at no cost and with almost no restrictions whatsoever. You may copy it, give it away or re-use it under the terms of the Project Gutenberg License included with this eBook or online at www.gutenberg.org. If you are not located in the United States, you will have to check the laws of the country where you are located before using this eBook.
Title: Anna Karenina
Author: Leo Tolstoy
Release Date: July 1, 1998 [eBook #1399]
Binary file modified out.bin
Binary file not shown.
Binary file added out.binc
Binary file not shown.
54 changes: 50 additions & 4 deletions out.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,50 @@
A 0
B 10
C 110
D 111
111
# 100100000
% 0100011
, 1000010
- 100100001
. 1001100
1 11000001
3 100100010
8 100100011
9 1000011
: 0000100
A 0000101
B 1001101
D 100100100
G 01000101
I 100100101
J 100100110
K 10001010
L 0000110
P 10001011
R 100100111
S 10001100
T 1001110
U 10001101
Y 110000000
[ 110000001
] 01000100
a 0011
b 1001111
c 110001
d 100101
e 011
f 100000
g 010000
h 10100
i 11001
j 10001110
k 1100001
l 00101
m 1000100
n 0101
o 1101
p 10001111
r 0001
s 10101
t 1011
u 01001
v 0000111
w 00000
y 00100
51 changes: 24 additions & 27 deletions src/huffman.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::{collections::{BTreeMap,VecDeque}};
use super::node::{Node};

const SPECIAL_CHAR: char = '\0';

const NEW_LINE: char = '%';

fn build_huff_tree(nodes : &mut Vec<Node>) -> Node{
// alg:
Expand Down Expand Up @@ -54,25 +54,6 @@ fn mark_tree(root: &mut Option<Box<Node>>,marker:&mut Vec<bool>){
}
}
}
// fn mark_tree(root: &mut Option<Box<Node>>,marker: &mut String){
// let mut stack = VecDeque::new();

// if let Some(node) = root {
// stack.push_back(node);

// while let Some(node) = stack.pop_back() {
// print!("{} ", node.freq);

// if let Some(right) = node.r{
// stack.push_back(& mut right);
// }

// if let Some(left) = node.r{
// stack.push_back(&mut left);
// }
// }
// }
// }
pub fn get_hash_of_tree(root: Option<Box<Node>>) -> BTreeMap<char,Vec<bool>>{
let mut ret_hash: BTreeMap<char,Vec<bool>> = BTreeMap::new();

Expand Down Expand Up @@ -122,7 +103,7 @@ pub fn get_tree_root(msg: &String) -> Option<Box<Node>>{
tree_head_ref
}

pub fn decode_encoded_str(encoded_msg: String, map: &BTreeMap<char,String>) -> String{
pub fn decode_encoded_str(encoded_msg: &String, map: &BTreeMap<char,String>) -> String{
// decode string
let mut ret = String::new();
let mut msg_copy = encoded_msg.clone();
Expand All @@ -147,7 +128,11 @@ fn convert_to_code_str(original_msg: &String,map: &BTreeMap<char,Vec<bool>>) ->V
let mut ret = Vec::new();

for c in original_msg.chars(){
ret.extend(map.get(&c).unwrap());
if c == '\n'{
ret.extend(map.get(&NEW_LINE).unwrap());
}else{
ret.extend(map.get(&c).unwrap());
}
}

ret
Expand All @@ -164,15 +149,27 @@ fn get_hash_char_freq(msg:String) -> BTreeMap<char,usize> {
...
}
*/
// very important
// FIX THIS SO THAT WE CAN WORK WITH SPACES AND NEW LINES AND TABS
let mut ret_hash: BTreeMap<char,usize> = BTreeMap::new();

for (_i,c) in msg.chars().enumerate(){
if ret_hash.contains_key(&c){
// if contains, just add to the freq
ret_hash.insert(c,1+ret_hash[&c]);
if c == '\n'{
if ret_hash.contains_key(&NEW_LINE){
// if contains, just add to the freq
ret_hash.insert(NEW_LINE,1+ret_hash[&NEW_LINE]);
}else{
// first entry
ret_hash.insert(NEW_LINE,1);
}
}else{
// first entry
ret_hash.insert(c,1);
if ret_hash.contains_key(&c){
// if contains, just add to the freq
ret_hash.insert(c,1+ret_hash[&c]);
}else{
// first entry
ret_hash.insert(c,1);
}
}
}

Expand Down
160 changes: 105 additions & 55 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,34 @@
use std::collections::BTreeMap;
use std::fs::File;
use std::fs;
use std::io::{Read, Result,Write};
use std::io::{Read, Result,Write, BufReader, BufRead};
use chibiTxt::huffman;
use std::env;


// for command line args
use clap::{Arg,Command};
/// Loads a character-to-code table from a text file.
///
/// Each line holds one entry: the first character of the line is the key,
/// and the remainder of the line, with surrounding whitespace trimmed, is the
/// code string stored for that key. Empty lines are skipped. If the same key
/// appears on more than one line, the last occurrence wins.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or if a
/// line cannot be read (for example, because it is not valid UTF-8).
fn get_hash_from_txt(file_name: &str) -> Result<BTreeMap<char, String>> {
    // Propagate open failures instead of returning an empty table, which
    // would make the caller decode against nothing without any diagnostic.
    let reader = BufReader::new(File::open(file_name)?);
    let mut data_map: BTreeMap<char, String> = BTreeMap::new();

    for line in reader.lines() {
        let line_text = line?;
        let mut chars = line_text.chars();

        // Split after the first *character*, not the first byte: slicing at
        // byte offset 1 fails for multi-byte keys and would drop the entry.
        if let Some(first_char) = chars.next() {
            data_map.insert(first_char, chars.as_str().trim().to_string());
        }
    }

    Ok(data_map)
}


fn read_file_to_string(filename: &str) -> Result<String> {
Expand Down Expand Up @@ -88,58 +110,86 @@ fn print_hash_to_file(map: &BTreeMap<char,Vec<bool>>, filename: &str) -> std::io
}

fn main() {
let args: Vec<String> = env::args().collect();

if args.len() < 4 {
println!("Usage: {} <-e/-d> <input> <output>", args[0]);
return;
}

let flag = &args[1];

match flag.as_str() {
"-e" => {

let input_file_name= &args[2];
let output_file_name= &args[3];
// Handle the case for the "-e" argument

// this should be actually checking for error
let str_content =read_file_to_string(input_file_name).unwrap();
// let file_size_bytes = get_file_size_bytes(input_file_name).unwrap() as f64;
// println!("file size prior: {}",file_size_bytes);

// create the hash
let tree_root = huffman::get_tree_root(&str_content);
let hash_code= huffman::get_hash_of_tree(tree_root);

// encode the file
let str_encoded = huffman::encode_file(&str_content,&hash_code);

let my_vec =str_encoded.to_vec();

// file size before
// should thread this
let prior_file_size = get_file_size_bytes(&input_file_name).unwrap() as f64;

write_str_to_file(&output_file_name, &my_vec);
let after_file_size = get_file_size_bytes(&output_file_name).unwrap() as f64;

let percentage = (prior_file_size / after_file_size) * 100.0;

println!("File compression percentage: {percentage}");
println!("Old file size: {prior_file_size} (in bytes)");
println!("After file size: {after_file_size} (in bytes)");

print_hash_to_file(&hash_code,"out.txt");
}
"-d" => {
let input_bin_name= &args[2];
let input_dict_name= &args[3];
let output_file_name = &args[4];

// input is a binary file
let mut file_contents = String::new();

let mut file = File::open(input_bin_name).unwrap();
// Create a buffer to read data into
let mut buffer = [0; 1]; // Read one byte (8 bits) at a time

// Read and process the file
while let Ok(bytes_read) = file.read(&mut buffer) {
if bytes_read == 0 {
break; // End of file
}

// Process each bit in the byte
for i in 0..8 {
let bit = (buffer[0] & (1 << i)) >> i;
file_contents.push_str(&bit.to_string());
}
}

let map = get_hash_from_txt(&input_dict_name).unwrap();
let file_decrypted = huffman::decode_encoded_str(&file_contents, &map);
let mut file = File::create(output_file_name).unwrap();
write!(file,"{}",file_decrypted);
}
_ => {
println!("Unknown argument: {}", flag);
println!("Usage: {} <-e/-d> <input> <output>", args[0]);
}
}

// clap
let matches = Command::new("chibiTxt")
.version("0.true")
.author("Lex W. <https://github.com/lxaw/chibiTxt>")
.about("A simple file encoder.")
.arg(Arg::new("input")
.value_name("INPUT_FILE")
.help("Sets the input text file name.")
.required(true)
.index(1)
)
.arg(Arg::new("output")
.value_name("OUTPUT_FILE")
.help("Sets the output binary file name.")
.required(true)
.index(2)
)
.get_matches();

let input_file_name = matches.get_one::<String>("input").unwrap();
let output_file_name = matches.get_one::<String>("output").unwrap();

// this should be actually checking for error
let str_content =read_file_to_string(input_file_name).unwrap();
// let file_size_bytes = get_file_size_bytes(input_file_name).unwrap() as f64;
// println!("file size prior: {}",file_size_bytes);

// create the hash
let tree_root = huffman::get_tree_root(&str_content);
let hash_code= huffman::get_hash_of_tree(tree_root);

// encode the file
let str_encoded = huffman::encode_file(&str_content,&hash_code);

let my_vec =str_encoded.to_vec();

// file size before
// should thread this
let prior_file_size = get_file_size_bytes(&input_file_name).unwrap() as f64;

// println!("{}",huffman::decode_encoded_str(str_encoded,&hash_code));

write_str_to_file(&output_file_name, &my_vec);
let after_file_size = get_file_size_bytes(&output_file_name).unwrap() as f64;

let percentage = (prior_file_size / after_file_size) * 100.0;

println!("File compression percentage: {percentage}");
println!("Old file size: {prior_file_size} (in bytes)");
println!("After file size: {after_file_size} (in bytes)");

print_hash_to_file(&hash_code,"out.txt");

}

0 comments on commit a6c5cb4

Please sign in to comment.