From 0f1af68d4ab36a4e63b3d0ff248e60e0d5a7b8e0 Mon Sep 17 00:00:00 2001 From: Alexey Akhunov Date: Fri, 24 Mar 2017 22:48:07 +0000 Subject: [PATCH] Add multi-stage processing --- Cargo.lock | 100 ++++++++++++++++++++++ Cargo.toml | 2 + src/main.rs | 235 ++++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 301 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 521d803..a7f2436 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,9 +2,25 @@ name = "parse_bitcoin" version = "0.1.0" dependencies = [ + "bloomfilter 0.0.10 (registry+https://github.com/rust-lang/crates.io-index)", + "rust-base58 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "rust-crypto 0.2.36 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "bit-vec" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "bloomfilter" +version = "0.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bit-vec 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "gcc" version = "0.3.43" @@ -24,6 +40,72 @@ name = "libc" version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "num" +version = "0.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-bigint 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", + "num-complex 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", + "num-integer 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)", + "num-iter 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)", + "num-rational 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-bigint" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-integer 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.22 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-complex" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.22 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-integer" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-iter" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-integer 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-rational" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-bigint 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", + "num-integer 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.22 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-traits" +version = "0.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "rand" version = "0.3.15" @@ -37,6 +119,14 @@ name = "redox_syscall" version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "rust-base58" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "rust-crypto" version = "0.2.36" @@ -76,11 +166,21 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" [metadata] +"checksum bit-vec 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "5b97c2c8e8bbb4251754f559df8af22fb264853c7d009084a576cdf12565089d" +"checksum bloomfilter 0.0.10 (registry+https://github.com/rust-lang/crates.io-index)" = "91829eee74e7770807cc4f2ff8a18ec6560dbc835ba648fb2fab1c16c8dfbad3" "checksum gcc 0.3.43 (registry+https://github.com/rust-lang/crates.io-index)" = "c07c758b972368e703a562686adb39125707cc1ef3399da8c019fc6c2498a75d" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" "checksum libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)" = "88ee81885f9f04bff991e306fea7c1c60a5f0f9e409e99f6b40e3311a3363135" +"checksum num 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)" = "98b15ba84e910ea7a1973bccd3df7b31ae282bf9d8bd2897779950c9b8303d40" +"checksum num-bigint 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "5e2955fede25639c4f4f797e864b7585f20d98069c45e0c86b1d22a808eb9f77" +"checksum num-complex 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "3534898d8a1f6b16c12f9fc2f4eaabc7ecdcc55f267213caa8988fdc7d60ff94" +"checksum num-integer 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)" = "21e4df1098d1d797d27ef0c69c178c3fab64941559b290fcae198e0825c9c8b5" +"checksum num-iter 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)" = "f7d1891bd7b936f12349b7d1403761c8a0b85a18b148e9da4429d5d102c1a41e" +"checksum num-rational 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "c2dc5ea04020a8f18318ae485c751f8cfa1c0e69dcf465c29ddaaa64a313cc44" +"checksum num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)" = "e1cbfa3781f3fe73dc05321bed52a06d2d491eaa764c52335cf4399f046ece99" "checksum rand 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "022e0636ec2519ddae48154b028864bdce4eaf7d35226ab8e65c611be97b189d" "checksum redox_syscall 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "8dd35cc9a8bdec562c757e3d43c1526b5c6d2653e23e2315065bc25556550753" +"checksum rust-base58 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b313b91fcdc6719ad41fa2dad2b7e810b03833fae4bf911950e15529a5f04439" "checksum rust-crypto 0.2.36 (registry+https://github.com/rust-lang/crates.io-index)" = "f76d05d3993fd5f4af9434e8e436db163a12a9d40e1a58a726f27a01dfd12a2a" "checksum rustc-serialize 0.3.22 (registry+https://github.com/rust-lang/crates.io-index)" = "237546c689f20bb44980270c73c3b9edd0891c1be49cc1274406134a66d3957b" "checksum time 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "211b63c112206356ef1ff9b19355f43740fc3f85960c598a93d3a3d3ba7beade" diff --git a/Cargo.toml b/Cargo.toml index 138923d..ae4dec8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,3 +5,5 @@ authors = ["alexeyakhunov"] [dependencies] rust-crypto = "0.2.36" +rust-base58 = "0.0.4" +bloomfilter = "0.0.10" diff --git a/src/main.rs b/src/main.rs index 7468575..2ae9432 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,29 +1,46 @@ use std::fs; -use std::io::Result; +use std::result::Result; use std::fs::File; use std::fs::Metadata; -use std::io::Read; -use std::io::BufReader; -use std::io::Seek; -use std::io::SeekFrom; +use std::io; +use std::io::{Read, BufReader, Seek, SeekFrom}; use std::option::Option; +use std::fmt::{Write, format}; extern crate crypto; use crypto::digest::Digest; +extern crate rust_base58; + +use rust_base58::{ToBase58, FromBase58}; + +use std::collections::HashMap; + +extern crate bloomfilter; +use bloomfilter::Bloom; + fn main() { - match fs::read_dir("/Users/alexeyakhunov/Library/Application Support/Bitcoin/blocks") { - Err(why) => println!("{:?}", why), - Ok(dir_entries) => { - let mut block_number: u32 = 0; - for dir_entry in dir_entries { - block_number = parse_file(dir_entry, block_number); + let mut out_map = HashMap::new(); + let mut in_map = HashMap::new(); + for prefix in 0..4 { + match fs::read_dir("/Users/alexeyakhunov/Library/Application Support/Bitcoin/blocks") { + Err(why) => println!("{:?}", why), + Ok(dir_entries) => { + let mut block_number: u32 = 0; + for dir_entry in dir_entries { + block_number = parse_file(prefix, dir_entry, block_number, &mut in_map, &mut out_map); + if block_number > 1000000 { + break; + } + } } } }; } -fn parse_file(dir_entry: Result, initial_block: u32) -> u32 { +fn parse_file(prefix: u8, dir_entry: io::Result, initial_block: u32, + in_map: &mut HashMap<[u8;32], Vec>>>, + out_map: &mut HashMap<[u8;32], Vec>>>) -> u32 { match dir_entry { Err(why) => { println!("{:?}", why); @@ -41,8 +58,10 @@ fn parse_file(dir_entry: Result, initial_block: u32) -> u32 { 0 } Ok(mut file) => { - println!("Opened {:?}", filename); - read_blocks(&mut file, metadata.len(), initial_block) + println!("Opened {:?}, prefix {:?}", filename, prefix); + let bn = read_blocks(prefix, &mut file, metadata.len(), initial_block, in_map, out_map); + println!("outputs: {:?}, inputs: {:?}", out_map.len(), in_map.len()); + bn }, } } else { @@ -52,10 +71,11 @@ fn parse_file(dir_entry: Result, initial_block: u32) -> u32 { } } -fn read_blocks(file: &mut File, filesize: u64, initial_block: u32) -> u32 { +fn read_blocks(prefix: u8, file: &mut File, filesize: u64, initial_block: u32, + in_map: &mut HashMap<[u8;32], Vec>>>, + out_map: &mut HashMap<[u8;32], Vec>>>) -> u32 { let mut reader = BufReader::new(file); let mut buf: [u8; 4] = [0u8; 4]; - let mut block: Vec = vec![]; let mut block_number: u32 = initial_block; loop { match reader.read_exact(&mut buf) { @@ -71,23 +91,26 @@ fn read_blocks(file: &mut File, filesize: u64, initial_block: u32) -> u32 { reader.read_exact(&mut buf); let size = (buf[0] as u32) | ((buf[1] as u32)<<8) | ((buf[2] as u32)<<16) | ((buf[3] as u32)<<24); //println!("Read size {:?}", size); - println!("Block {:?}", block_number); - read_block(&mut reader, size as u64, &mut block); + //println!("Block {:?}", block_number); + read_block(prefix, &mut reader, size as u64, in_map, out_map); block_number += 1; } block_number } -fn read_block(reader:R, size: u64, block: &mut Vec) where R: Read, { - block.clear(); +fn read_block(prefix: u8, reader: R, size: u64, + in_map: &mut HashMap<[u8;32], Vec>>>, + out_map: &mut HashMap<[u8;32], Vec>>>) where R: Read { + let mut block: Vec = Vec::with_capacity(1024*1024); let mut block_reader = reader.take(size); - match block_reader.read_to_end(block) { + match block_reader.read_to_end(&mut block) { Err(_) => println!("Error reading block"), Ok(len) => { let (tx_count, pos) = read_varint(&block, 80); - println!("Number of transactions: {:?}", tx_count); + //println!("Number of transactions: {:?}", tx_count); let mut p = pos; for _ in 0..tx_count { + let tx_start = p; let version_p = read_u32(&block, p); p = version_p.1; // Read number of inputs @@ -96,38 +119,163 @@ fn read_block(reader:R, size: u64, block: &mut Vec) where R: Read, { p = num_inputs_p.1; for _ in 0..num_inputs { let is_coinbase = block[p..p+32] == [0u8;32]; - p += 32; // Skip prevout_hash + let prevout_hash_pos = p; + p += 32; + let prevout_n = read_u32(&block, p).0 as usize; p += 4; // Skip prevous_n let script_sig_len_p = read_varint(&block, p); let script_sig_len = script_sig_len_p.0; p = script_sig_len_p.1; if !is_coinbase { - let script_sig: &[u8] = &block[p..p+(script_sig_len as usize)]; - let decoded_script_sig = decode_script(script_sig); - //public_key_from_script(decoded_script_sig); + if (block[prevout_hash_pos] & 0x3) == prefix { + let id = copy_id(&block[prevout_hash_pos..prevout_hash_pos+32]); + let remove_from_map = match out_map.get_mut(&id) { + None => { + let inputs: &mut Vec>>; + if !in_map.contains_key(&id) { + in_map.insert(id, vec![]); + } + inputs = in_map.get_mut(&id).unwrap(); + // Add the input into the inputs vector + while inputs.len() < prevout_n { + inputs.push(None); + }; + if inputs.len() == prevout_n { + let cloned_script_sig = block[p..p+(script_sig_len as usize)].to_owned(); + inputs.push(Some(cloned_script_sig)); + }; + // Do not remove from map, because there is nothing to remove + false + }, + Some(prevout_scripts) => { + let to_remove = match prevout_scripts[prevout_n] { + None => false, + Some(ref prevout_script) => { + action_input_output(&id, &block[p..p+(script_sig_len as usize)], prevout_script.as_slice()); + true + } + }; + if to_remove { + prevout_scripts[prevout_n] = None; + }; + // If all elements are None, remove from the map + prevout_scripts.iter().filter(|x| x.is_some()).count() == 0 + } + }; + if remove_from_map { + out_map.remove(&block[prevout_hash_pos..prevout_hash_pos+32]); + } + } } - p = script_sig_len_p.1; p += script_sig_len as usize; // Skip script_sig for now p += 4; // Skip sequence } let num_outputs_p = read_varint(&block, p); let num_outputs = num_outputs_p.0; p = num_outputs_p.1; + let mut utxos: Vec>> = vec![]; for _ in 0..num_outputs { let value_p = read_u64(&block, p); p = value_p.1; let script_pub_key_len_p = read_varint(&block, p); - let script_pub_key_len = script_pub_key_len_p.0; + let script_pub_key_len = script_pub_key_len_p.0 as usize; p = script_pub_key_len_p.1; - p += script_pub_key_len as usize; // Skip script_pub_key for now + let cloned_script = block[p..p+script_pub_key_len].to_owned(); + utxos.push(Some(cloned_script)); + p += script_pub_key_len; // Skip script_pub_key for now } p += 4; // Skip locktime + let id = tx_id(&block[tx_start..p]); + if (id[0] & 0x3) == prefix { + // Go through UTXOs and check if we have matching inputs + let mut done_utxos = 0; + match in_map.remove(&id) { + None => {}, + Some(inputs) => { + for i in 0..inputs.len() { + match inputs[i] { + None => {}, + Some(ref input_script) => { + let to_remove = match utxos[i] { + None => false, + Some(ref output) => { + action_input_output(&id, input_script, output.as_slice()); + true + } + }; + if to_remove { + utxos[i] = None; + done_utxos += 1; + } + } + } + } + } + }; + if done_utxos < utxos.len() { + out_map.insert(id, utxos); + } + } } assert_eq!(size as usize, p); }, } } +fn action_input_output(tx_id: &[u8], input: &[u8], output: &[u8]) { + let decoded_script_sig = decode_script(input); + match decoded_script_sig { + Err(why) => { + println!("txid: {:?}, error: {:?}", print_32bytes(tx_id), why); + }, + Ok(decoded) => { + let addr = public_key_from_script(decoded); + match addr { + Some(addr_str) => { + //println!("{:?}", addr_str); + }, + None => {} + } + } + } +} + +fn prevout_hash(prevout_hash_slice: &[u8]) { + let mut a: [u8;32] = [0;32]; + a.clone_from_slice(prevout_hash_slice); + a.reverse(); + println!("Prevout_hash: {}", print_32bytes(&a)); +} + +fn copy_id(id_slice: &[u8]) -> [u8;32] { + let mut buffer32b: [u8;32] = [0;32]; + for i in 0..32 { + buffer32b[i] = id_slice[i]; + } + buffer32b +} + +fn tx_id(tx_slice: &[u8]) -> [u8;32] { + let mut sha256 = crypto::sha2::Sha256::new(); + sha256.input(tx_slice); + let mut buffer32b: [u8;32] = [0;32]; + sha256.result(&mut buffer32b); + sha256.reset(); + sha256.input(&buffer32b[0..32]); + sha256.result(&mut buffer32b); + buffer32b.reverse(); + buffer32b.reverse(); + buffer32b +} + +fn print_32bytes(bytes: &[u8]) -> String { + let mut s = String::new(); + for &byte in bytes { + write!(&mut s, "{:02x}", byte).unwrap(); + }; + s +} + fn read_u16(slice: &[u8], pos: usize) -> (u16, usize) { ((slice[pos] as u16) | ((slice[pos+1] as u16)<<8), pos+2) } @@ -172,7 +320,7 @@ enum Opcode { OpInvalidopcode = 0xFF, } -fn decode_script(slice: &[u8]) -> Vec<(u8, Option<&[u8]>)> { +fn decode_script(slice: &[u8]) -> Result)>,String> { let mut script: Vec<(u8, Option<&[u8]>)> = vec![]; let mut pos: usize = 0; while pos < slice.len() { @@ -192,16 +340,19 @@ fn decode_script(slice: &[u8]) -> Vec<(u8, Option<&[u8]>)> { (opcode as usize, pos) }; pos = new_pos; + if pos+n_size > slice.len() { + return Err(format!("pos {:?}, n_size {:?}", pos, n_size)); + } script.push((opcode, Some(&slice[pos..pos+n_size]))); pos += n_size; } else { script.push((opcode, None)); } } - script + Ok(script) } -fn public_key_from_script(decoded_script: Vec<(u8, Option<&[u8]>)>) -> Option<[u8;20]>{ +fn public_key_from_script(decoded_script: Vec<(u8, Option<&[u8]>)>) -> Option{ if decoded_script.len() == 0 { None } else if decoded_script[0].0 == Opcode::Op0 as u8 { @@ -209,16 +360,28 @@ fn public_key_from_script(decoded_script: Vec<(u8, Option<&[u8]>)>) -> Option<[u match decoded_script[decoded_script.len()-1].1 { None => None, Some(sub_script) => { - let decoded_sub_script = decode_script(sub_script); + //let decoded_sub_script = decode_script(sub_script); let mut sha256 = crypto::sha2::Sha256::new(); sha256.input(sub_script); let mut buffer32b: [u8;32] = [0;32]; sha256.result(&mut buffer32b); let mut ripemd160 = crypto::ripemd160::Ripemd160::new(); ripemd160.input(&buffer32b); - let mut buffer20b: [u8;20] = [0;20]; - ripemd160.result(&mut buffer20b); - None + let mut buffer25b: [u8;25] = [0;25]; + buffer25b[0] = 5; + ripemd160.result(&mut buffer25b[1..21]); + sha256.reset(); + sha256.input(&buffer25b[0..21]); + sha256.result(&mut buffer32b); + sha256.reset(); + sha256.input(&buffer32b[0..32]); + sha256.result(&mut buffer32b); + buffer25b[21] = buffer32b[0]; + buffer25b[22] = buffer32b[1]; + buffer25b[23] = buffer32b[2]; + buffer25b[24] = buffer32b[3]; + let addr = buffer25b.to_base58(); + Some(addr) } } } else if decoded_script[0].0 <= Opcode::OpPushdata4 as u8 {