@@ -6,19 +6,21 @@ use std::{
6
6
env:: args_os,
7
7
fs:: File ,
8
8
io:: { stdout, Write as _} ,
9
- path:: Path , sync:: { Arc , Mutex } ,
9
+ path:: Path ,
10
+ sync:: { Arc , Mutex } ,
10
11
} ;
11
12
12
13
use fixed:: types:: I48F16 ;
13
14
use fxhash:: FxHashMap ;
14
15
use memmap2:: Mmap ;
15
16
use mimalloc:: MiMalloc ;
16
17
use rayon:: {
17
- iter:: { ParallelIterator , IntoParallelRefIterator } ,
18
+ iter:: { IntoParallelRefIterator , ParallelIterator , FromParallelIterator } ,
18
19
slice:: { ParallelSlice , ParallelSliceMut } ,
19
20
} ;
20
21
21
- use std:: hash:: { Hash , Hasher } ;
22
+ use lazy_static:: lazy_static;
23
+ use std:: borrow:: Cow ;
22
24
23
25
type Value = I48F16 ;
24
26
@@ -89,12 +91,38 @@ fn fast_parse(input: &[u8]) -> Value {
89
91
Value :: from_num ( int) / Value :: from_num ( 10 )
90
92
}
91
93
94
+ #[ inline( always) ]
95
+ /// experimental and cost more than fast_parse (hashing+lookup cost > than trick above)
96
+ fn lookup_temp ( input : & [ u8 ] ) -> Value {
97
+ //get values from hashmap instead of parsing them.
98
+ //println!("getting value {:?}", std::str::from_utf8(input));
99
+ let key = Cow :: Borrowed ( input) ;
100
+ * TEMPURATURE_HASHMAP . get ( & key) . unwrap ( )
101
+ }
102
+
92
103
fn write_pair ( city : & [ u8 ] , record : & Record , out : & mut Vec < u8 > ) {
93
104
out. extend_from_slice ( city) ;
94
105
out. push ( b'=' ) ;
95
106
record. write ( out) ;
96
107
}
97
108
109
+ //store possible temp values -100.9 => 100.9
110
+ lazy_static ! {
111
+ static ref TEMPURATURE_HASHMAP : FxHashMap <Cow <' static , [ u8 ] >, Value > = {
112
+ let mut hashmap = FxHashMap :: with_capacity_and_hasher( 100 * 10 * 2 +10 , Default :: default ( ) ) ;
113
+
114
+ // Populate the hashmap with keys and values
115
+ for i in ( -1009 ..=1009 ) . step_by( 1 ) {
116
+ let value = i as f64 / 10.0 ;
117
+ let key_str = format!( "{:.1}" , value) ;
118
+ let key_bytes = key_str. as_bytes( ) . to_vec( ) ; // Convert to owned Vec<u8>
119
+ let key_cow: Cow <' static , [ u8 ] > = Cow :: Owned ( key_bytes) ;
120
+ hashmap. insert( key_cow, Value :: from_num( value) ) ;
121
+ }
122
+ hashmap
123
+ } ;
124
+ }
125
+
98
126
fn main ( ) {
99
127
// Simple mega parallel rayon solution
100
128
let path = args_os ( )
@@ -140,13 +168,12 @@ fn main() {
140
168
map1
141
169
} ) ;
142
170
143
- let mut sorted_data: Vec < ( & [ u8 ] , & Record ) > =
144
- data. par_iter ( ) . map ( |( & city, record) | ( city, record) ) . collect ( ) ;
171
+ let mut sorted_data: Vec < ( & & [ u8 ] , & Record ) > = Vec :: from_par_iter ( data. par_iter ( ) ) ;
145
172
146
173
// Use Rayon to parallelize the sorting in chunks.
147
174
// TODO : adjust the Chunk size depending on the target.
148
- const CHUNK_SIZE : usize = 1000000 ;
149
- sorted_data. par_chunks_mut ( CHUNK_SIZE ) . for_each ( |chunk| {
175
+ let chunk_size : usize = 1_000_000_000 / num_cpus :: get ( ) ;
176
+ sorted_data. par_chunks_mut ( chunk_size ) . for_each ( |chunk| {
150
177
chunk. par_sort_unstable_by_key ( |& ( city, _) | city) ;
151
178
} ) ;
152
179
@@ -165,10 +192,10 @@ fn main() {
165
192
166
193
out. push ( b'{' ) ;
167
194
168
- let out = Arc :: new ( Mutex :: new ( out) ) ;
195
+ let out = Arc :: new ( Mutex :: new ( out) ) ;
169
196
170
197
// Parallelize writing to output
171
- sorted_data. par_chunks ( CHUNK_SIZE ) . for_each ( |chunk| {
198
+ sorted_data. par_chunks ( chunk_size ) . for_each ( |chunk| {
172
199
let mut local_out = Vec :: with_capacity ( chunk. len ( ) * est_record_size) ;
173
200
174
201
if let Some ( & ( city, record) ) = chunk. first ( ) {
@@ -195,4 +222,4 @@ fn main() {
195
222
196
223
// No reason to waste time freeing memory and closing files and stuff
197
224
std:: process:: exit ( 0 ) ;
198
- }
225
+ }
0 commit comments