@@ -45,10 +45,10 @@ impl CsvReader {
4545
4646 let mut current_row = Vec :: new ( ) ;
4747
48- let cursor = 0 ;
48+ let mut cursor = 0 ;
4949
5050 for i in 0 ..self . quotation_bitsets . len ( ) {
51- let valid_quotations = remove_escaped_quotations ( self . quotation_bitsets [ i] ) ;
51+ let valid_quotations = remove_escaped_quotations ( self . quotation_bitsets [ i] ) ;
5252 let outside_quotations = !mark_inside_quotations ( valid_quotations) ;
5353
5454 let mut valid_commas = self . comma_bitsets [ i] & outside_quotations;
@@ -70,12 +70,10 @@ impl CsvReader {
7070 break ;
7171 }
7272
73- current_row. push (
74- Range {
75- start : cursor + bitset_cursor,
76- end : cursor + bitset_cursor + bits_traveled
77- }
78- ) ;
73+ current_row. push ( Range {
74+ start : cursor + bitset_cursor,
75+ end : cursor + bitset_cursor + bits_traveled,
76+ } ) ;
7977
8078 if first_whitespace < first_comma {
8179 rows. push ( Row :: from ( current_row. clone ( ) ) ) ;
@@ -86,15 +84,17 @@ impl CsvReader {
8684 valid_commas <<= bits_traveled + 1 ;
8785 valid_whitespace <<= bits_traveled + 1 ;
8886 }
87+
88+ cursor += bitset_cursor;
8989 }
90-
90+
9191 Ok ( rows)
9292 }
9393}
9494
9595fn remove_escaped_quotations ( q : u64 ) -> u64 {
9696 let escaped = q & ( q << 1 ) ;
97- let escaped = escaped | ( escaped>> 1 ) ;
97+ let escaped = escaped | ( escaped >> 1 ) ;
9898
9999 q & !escaped
100100}
@@ -116,21 +116,50 @@ fn mark_inside_quotations(mut x: u64) -> u64 {
116116
117117// todo, find a quicker way to do this
118118#[ inline]
119- fn build_u64 ( chunk : & [ u8x16 ] , broadcast : u8x16 ) -> u64 {
119+ fn build_u64 ( chunks : & [ u8x16 ] , broadcast : u8x16 ) -> u64 {
120120 let mut packed: u64 = 0 ;
121- for ( i, & c ) in chunk . iter ( ) . enumerate ( ) {
122- let word = c . eq ( broadcast) . bitset ( ) as u64 ;
121+ for ( i, & chunk ) in chunks . iter ( ) . enumerate ( ) {
122+ let word = chunk . eq ( broadcast) . bitset ( ) as u64 ;
123123 packed |= word << ( 48 - i * 16 ) ;
124124 }
125-
126125 packed
127126}
128127
129-
130128#[ cfg( test) ]
131129mod tests {
132130 use super :: * ;
133131
132+ #[ test]
133+ fn test_init ( ) -> Result < ( ) > {
134+ let data = std:: fs:: read ( "taxi_zone_lookup.csv" ) ?;
135+
136+ // todo fix alignment
137+ // let csv_reader1 = CsvReader::new(&data[..64]);
138+
139+ let slice63 = & data[ ..63 ] ;
140+
141+ let csv_reader1 = CsvReader :: new ( & slice63) ;
142+
143+ assert_eq ! ( csv_reader1. quotation_bitsets. len( ) , 1 ) ;
144+ assert_eq ! ( csv_reader1. whitespace_bitsets. len( ) , 1 ) ;
145+ assert_eq ! ( csv_reader1. comma_bitsets. len( ) , 1 ) ;
146+
147+ let mut longer_slice = vec ! [ ] ;
148+ longer_slice. extend_from_slice ( slice63) ;
149+ longer_slice. push ( 0x0A ) ;
150+ longer_slice. extend_from_slice ( & data[ 63 ..87 ] ) ;
151+
152+ let csv_reader2 = CsvReader :: new ( & longer_slice) ;
153+ assert_eq ! ( csv_reader2. quotation_bitsets. len( ) , 2 ) ;
154+ assert_eq ! ( csv_reader2. whitespace_bitsets. len( ) , 2 ) ;
155+ assert_eq ! ( csv_reader2. comma_bitsets. len( ) , 2 ) ;
156+
157+ assert_eq ! ( csv_reader1. quotation_bitsets[ 0 ] , csv_reader2. quotation_bitsets[ 0 ] ) ;
158+ assert_eq ! ( csv_reader1. whitespace_bitsets[ 0 ] , csv_reader2. whitespace_bitsets[ 0 ] ) ;
159+ assert_eq ! ( csv_reader1. comma_bitsets[ 0 ] , csv_reader2. comma_bitsets[ 0 ] ) ;
160+
161+ Ok ( ( ) )
162+ }
134163 fn check_line ( test : & [ u8 ] , expected : Vec < Vec < String > > ) -> Result < ( ) > {
135164 let mut reader = CsvReader :: new ( test) ;
136165 let rows = reader
@@ -232,7 +261,7 @@ mod tests {
232261 #[ test]
233262 fn read_taxi_zone_lookup_header ( ) -> Result < ( ) > {
234263 let data = b"\" LocationID\" ,\" Borough\" ,\" Zone\" ,\" service_zone\" \n " ;
235-
264+
236265 check_line (
237266 data,
238267 vec ! [ vec![
@@ -242,31 +271,29 @@ mod tests {
242271 r#""service_zone""# . to_string( ) ,
243272 ] ] ,
244273 ) ?;
245-
274+
246275 Ok ( ( ) )
247276 }
248277
249278 // #[test]
250279 // fn read_taxi_zone_lookup() -> Result<()> {
251280 // let data = std::fs::read("taxi_zone_lookup.csv")?;
252- // assert_eq!(
253- // b"\"LocationID\",\"Borough\",\"Zone\",\"service_zone\"\r\n",
254- // &data[..46]
255- // );
256- //
281+ //
282+ // // dbg!(String::from_utf8(data[..66].to_vec()).unwrap());
283+ //
257284 // let rows = CsvReader::new(&data).read()?;
258285 // dbg!(&rows);
259- //
286+ //
260287 // for row in CsvReader::new(&data).read()? {
261288 // let fields = row
262289 // .fields()
263290 // .into_iter()
264291 // .map(|field_range| String::from_utf8(data[field_range.clone()].to_vec()).unwrap())
265292 // .collect::<Vec<_>>();
266- //
293+ //
267294 // dbg!(fields);
268295 // }
269- //
296+ //
270297 // Ok(())
271298 // }
272299
0 commit comments