Skip to content

Commit

Permalink
Truncate negative millisecond fractions (#71)
Browse files Browse the repository at this point in the history
  • Loading branch information
sydney-runkle authored Jul 3, 2024
1 parent a036efb commit 27b17f7
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 42 deletions.
22 changes: 22 additions & 0 deletions benches/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,3 +248,25 @@ fn format_date_time(bench: &mut Bencher) {
black_box(date.to_string());
})
}

/// Benchmarks `DateTime::parse_str` on unix-timestamp strings: positive and
/// negative integers, plus fractional values at both the shorter and the
/// longer (three extra digits) magnitudes.
#[bench]
fn parse_timestamp_str(bench: &mut Bencher) {
    // `black_box` keeps the optimizer from treating the inputs as constants.
    let inputs = black_box([
        "1654646400",
        "-1654646400",
        "1654646404",
        "-1654646404",
        "1654646404.5",
        "1654646404.123456",
        "1654646404000.5",
        "1654646404123.456",
        "-1654646404.123456",
        "-1654646404000.123",
    ]);

    bench.iter(|| {
        inputs.iter().for_each(|&raw| {
            // Every input is expected to parse; a failure aborts the benchmark.
            black_box(DateTime::parse_str(black_box(raw)).unwrap());
        });
    });
}
77 changes: 35 additions & 42 deletions src/datetime.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use crate::date::MS_WATERSHED;
use crate::{int_parse_bytes, MicrosecondsPrecisionOverflowBehavior, TimeConfigBuilder};
use crate::{
float_parse_bytes, numbers::decimal_digits, IntFloat, MicrosecondsPrecisionOverflowBehavior, TimeConfigBuilder,
};
use crate::{time::TimeConfig, Date, ParseError, Time};
use std::cmp::Ordering;
use std::fmt;
Expand Down Expand Up @@ -339,50 +341,41 @@ impl DateTime {
// Parses a datetime from raw bytes. RFC 3339 parsing is attempted first; when that
// fails the bytes are re-read as a numeric unix timestamp, and if that also fails the
// original RFC 3339 error `e` is returned.
//
// NOTE(review): this is a diff hunk rendered without +/- markers. The removed
// implementation (the `Err(e) => {` arm built on `int_parse_bytes`) and its replacement
// (the `Err(e) => match float_parse_bytes(bytes) {` arm) are interleaved below, so the
// text does not compile as-is.
pub fn parse_bytes_with_config(bytes: &[u8], config: &TimeConfig) -> Result<Self, ParseError> {
match Self::parse_bytes_rfc3339_with_config(bytes, config) {
Ok(d) => Ok(d),
Err(e) => {
let mut split = bytes.splitn(2, |&b| b == b'.');
let Some(timestamp) =
int_parse_bytes(split.next().expect("splitn always returns at least one element"))
else {
return Err(e);
};
let float_fraction = split.next();
debug_assert!(split.next().is_none()); // at most two elements
match float_fraction {
// If fraction exists but is empty (i.e. trailing `.`), allow for backwards compatibility;
// TODO might want to reconsider this later?
Some(b"") | None => Self::from_timestamp_with_config(timestamp, 0, config),
Some(fract) => {
// fraction is either:
// - up to 3 digits of millisecond fractions, i.e. microseconds
// - or up to 6 digits of second fractions, i.e. microseconds
let max_digits = if timestamp > MS_WATERSHED { 3 } else { 6 };
let Some(fract_integers) = int_parse_bytes(fract) else {
return Err(e);
};
if config.microseconds_precision_overflow_behavior
== MicrosecondsPrecisionOverflowBehavior::Error
&& fract.len() > max_digits
{
return Err(if timestamp > MS_WATERSHED {
ParseError::MillisecondFractionTooLong
} else {
ParseError::SecondFractionTooLong
});
Err(e) => match float_parse_bytes(bytes) {
IntFloat::Int(int) => Self::from_timestamp_with_config(int, 0, config),
IntFloat::Float(float) => {
// A magnitude above MS_WATERSHED is treated as milliseconds rather than seconds;
// `abs()` makes the check apply to negative timestamps as well.
let timestamp_in_milliseconds = float.abs() > MS_WATERSHED as f64;

if config.microseconds_precision_overflow_behavior == MicrosecondsPrecisionOverflowBehavior::Error {
let decimal_digits_count = decimal_digits(bytes);

// If the number of decimal digits exceeds the maximum allowed for the timestamp precision,
// return an error. For timestamps in milliseconds, the maximum is 3, for timestamps in seconds,
// the maximum is 6. These end up being the same in terms of allowing microsecond precision.
if timestamp_in_milliseconds && decimal_digits_count > 3 {
return Err(ParseError::MillisecondFractionTooLong);
} else if !timestamp_in_milliseconds && decimal_digits_count > 6 {
return Err(ParseError::SecondFractionTooLong);
}
// TODO: Technically this is rounding, but this is what the existing
// behaviour already did. Probably this is always better than "truncating"
// so we might want to change MicrosecondsPrecisionOverflowBehavior and
// make other uses also round / deprecate truncating.
let multiple = 10f64.powf(max_digits as f64 - fract.len() as f64);
Self::from_timestamp_with_config(
timestamp,
(fract_integers as f64 * multiple).round() as u32,
config,
)
}

// Convert a millisecond timestamp to seconds so the rest works in one unit.
let timestamp_normalized: f64 = if timestamp_in_milliseconds {
float / 1_000f64
} else {
float
};

// if seconds is negative, we round down (left on the number line), so -6.25 -> -7
// which allows for a positive number of microseconds to compensate back up to -6.25
// which is the equivalent of doing (seconds - 1) and (microseconds + 1_000_000)
// like we do in Date::timestamp_watershed
let seconds = timestamp_normalized.floor() as i64;
// Because `seconds` was floored, the remainder is non-negative (for values that fit
// in an i64) before it is scaled to microseconds and rounded.
let microseconds = ((timestamp_normalized - seconds as f64) * 1_000_000f64).round() as u32;

Self::from_timestamp_with_config(seconds, microseconds, config)
}
}
IntFloat::Err => Err(e),
},
}
}

Expand Down
10 changes: 10 additions & 0 deletions src/numbers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,13 @@ pub fn float_parse_bytes(s: &[u8]) -> IntFloat {
IntFloat::Int(int_part)
}
}

/// Returns how many bytes follow the first `.` in `bytes`.
///
/// Yields `0` when there is no `.` or when nothing follows it (a trailing `.`).
/// Caution: the bytes after the `.` are counted without being validated, so
/// malformed input (non-digits, a second `.`) is counted as-is.
pub(crate) fn decimal_digits(bytes: &[u8]) -> usize {
    bytes
        .iter()
        .position(|&byte| byte == b'.')
        // Everything after the separator is the fraction; `dot < bytes.len()`,
        // so the subtraction cannot underflow.
        .map_or(0, |dot| bytes.len() - dot - 1)
}
4 changes: 4 additions & 0 deletions tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -863,10 +863,14 @@ param_tests! {
dt_underscore: ok => "2020-01-01_12:13:14,123z", "2020-01-01T12:13:14.123000Z";
dt_unix1: ok => "1654646400", "2022-06-08T00:00:00";
dt_unix2: ok => "1654646404", "2022-06-08T00:00:04";
dt_unix_1_neg: ok => "-1654646400", "1917-07-27T00:00:00";
dt_unix_2_neg: ok => "-1654646404", "1917-07-26T23:59:56";
dt_unix_float: ok => "1654646404.5", "2022-06-08T00:00:04.500000";
dt_unix_float_limit: ok => "1654646404.123456", "2022-06-08T00:00:04.123456";
dt_unix_float_ms: ok => "1654646404000.5", "2022-06-08T00:00:04.000500";
dt_unix_float_ms_limit: ok => "1654646404123.456", "2022-06-08T00:00:04.123456";
dt_unix_float_ms_neg: ok => "-1654646404.123456", "1917-07-26T23:59:55.876544";
dt_unix_float_ms_neg_limit: ok => "-1654646404000.123", "1917-07-26T23:59:55.999877";
dt_unix_float_empty: ok => "1654646404.", "2022-06-08T00:00:04";
dt_unix_float_ms_empty: ok => "1654646404000.", "2022-06-08T00:00:04";
dt_unix_float_too_long: err => "1654646404.1234567", SecondFractionTooLong;
Expand Down

0 comments on commit 27b17f7

Please sign in to comment.