Skip to content

Commit ab0ada4

Browse files
authored
Statically computes text value kind lookup table (#894)
1 parent 0943766 commit ab0ada4

File tree

6 files changed

+96
-85
lines changed

6 files changed

+96
-85
lines changed

src/lazy/expanded/macro_evaluator.rs

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -943,10 +943,7 @@ impl<'top, D: Decoder> StackedMacroEvaluator<'top, D> {
943943
/// current encoding context and push the resulting `MacroExpansion` onto the stack.
944944
pub fn push(&mut self, invocation: impl Into<MacroExpr<'top, D>>) -> IonResult<()> {
945945
let macro_expr = invocation.into();
946-
let expansion = match macro_expr.expand() {
947-
Ok(expansion) => expansion,
948-
Err(e) => return Err(e),
949-
};
946+
let expansion = macro_expr.expand()?;
950947
self.macro_stack.push(expansion);
951948
Ok(())
952949
}
@@ -989,10 +986,7 @@ impl<'top, D: Decoder> StackedMacroEvaluator<'top, D> {
989986
Some(expansion) => expansion,
990987
};
991988
// Ask that expansion to continue its evaluation by one step.
992-
let step = match current_expansion.next_step() {
993-
Ok(step) => step,
994-
Err(e) => return Err(e),
995-
};
989+
let step = current_expansion.next_step()?;
996990
current_expansion.is_complete = step.is_final();
997991
use ValueExpr::*;
998992
let maybe_output_value = match step.value_expr() {

src/lazy/expanded/mod.rs

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -558,10 +558,7 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
558558
// It's another macro invocation, we'll add it to the evaluator so it will be evaluated
559559
// on the next call and then we'll return the e-expression itself.
560560
EExp(e_exp) => {
561-
let resolved_e_exp = match e_exp.resolve(context_ref) {
562-
Ok(resolved) => resolved,
563-
Err(e) => return Err(e),
564-
};
561+
let resolved_e_exp = e_exp.resolve(context_ref)?;
565562

566563
// Get the current evaluator or make a new one
567564
let evaluator = match self.evaluator_ptr.get() {
@@ -639,10 +636,7 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
639636
}
640637
// It's another macro invocation, we'll start evaluating it.
641638
EExp(e_exp) => {
642-
let resolved_e_exp = match e_exp.resolve(context_ref) {
643-
Ok(resolved) => resolved,
644-
Err(e) => return Err(e),
645-
};
639+
let resolved_e_exp = e_exp.resolve(context_ref)?;
646640

647641
// If this e-expression invokes a template with a non-system, singleton expansion, we can use the
648642
// e-expression to back a LazyExpandedValue. It will only be evaluated if the user calls `read()`.
@@ -664,11 +658,7 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
664658
};
665659

666660
// Try to get a value by starting to evaluate the e-expression.
667-
let next_value = match evaluator.next() {
668-
Ok(value) => value,
669-
Err(e) => return Err(e),
670-
};
671-
if let Some(value) = next_value {
661+
if let Some(value) = evaluator.next()? {
672662
// If we get a value and the evaluator isn't empty yet, save its pointer
673663
// so we can try to get more out of it when `next_at_or_above_depth` is called again.
674664
if !evaluator.is_empty() {

src/lazy/expanded/struct.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ impl<'top, D: Decoder> LazyExpandedField<'top, D> {
9393
self.name
9494
}
9595

96-
pub fn to_field_expr(&self) -> FieldExpr<'top, D> {
96+
pub fn to_field_expr(self) -> FieldExpr<'top, D> {
9797
FieldExpr::NameValue(self.name(), self.value())
9898
}
9999
}

src/lazy/text/buffer.rs

Lines changed: 43 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -27,24 +27,20 @@ use crate::lazy::text::matched::{
2727
use crate::lazy::text::parse_result::{fatal_parse_error, InvalidInputError, IonParseError};
2828
use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult};
2929
use crate::lazy::text::raw::v1_1::arg_group::{EExpArg, EExpArgExpr, TextEExpArgGroup};
30-
use crate::lazy::text::raw::v1_1::reader::{
31-
MacroIdRef,
32-
SystemMacroAddress, TextEExpression_1_1
33-
};
30+
use crate::lazy::text::raw::v1_1::reader::{MacroIdRef, SystemMacroAddress, TextEExpression_1_1};
3431
use crate::lazy::text::value::{
3532
LazyRawTextValue, LazyRawTextValue_1_0, LazyRawTextValue_1_1, LazyRawTextVersionMarker,
3633
};
3734
use crate::result::DecodingError;
38-
use crate::{
39-
Encoding, HasRange, IonError, IonResult, IonType, RawSymbolRef, TimestampPrecision,
40-
};
35+
use crate::{Encoding, HasRange, IonError, IonResult, IonType, RawSymbolRef, TimestampPrecision};
4136

4237
use crate::lazy::expanded::macro_table::{Macro, ION_1_1_SYSTEM_MACROS};
4338
use crate::lazy::expanded::template::{Parameter, RestSyntaxPolicy};
4439
use crate::lazy::text::as_utf8::AsUtf8;
40+
use crate::lazy::text::raw::sequence::RawTextSExpIterator;
41+
use crate::lazy::text::token_kind::{ValueTokenKind, TEXT_ION_TOKEN_KINDS};
4542
use bumpalo::collections::Vec as BumpVec;
4643
use winnow::ascii::{digit0, digit1};
47-
use crate::lazy::text::raw::sequence::RawTextSExpIterator;
4844

4945
/// Generates parser functions that map from an Ion type representation (`Decimal`, `Int`, etc)
5046
/// to an `EncodedTextValue`.
@@ -402,7 +398,10 @@ impl<'top> TextBuffer<'top> {
402398
// int `3` while recognizing the input `-3` as the int `-3`. If `match_operator` runs before
403399
// `match_value`, it will consume the sign (`-`) of negative number values, treating
404400
// `-3` as an operator (`-`) and an int (`3`). Thus, we run `match_value` first.
405-
whitespace_and_then(alt((Self::match_value::<TextEncoding_1_1>, Self::match_operator))),
401+
whitespace_and_then(alt((
402+
Self::match_value::<TextEncoding_1_1>,
403+
Self::match_operator,
404+
))),
406405
)
407406
.map(|(maybe_annotations, value)| input.apply_annotations(maybe_annotations, value))
408407
.map(RawValueExpr::ValueLiteral)
@@ -446,7 +445,9 @@ impl<'top> TextBuffer<'top> {
446445
}
447446

448447
/// Matches an optional annotation sequence and a trailing value.
449-
pub fn match_annotated_value<E: TextEncoding<'top>>(&mut self) -> IonParseResult<'top, E::Value<'top>> {
448+
pub fn match_annotated_value<E: TextEncoding<'top>>(
449+
&mut self,
450+
) -> IonParseResult<'top, E::Value<'top>> {
450451
let input = *self;
451452
(
452453
opt(Self::match_annotations),
@@ -524,49 +525,34 @@ impl<'top> TextBuffer<'top> {
524525

525526
/// Matches a single Ion 1.0 value.
526527
pub fn match_value<E: TextEncoding<'top>>(&mut self) -> IonParseResult<'top, E::Value<'top>> {
528+
use ValueTokenKind::*;
527529
dispatch! {
528-
|input: &mut TextBuffer<'top>| input.peek_byte();
529-
byte if byte.is_ascii_digit() || byte == b'-' => {
530-
alt((
531-
Self::match_int_value,
532-
Self::match_float_value,
533-
Self::match_decimal_value,
534-
Self::match_timestamp_value,
535-
))
536-
},
537-
byte if byte.is_ascii_alphabetic() => {
538-
alt((
539-
Self::match_null_value,
540-
Self::match_bool_value,
541-
Self::match_identifier_value,
542-
Self::match_float_special_value, // nan
543-
))
544-
},
545-
b'$' | b'_' => {
546-
Self::match_symbol_value // identifiers and symbol IDs
547-
},
548-
b'"' | b'\'' => {
549-
alt((
550-
Self::match_string_value,
551-
Self::match_symbol_value,
552-
))
553-
},
554-
b'[' => E::list_matcher(),
555-
b'(' => E::sexp_matcher(),
556-
b'{' => {
557-
alt((
558-
Self::match_blob_value,
559-
Self::match_clob_value,
560-
E::struct_matcher(),
561-
))
562-
},
563-
b'+' => Self::match_float_special_value, // +inf
564-
_other => {
565-
// `other` is not a legal start-of-value byte.
566-
|input: &mut TextBuffer<'top>| {
567-
let error = InvalidInputError::new(*input);
568-
Err(ErrMode::Backtrack(IonParseError::Invalid(error)))
569-
}
530+
|input: &mut TextBuffer<'top>| Ok(TEXT_ION_TOKEN_KINDS[input.peek_byte()? as usize]);
531+
NumberOrTimestamp => alt((
532+
Self::match_int_value,
533+
Self::match_float_value,
534+
Self::match_decimal_value,
535+
Self::match_timestamp_value,
536+
)),
537+
Letter => alt((
538+
Self::match_null_value,
539+
Self::match_bool_value,
540+
Self::match_identifier_value,
541+
Self::match_float_special_value, // nan
542+
)),
543+
Symbol => Self::match_symbol_value,
544+
QuotedText => alt((Self::match_string_value, Self::match_symbol_value)),
545+
List => E::list_matcher(),
546+
SExp => E::sexp_matcher(),
547+
LobOrStruct => alt((
548+
Self::match_blob_value,
549+
Self::match_clob_value,
550+
E::struct_matcher(),
551+
)),
552+
Invalid(byte) => |input: &mut TextBuffer<'top>| {
553+
let error = InvalidInputError::new(*input)
554+
.with_label(format!("a value cannot begin with '{}'", char::from(byte)));
555+
Err(ErrMode::Backtrack(IonParseError::Invalid(error)))
570556
},
571557
}
572558
.with_taken()
@@ -598,16 +584,15 @@ impl<'top> TextBuffer<'top> {
598584
&mut self,
599585
parameter: &'top Parameter,
600586
) -> IonParseResult<'top, TextEExpArgGroup<'top>> {
601-
602587
TextEncoding_1_1::container_matcher(
603588
"an explicit argument group",
604589
"(::",
605590
RawTextSExpIterator::<TextEncoding_1_1>::new,
606-
whitespace_and_then(")")
591+
whitespace_and_then(")"),
607592
)
608-
.with_taken()
609-
.map(|(expr_cache, input)| TextEExpArgGroup::new(parameter, input, expr_cache))
610-
.parse_next(self)
593+
.with_taken()
594+
.map(|(expr_cache, input)| TextEExpArgGroup::new(parameter, input, expr_cache))
595+
.parse_next(self)
611596
}
612597

613598
pub fn match_e_expression_name(&mut self) -> IonParseResult<'top, MacroIdRef<'top>> {
@@ -819,8 +804,6 @@ impl<'top> TextBuffer<'top> {
819804
}
820805
}
821806

822-
823-
824807
pub fn match_empty_arg_group(
825808
&mut self,
826809
parameter: &'top Parameter,
@@ -1127,10 +1110,7 @@ impl<'top> TextBuffer<'top> {
11271110
/// Matches an Ion float of any syntax
11281111
fn match_float(&mut self) -> IonParseResult<'top, MatchedFloat> {
11291112
terminated(
1130-
alt((
1131-
Self::match_float_special,
1132-
Self::match_float_numeric_value,
1133-
)),
1113+
alt((Self::match_float_special, Self::match_float_numeric_value)),
11341114
Self::peek_stop_character,
11351115
)
11361116
.parse_next(self)

src/lazy/text/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ pub mod encoded_value;
44
pub mod matched;
55
pub mod parse_result;
66
pub mod raw;
7+
mod token_kind;
78
pub mod value;

src/lazy/text/token_kind.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/// A coarse classification of a byte that appears where a text Ion value is expected to begin.
///
/// The parser looks up the kind of the next input byte in [`TEXT_ION_TOKEN_KINDS`] and then
/// only attempts the value matchers that could possibly succeed for that kind.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValueTokenKind {
    /// An ASCII decimal digit (`0`-`9` inclusive), `-`, or `+`.
    /// Could be the start of an int, float, decimal, or timestamp.
    NumberOrTimestamp,
    /// An ASCII letter, `[a-zA-Z]` inclusive.
    /// Could be the start of a null, bool, identifier, or float (`nan`).
    Letter,
    /// A `$` or `_`, which could be either a symbol ID (`$10`)
    /// or an identifier (`$foo`, `_`).
    Symbol,
    /// A `"` or `'`, which could be either a string or symbol.
    QuotedText,
    /// `[`
    List,
    /// `(`
    SExp,
    /// `{`
    LobOrStruct,
    /// Any other byte. The byte itself is carried along so it can be reported in an error.
    Invalid(u8),
}

/// A table of `ValueTokenKind` instances that can be queried by using the
/// byte in question as an index.
///
/// The table is a reference to a fixed-size array with exactly one entry per possible `u8`
/// value, so the length is part of the type and an index derived from a `u8` is always in
/// bounds. It still coerces to `&[ValueTokenKind]` wherever a slice is required.
pub(crate) static TEXT_ION_TOKEN_KINDS: &[ValueTokenKind; 256] = &init_value_token_cache();

/// Builds the 256-entry lookup table that maps each possible byte to its [`ValueTokenKind`].
///
/// This is a `const fn` so that the table can be computed at compile time when it is used
/// to initialize a `static`.
pub(crate) const fn init_value_token_cache() -> [ValueTokenKind; 256] {
    // Every slot is overwritten below; `Invalid(0)` is only a placeholder initializer.
    let mut jump_table = [ValueTokenKind::Invalid(0); 256];
    // `for` loops are not permitted in `const fn`, so iterate with `while`.
    let mut index: usize = 0;
    while index < 256 {
        // `index` is always in `0..256` here, so this cast is lossless.
        let byte = index as u8;
        jump_table[index] = match byte {
            b'0'..=b'9' | b'-' | b'+' => ValueTokenKind::NumberOrTimestamp,
            b'a'..=b'z' | b'A'..=b'Z' => ValueTokenKind::Letter,
            b'$' | b'_' => ValueTokenKind::Symbol,
            b'"' | b'\'' => ValueTokenKind::QuotedText,
            b'[' => ValueTokenKind::List,
            b'(' => ValueTokenKind::SExp,
            b'{' => ValueTokenKind::LobOrStruct,
            other_byte => ValueTokenKind::Invalid(other_byte),
        };
        index += 1;
    }
    jump_table
}

0 commit comments

Comments
 (0)