Change CTLexerBuilder src gen to use the quote crate.
ratmice committed Mar 4, 2025
1 parent 2d554c2 commit d70b294
Showing 6 changed files with 120 additions and 49 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
@@ -44,3 +44,4 @@ sparsevec = "0.2"
static_assertions = "1.1"
unicode-width = "0.1.11"
vob = ">=3.0.2"
proc-macro2 = "1.0"
2 changes: 2 additions & 0 deletions cfgrammar/Cargo.toml
@@ -21,3 +21,5 @@ num-traits.workspace = true
regex.workspace = true
serde = { workspace = true, features = ["derive"], optional = true }
vob = { workspace = true, features = ["serde"] }
quote.workspace = true
proc-macro2.workspace = true
9 changes: 9 additions & 0 deletions cfgrammar/src/lib/span.rs
@@ -1,3 +1,5 @@
use proc_macro2::TokenStream;
use quote::{quote, ToTokens, TokenStreamExt};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

@@ -54,3 +56,10 @@ pub trait Spanned: std::fmt::Display {
/// Returns the `SpansKind` associated with this error.
fn spanskind(&self) -> crate::yacc::parser::SpansKind;
}

impl ToTokens for Span {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let Span { start, end } = self;
        tokens.append_all(quote! {::cfgrammar::Span::new(#start, #end)});
    }
}
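As an illustrative aside (not part of this commit): with this impl, interpolating a `Span` into a downstream `quote!` invocation splices in a reconstructed constructor call. A minimal sketch:

// Hypothetical usage of the impl above; `Span::new` is cfgrammar's public constructor.
use cfgrammar::Span;
use quote::quote;

fn main() {
    let span = Span::new(3, 7);
    // `#span` invokes the `ToTokens` impl, emitting
    // `::cfgrammar::Span::new(3usize, 7usize)` into the stream.
    let tokens = quote! { let sp = #span; };
    println!("{tokens}");
}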
1 change: 1 addition & 0 deletions lrlex/Cargo.toml
@@ -33,5 +33,6 @@ lazy_static.workspace = true
regex.workspace = true
regex-syntax.workspace = true
num-traits.workspace = true
proc-macro2.workspace = true
quote.workspace = true
serde.workspace = true
144 changes: 95 additions & 49 deletions lrlex/src/lib/ctbuilder.rs
@@ -19,7 +19,8 @@ use cfgrammar::{newlinecache::NewlineCache, Spanned};
use lazy_static::lazy_static;
use lrpar::{CTParserBuilder, LexerTypes};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use quote::quote;
use proc_macro2::TokenStream;
use quote::{quote, ToTokens, TokenStreamExt};
use regex::Regex;
use serde::Serialize;

@@ -78,11 +79,48 @@ pub enum RustEdition {
Rust2021,
}

/// The `quote` impl of `ToTokens` for `Option` emits nothing for `None`
/// and only the inner value for `Some(inner_value)`.
///
/// This wrapper instead emits an explicit `Some(..)` or `None` path.
/// See: [quote #20](https://github.com/dtolnay/quote/issues/20)
struct QuoteOption<T>(Option<T>);

impl<T: ToTokens> ToTokens for QuoteOption<T> {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.append_all(match self.0 {
            Some(ref t) => quote! { ::std::option::Option::Some(#t) },
            None => quote! { ::std::option::Option::None },
        });
    }
}
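A small illustration (not from the diff) of the difference, assuming `QuoteOption` is in scope:

// `bare` interpolates to an empty stream; `wrapped` to an explicit path.
fn quote_option_demo() -> (proc_macro2::TokenStream, proc_macro2::TokenStream) {
    let bare: Option<usize> = None;
    let wrapped: QuoteOption<usize> = QuoteOption(None);
    // quote!(#bare)    emits nothing at all
    // quote!(#wrapped) emits `::std::option::Option::None`
    (quote::quote!(#bare), quote::quote!(#wrapped))
}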

/// This wrapper supplies a `ToTokens` impl for tuples, which `quote` lacks.
/// For a tuple `(a, b)`, it emits the token stream `(#a, #b)`.
struct QuoteTuple<T>(T);

impl<A: ToTokens, B: ToTokens> ToTokens for QuoteTuple<(A, B)> {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let (a, b) = &self.0;
        tokens.append_all(quote!((#a, #b)));
    }
}
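A short sketch (not in the diff), assuming `QuoteTuple` is in scope:

// A wrapped pair renders as a parenthesized tuple expression, e.g. `(1usize, "x")`.
fn quote_tuple_demo() -> proc_macro2::TokenStream {
    let t = QuoteTuple((1usize, "x"));
    quote::quote!(#t)
}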

/// The wrapped `&str` value is emitted followed by a call to `to_string()`.
struct QuoteToString<'a>(&'a str);

impl ToTokens for QuoteToString<'_> {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let x = &self.0;
        tokens.append_all(quote! { #x.to_string() });
    }
}
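And correspondingly (again a sketch, assuming the wrapper above is in scope):

// The generated code constructs an owned `String`, e.g. `"lex_rule".to_string()`.
fn quote_to_string_demo() -> proc_macro2::TokenStream {
    let s = QuoteToString("lex_rule");
    quote::quote!(#s)
}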

/// A `CTLexerBuilder` allows one to specify the criteria for building a statically generated
/// lexer.
pub struct CTLexerBuilder<'a, LexerTypesT: LexerTypes = DefaultLexerTypes<u32>>
where
LexerTypesT::StorageT: Debug + Eq + Hash,
LexerTypesT::StorageT: Debug + Eq + Hash + ToTokens,
usize: num_traits::AsPrimitive<LexerTypesT::StorageT>,
{
lrpar_config: Option<Box<dyn Fn(CTParserBuilder<LexerTypesT>) -> CTParserBuilder<LexerTypesT>>>,
@@ -108,7 +146,7 @@ impl CTLexerBuilder<'_, DefaultLexerTypes<u32>> {
impl<'a, LexerTypesT: LexerTypes> CTLexerBuilder<'a, LexerTypesT>
where
LexerTypesT::StorageT:
'static + Debug + Eq + Hash + PrimInt + Serialize + TryFrom<usize> + Unsigned,
'static + Debug + Eq + Hash + PrimInt + Serialize + TryFrom<usize> + Unsigned + ToTokens,
usize: AsPrimitive<LexerTypesT::StorageT>,
{
/// Create a new [CTLexerBuilder].
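For context, a typical build-script invocation looks roughly like the sketch below; `lexer_in_src_dir` and `build` are from lrlex's documented builder API, and `calc.l` is a placeholder path:

// build.rs sketch: compile src/calc.l into a Rust lexer module at build time.
use lrlex::CTLexerBuilder;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    CTLexerBuilder::new()
        .lexer_in_src_dir("calc.l")?
        .build()?;
    Ok(())
}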
@@ -438,31 +476,52 @@ pub fn lexerdef() -> {lexerdef_type} {{
)
.ok();

let RegexOptions {
    dot_matches_new_line,
    multi_line,
    octal,
    posix_escapes,
    case_insensitive,
    unicode,
    swap_greed,
    ignore_whitespace,
    size_limit,
    dfa_size_limit,
    nest_limit,
} = self.regex_options;
let case_insensitive = QuoteOption(case_insensitive);
let unicode = QuoteOption(unicode);
let swap_greed = QuoteOption(swap_greed);
let ignore_whitespace = QuoteOption(ignore_whitespace);
let size_limit = QuoteOption(size_limit);
let dfa_size_limit = QuoteOption(dfa_size_limit);
let nest_limit = QuoteOption(nest_limit);

outs.push_str(&format!(
"let regex_options = ::lrlex::RegexOptions {{
dot_matches_new_line: {dot_matches_new_line:?},
multi_line: {multi_line:?},
octal: {octal:?},
posix_escapes: {posix_escapes:?},
case_insensitive: {case_insensitive:?},
unicode: {unicode:?},
swap_greed: {swap_greed:?},
ignore_whitespace: {ignore_whitespace:?},
size_limit: {size_limit:?},
dfa_size_limit: {dfa_size_limit:?},
nest_limit: {nest_limit:?},
dot_matches_new_line: {dot_matches_new_line},
multi_line: {multi_line},
octal: {octal},
posix_escapes: {posix_escapes},
case_insensitive: {case_insensitive},
unicode: {unicode},
swap_greed: {swap_greed},
ignore_whitespace: {ignore_whitespace},
size_limit: {size_limit},
dfa_size_limit: {dfa_size_limit},
nest_limit: {nest_limit},
}};",
dot_matches_new_line = self.regex_options.dot_matches_new_line,
multi_line = self.regex_options.multi_line,
octal = self.regex_options.octal,
posix_escapes = self.regex_options.posix_escapes,
case_insensitive = self.regex_options.case_insensitive,
unicode = self.regex_options.unicode,
swap_greed = self.regex_options.swap_greed,
ignore_whitespace = self.regex_options.ignore_whitespace,
size_limit = self.regex_options.size_limit,
dfa_size_limit = self.regex_options.dfa_size_limit,
nest_limit = self.regex_options.nest_limit,
dot_matches_new_line = quote!(#dot_matches_new_line),
multi_line = quote!(#multi_line),
octal = quote!(#octal),
posix_escapes = quote!(#posix_escapes),
case_insensitive = quote!(#case_insensitive),
unicode = quote!(#unicode),
swap_greed = quote!(#swap_greed),
ignore_whitespace = quote!(#ignore_whitespace),
size_limit = quote!(#size_limit),
dfa_size_limit = quote!(#dfa_size_limit),
nest_limit = quote!(#nest_limit),
));
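For illustration, the generated source now contains a struct literal along these lines (the field values here are hypothetical; they depend on the builder's configuration):

// Sample of the emitted code: quote-rendered values replace the old
// Debug-formatted ones, so `Option` fields expand to full paths.
let regex_options = ::lrlex::RegexOptions {
    dot_matches_new_line: true,
    multi_line: true,
    octal: true,
    posix_escapes: false,
    case_insensitive: ::std::option::Option::None,
    unicode: ::std::option::Option::Some(true),
    swap_greed: ::std::option::Option::None,
    ignore_whitespace: ::std::option::Option::None,
    size_limit: ::std::option::Option::None,
    dfa_size_limit: ::std::option::Option::None,
    nest_limit: ::std::option::Option::None,
};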

outs.push_str(" let start_states: Vec<StartState> = vec![");
@@ -485,35 +544,22 @@ pub fn lexerdef() -> {lexerdef_type} {{

// Individual rules
for r in lexerdef.iter_rules() {
let tok_id = match r.tok_id {
Some(ref t) => format!("Some({:?})", t),
None => "None".to_owned(),
};
let n = match r.name() {
Some(ref n) => format!("Some({}.to_string())", quote!(#n)),
None => "None".to_owned(),
};
let target_state = match &r.target_state() {
Some((id, op)) => format!("Some(({}, ::lrlex::StartStateOperation::{:?}))", id, op),
None => "None".to_owned(),
};
let n_span = format!(
"::cfgrammar::Span::new({}, {})",
r.name_span().start(),
r.name_span().end()
);
let regex = &r.re_str;
let tok_id = QuoteOption(r.tok_id);
let n = QuoteOption(r.name().map(QuoteToString));
let target_state = QuoteOption(r.target_state().map(|(x, y)| QuoteTuple((x, y))));
let n_span = r.name_span();
let regex = QuoteToString(&r.re_str);
let start_states = r.start_states();
write!(
outs,
"
Rule::new(::lrlex::unstable_api::InternalPublicApi, {}, {}, {}, {}.to_string(), {}.to_vec(), {}, &regex_options).unwrap(),",
tok_id,
n,
n_span,
quote!(#tok_id),
quote!(#n),
quote!(#n_span),
quote!(#regex),
quote!([#(#start_states),*]),
target_state,
quote!(#target_state),
)
.ok();
}
@@ -537,10 +583,10 @@ pub fn lexerdef() -> {lexerdef_type} {{
if RE_TOKEN_ID.is_match(n) {
write!(
outs,
"#[allow(dead_code)]\npub const T_{}: {} = {:?};\n",
"#[allow(dead_code)]\npub const T_{}: {} = {};\n",
n.to_ascii_uppercase(),
type_name::<LexerTypesT::StorageT>(),
*id
quote!(#id)
)
.ok();
}
12 changes: 12 additions & 0 deletions lrlex/src/lib/parser.rs
@@ -76,6 +76,18 @@ pub enum StartStateOperation {
Pop,
}

use proc_macro2::TokenStream;
use quote::quote;

impl quote::ToTokens for StartStateOperation {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.extend(match *self {
            StartStateOperation::ReplaceStack => quote!(::lrlex::StartStateOperation::ReplaceStack),
            StartStateOperation::Push => quote!(::lrlex::StartStateOperation::Push),
            StartStateOperation::Pop => quote!(::lrlex::StartStateOperation::Pop),
        })
    }
}
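A quick sketch (not in the commit) of what the impl yields when interpolated:

// Emits the variant's fully qualified path, e.g. `::lrlex::StartStateOperation::Push`.
fn start_state_op_demo() -> proc_macro2::TokenStream {
    let op = StartStateOperation::Push;
    quote::quote!(#op)
}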

pub(super) struct LexParser<LexerTypesT: LexerTypes>
where
usize: AsPrimitive<LexerTypesT::StorageT>,
