Skip to content

Commit

Permalink
Remove token value (#267)
Browse files Browse the repository at this point in the history
* Remove token value

* Update parser tests

* Remove extra brackets in fstring middle

* Fix compatibility tests

* Add test for fstring middle with brackets
  • Loading branch information
Glyphack authored Oct 9, 2024
1 parent e247e36 commit 3a82579
Show file tree
Hide file tree
Showing 211 changed files with 3,788 additions and 3,936 deletions.
2 changes: 1 addition & 1 deletion benchmark/benches/parser_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ pub fn benchmark_parser(c: &mut Criterion) {
&source,
|b, source| {
b.iter(|| {
let mut parser = Parser::new(source, path);
let mut parser = Parser::new(source);
parser.parse().unwrap();

0
Expand Down
10 changes: 9 additions & 1 deletion compat/src/lexer_compat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ fn check_tokens_match(
}

let python_token_value = python_token.value.clone();
let enderpy_token_value = enderpy_token.value.to_string();
let enderpy_token_value = enderpy_token.to_string(lexer.source);
// The Python tokenizer sets values in a number of places where Enderpy simply relies
// on kind to assume value. Handle those cases here.
let value_matches = matches_python_name_token(python_token.value.as_str(), &enderpy_token.kind)
Expand Down Expand Up @@ -507,6 +507,8 @@ fn matches_python_name_token(python_token_value: &str, token_kind: &Kind) -> boo
"while" => token_kind == &Kind::While,
"with" => token_kind == &Kind::With,
"yield" => token_kind == &Kind::Yield,
"match" => token_kind == &Kind::Match,
"type" => token_kind == &Kind::Type,
_ => token_kind == &Kind::Identifier,
}
}
Expand Down Expand Up @@ -903,6 +905,12 @@ print(a)
]);
}

// TODO: fstring middle offset is wrong in case of {{ or }}
#[test]
fn test_fstring_positions() {
    // Compare token positions against CPython's tokenizer for an f-string
    // whose doubled braces ({{ / }}) are escapes for literal braces.
    let case = "f\"{{{', '.join(dict_items)}}}\"";
    python_tokenize_test_lexer(&[case]);
}

#[test]
#[should_panic]
fn test_lex_unterminated_string_double_quotes() {
Expand Down
2 changes: 1 addition & 1 deletion compat/src/parser_compat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ fn remove_unimplemented_attributes(value: &mut Value) {
}

fn parse_enderpy_source(source: &str) -> Result<Value> {
let mut parser = Parser::new(source, "string");
let mut parser = Parser::new(source);
let typed_ast = parser.parse().into_diagnostic()?;
let ast = typed_ast.as_python_compat(&parser);
Ok(ast)
Expand Down
5 changes: 2 additions & 3 deletions enderpy/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,12 @@ fn tokenize() -> Result<()> {
let (start_line_num, start_line_column, end_line_num, end_line_column) =
get_row_col_position(token.start, token.end, &lexer.line_starts);
println!(
"{}-{}, {}-{}: {} {} {} {}",
"{}-{}, {}-{}: {} {} {}",
start_line_num,
start_line_column,
end_line_num,
end_line_column,
token.kind,
token.value,
token.start,
token.end,
);
Expand All @@ -108,7 +107,7 @@ fn tokenize() -> Result<()> {
fn parse(file: &PathBuf) -> Result<()> {
let source = fs::read_to_string(file).into_diagnostic()?;
let file_path = file.to_str().unwrap_or("");
let mut parser = Parser::new(&source, file_path);
let mut parser = Parser::new(&source);
let ast = parser.parse();
println!("{:#?}", ast);
Ok(())
Expand Down
24 changes: 0 additions & 24 deletions parser/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
use miette::Diagnostic;
use thiserror::Error;

use crate::parser::parser::Parser;

#[derive(Error, Diagnostic, Debug, Clone)]
pub enum ParsingError {
#[error("Invalid syntax")]
Expand All @@ -16,28 +14,6 @@ pub enum ParsingError {
},
}

impl From<Parser<'_>> for ParsingError {
    /// Build an `InvalidSyntax` error from the parser's current token,
    /// using that token's value as the message and its span for location.
    fn from(err: Parser) -> Self {
        let token = err.cur_token();
        let span = err.get_span_on_line(token.start, token.end);
        ParsingError::InvalidSyntax {
            msg: token.value.to_string(),
            advice: String::new(),
            span,
        }
    }
}

impl From<&mut Parser<'_>> for ParsingError {
    /// Same conversion as the by-value impl: report the current token as an
    /// `InvalidSyntax` error with its on-line span.
    fn from(err: &mut Parser) -> Self {
        let token = err.cur_token();
        let span = err.get_span_on_line(token.start, token.end);
        ParsingError::InvalidSyntax {
            msg: token.value.to_string(),
            advice: String::new(),
            span,
        }
    }
}

#[derive(Error, Debug)]
pub enum LexError {
#[error("String not terminated")]
Expand Down
66 changes: 9 additions & 57 deletions parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use unicode_id_start::{is_id_continue, is_id_start};
use crate::{
error::LexError,
get_row_col_position,
token::{Kind, Token, TokenValue},
token::{Kind, Token},
};

#[derive(Debug, Clone, Copy, PartialEq)]
Expand Down Expand Up @@ -101,7 +101,6 @@ impl<'a> Lexer<'a> {
self.next_token_is_dedent -= 1;
return Token {
kind: Kind::Dedent,
value: TokenValue::None,
start: self.current,
end: self.current,
};
Expand Down Expand Up @@ -138,22 +137,12 @@ impl<'a> Lexer<'a> {
if kind != Kind::Comment && kind != Kind::NL && kind != Kind::Dedent {
self.non_logical_line_state = kind == Kind::NewLine;
}
let value = self.parse_token_value(kind, start);
let end = match kind {
Kind::FStringMiddle => start + value.as_str().expect("").len() as u32,
_ => self.current,
};

let end = self.current;
if kind == Kind::Dedent {
start = end
}

Token {
kind,
value,
start,
end,
}
Token { kind, start, end }
}

// peek_token is a side-effect free version of next_token
Expand Down Expand Up @@ -750,6 +739,8 @@ impl<'a> Lexer<'a> {
"while" => Kind::While,
"with" => Kind::With,
"yield" => Kind::Yield,
"match" => Kind::Match,
"type" => Kind::Type,
_ => Kind::Identifier,
}
}
Expand Down Expand Up @@ -1053,43 +1044,6 @@ impl<'a> Lexer<'a> {
}
}

fn parse_token_value(&mut self, kind: Kind, start: u32) -> TokenValue {
let kind_value = &self.source[start as usize..self.current as usize];
match kind {
Kind::Integer
| Kind::Hexadecimal
| Kind::Binary
| Kind::PointFloat
| Kind::Octal
| Kind::ExponentFloat
| Kind::ImaginaryInteger
| Kind::ImaginaryExponentFloat
| Kind::ImaginaryPointFloat => TokenValue::Number(kind_value.to_string()),
Kind::Identifier => match kind_value {
"type" => TokenValue::Type,
"match" => TokenValue::Match,
_ => TokenValue::Str(kind_value.to_string()),
},
Kind::StringLiteral
| Kind::FStringStart
| Kind::FStringEnd
| Kind::RawBytes
| Kind::RawFStringStart
| Kind::Bytes
| Kind::Unicode
| Kind::Comment => TokenValue::Str(kind_value.to_string()),
Kind::FStringMiddle => {
let value = kind_value.replace("{{", "{");
let value = value.replace("}}", "}");
TokenValue::Str(value)
}
Kind::Dedent => TokenValue::Indent(1),
Kind::Indent => TokenValue::Indent(1),
Kind::Error => TokenValue::Str(kind_value.to_string()),
_ => TokenValue::None,
}
}

fn f_string_quote_count(&mut self, str_start: char) -> u8 {
let mut count = 1;
if self.peek() == Some(str_start) && self.double_peek() == Some(str_start) {
Expand All @@ -1115,15 +1069,14 @@ mod tests {

fn snapshot_test_lexer_and_errors(test_case: &str) {
let mut lexer = Lexer::new(test_case);
let mut tokens = vec![];
let mut snapshot = String::from("");
loop {
let token = lexer.next_token();
if token.kind == Kind::Eof {
break;
}
snapshot += format!("{}\n", token).as_str();
tokens.push(token);
snapshot += token.display_token(test_case).as_str();
snapshot += "\n";
}
let mut settings = insta::Settings::clone_current();
settings.set_snapshot_path("../../test_data/output/");
Expand All @@ -1136,15 +1089,14 @@ mod tests {
fn snapshot_test_lexer(snap_name: &str, inputs: &[&str]) -> Result<(), LexError> {
for (i, test_input) in inputs.iter().enumerate() {
let mut lexer = Lexer::new(test_input);
let mut tokens = vec![];
let mut snapshot = String::from("");
loop {
let token = lexer.next_token();
if token.kind == Kind::Eof {
break;
}
snapshot += format!("{}\n", token).as_str();
tokens.push(token);
snapshot += token.display_token(test_input).as_str();
snapshot += "\n";
}
let mut settings = insta::Settings::clone_current();
settings.set_snapshot_suffix(format!("{snap_name}-{i}"));
Expand Down
15 changes: 4 additions & 11 deletions parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,24 @@ use crate::{
parser::ast::{Expression, JoinedStr},
};
pub fn is_at_compound_statement(token: &Token) -> bool {
let kind_is_statement = match token.kind {
match token.kind {
Kind::If
| Kind::While
| Kind::For
| Kind::Try
| Kind::With
| Kind::Def
| Kind::Class
| Kind::Type
| Kind::Match
// Decorator
| Kind::MatrixMul
| Kind::Async => true,
_ => false,
};
if kind_is_statement {
return true;
}

// Match is a soft keyword so it's an identifier token
if Kind::Identifier == token.kind && token.value.to_string() == "match" {
return true;
}

false
}

// TODO: performance
pub fn extract_string_inside(val: String) -> String {
let delimiters = vec!["\"\"\"", "\"", "'''", "'"];
let mut result = String::new();
Expand Down
Loading

0 comments on commit 3a82579

Please sign in to comment.