From f472275b4a85da2eeb24e1590899066eaee120ae Mon Sep 17 00:00:00 2001 From: Glyphack Date: Thu, 28 Sep 2023 22:36:24 +0200 Subject: [PATCH 1/2] Improve parser errors --- enderpy/src/main.rs | 6 +- parser/Cargo.toml | 2 +- parser/src/parser/mod.rs | 1 + parser/src/parser/parser.rs | 162 ++++++++++++------- typechecker/src/build.rs | 6 +- typechecker/src/type_check/type_evaluator.rs | 2 +- 6 files changed, 115 insertions(+), 64 deletions(-) diff --git a/enderpy/src/main.rs b/enderpy/src/main.rs index c09f812d..e2f77df0 100644 --- a/enderpy/src/main.rs +++ b/enderpy/src/main.rs @@ -71,7 +71,11 @@ fn tokenize(file: &PathBuf) -> Result<()> { fn parse(file: &PathBuf) -> Result<()> { let source = fs::read_to_string(file)?; - let mut parser = Parser::new(source); + let file_path = match file.to_str() { + Some(path) => path, + None => "", + }; + let mut parser = Parser::new(source, file_path.into()); let ast = parser.parse(); println!("{:#?}", ast); Ok(()) diff --git a/parser/Cargo.toml b/parser/Cargo.toml index 87ea8497..b4d4736e 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -16,7 +16,7 @@ serde = { version = "1.0", features = ["derive"] } tracing = "0.1" tracing-subscriber = "0.3" unicode-id-start = "1.0.3" -miette = "5.6.0" +miette = { version = "5.6.0", features = ["fancy"] } thiserror = "1.0.40" [dev-dependencies] diff --git a/parser/src/parser/mod.rs b/parser/src/parser/mod.rs index 6bc82ef1..77d5576d 100644 --- a/parser/src/parser/mod.rs +++ b/parser/src/parser/mod.rs @@ -1,4 +1,5 @@ pub mod ast; +pub mod error; mod diagnostics; mod expression; mod operator; diff --git a/parser/src/parser/parser.rs b/parser/src/parser/parser.rs index 89cb4973..ec9f236d 100644 --- a/parser/src/parser/parser.rs +++ b/parser/src/parser/parser.rs @@ -7,6 +7,7 @@ use crate::token::{Kind, Token, TokenValue}; use miette::{Result, ErrReport, miette, bail}; use super::diagnostics; +use super::error::ParsingError; use super::expression::{is_atom, is_iterable}; use super::operator::{ is_bin_arithmetic_op, is_comparison_operator, is_unary_op, map_unary_operator, @@ -28,11 +29,14 @@ pub struct Parser { // see a closing bracket. nested_expression_list: usize, errors: Vec, + curr_line_string: String, + curr_line_number: u32, + path: String, } #[allow(unused)] impl Parser { - pub fn new(source: String) -> Self { + pub fn new(source: String, path: String) -> Self { let mut lexer = Lexer::new(&source); let cur_token = lexer.next_token().unwrap(); let prev_token_end = 0; @@ -44,6 +48,9 @@ impl Parser { prev_token_end, nested_expression_list: 0, errors: vec![], + curr_line_string: String::new(), + path, + curr_line_number: 1, } } @@ -74,7 +81,7 @@ impl Parser { } for err in &self.errors { - println!("{}", err); + println!("{:#?}", err); } Module { @@ -146,6 +153,12 @@ impl Parser { /// Move to the next token fn advance(&mut self) { let token = self.lexer.next_token(); + if self.at(Kind::NewLine) { + self.curr_line_string.clear(); + self.curr_line_number += 1; + } else { + self.curr_line_string.push_str(&self.source[self.prev_token_end..self.cur_token.end]); + } match token { Err(err) => { println!("Error: {:?}", err); @@ -161,15 +174,21 @@ impl Parser { /// Expect a `Kind` or return error pub fn expect(&mut self, kind: Kind) -> Result<()> { if !self.at(kind) { - let node = self.start_node(); let found = self.cur_token.kind; + let node = self.start_node(); self.bump_any(); let range = self.finish_node(node); - panic!( - "Error: {:?}", - diagnostics::ExpectToken(kind.to_str(), found.to_str(), range) - ); - return Err(diagnostics::ExpectToken(kind.to_str(), found.to_str(), range).into()); + let err = ParsingError::InvalidSyntax { + path: Box::from(self.path.as_str()), + msg: Box::from(format!("Expected {:?} but found {:?}", kind, found)), + line: self.curr_line_number, + input: self.curr_line_string.clone(), + advice: "maybe you forgot to put this character".to_string(), + span: (range.start, range.end), + }; + println!("errors until now: {:?}", self.errors); + panic!("{:#?}", err); + self.errors.push(err.into()); } self.advance(); Ok(()) @@ -228,11 +247,11 @@ impl Parser { self.parse_assignment_or_expression_statement() } } - }; + }?; - self.err_if_statement_not_ending_in_new_line_or_semicolon(); + self.err_if_statement_not_ending_in_new_line_or_semicolon(stmt.get_node(), stmt.clone()); - stmt + Ok(stmt) } fn parse_compount_statement(&mut self) -> Result { @@ -276,18 +295,22 @@ impl Parser { stmt } - fn err_if_statement_not_ending_in_new_line_or_semicolon(&mut self) { + fn err_if_statement_not_ending_in_new_line_or_semicolon(&mut self, node: Node, stmt: Statement) { while self.eat(Kind::WhiteSpace) || self.eat(Kind::Comment) {} if !matches!(self.cur_kind(), Kind::NewLine | Kind::SemiColon | Kind::Eof) { - let node = self.start_node(); + println!("stmt: {:?}", stmt); + let node = self.finish_node(node); let kind = self.cur_kind(); - // TODO: Better errors - let err = miette!( - "Statement must be seperated with new line or semicolon but found {:?}", - self.cur_token() - ); - self.errors.push(err); + let err = ParsingError::InvalidSyntax { + path: Box::from(self.path.as_str()), + msg: Box::from("Statement does not end in new line or semicolon"), + line: self.curr_line_number, + input: self.curr_line_string.clone(), + advice: "Split the statements into two seperate lines or add a semicolon".to_string(), + span: (node.start, node.end), + }; + self.errors.push(err.into()); } } @@ -1382,15 +1405,34 @@ impl Parser { let (module, level) = self.parse_module_name(); self.bump(Kind::Import); let mut aliases = vec![]; - while self.at(Kind::Identifier) { - let alias_name = self.start_node(); - let name = self.cur_token().value.to_string(); - self.bump(Kind::Identifier); - let asname = self.parse_alias(name, alias_name); - aliases.push(asname); - if !self.eat(Kind::Comma) { - break; + if self.eat(Kind::LeftParen) { + while self.at(Kind::Identifier) { + let alias_name = self.start_node(); + let name = self.cur_token().value.to_string(); + self.bump(Kind::Identifier); + let asname = self.parse_alias(name, alias_name); + aliases.push(asname); + if !self.eat(Kind::Comma) { + break; + } + } + self.expect(Kind::RightParen)?; + } else if self.at(Kind::Identifier) { + while self.at(Kind::Identifier) { + let alias_name = self.start_node(); + let name = self.cur_token().value.to_string(); + self.bump(Kind::Identifier); + let asname = self.parse_alias(name, alias_name); + aliases.push(asname); + if !self.eat(Kind::Comma) { + break; + } } + } else if self.at(Kind::Mul) { + aliases.push(self.parse_alias("*".to_string(), self.start_node())); + self.bump(Kind::Mul); + } else { + return Err(self.unepxted_token(import_node, self.cur_kind()).unwrap_err()); } Ok(Statement::ImportFrom(ImportFrom { node: self.finish_node(import_node), @@ -2862,7 +2904,7 @@ mod tests { "a |= 1", // annotated assignment ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -2877,7 +2919,7 @@ mod tests { #[test] fn test_parse_assert_stmt() { for test_case in &["assert a", "assert a, b", "assert True, 'fancy message'"] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -2892,7 +2934,7 @@ mod tests { #[test] fn test_pass_stmt() { for test_case in &["pass", "pass ", "pass\n"] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -2907,7 +2949,7 @@ mod tests { #[test] fn test_parse_del_stmt() { for test_case in &["del a", "del a, b", "del a, b, "] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -2922,7 +2964,7 @@ mod tests { #[test] fn parse_yield_statement() { for test_case in &["yield", "yield a", "yield a, b", "yield a, b, "] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -2937,7 +2979,7 @@ mod tests { #[test] fn test_raise_statement() { for test_case in &["raise", "raise a", "raise a from c"] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -2952,7 +2994,7 @@ mod tests { #[test] fn test_parse_break_continue() { for test_case in &["break", "continue"] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -2983,7 +3025,7 @@ mod tests { "from .......a import b", "from ...", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -2998,7 +3040,7 @@ mod tests { #[test] fn test_parse_bool_op() { for test_case in &["a or b", "a and b", "a or b or c", "a and b or c"] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3013,7 +3055,7 @@ mod tests { #[test] fn test_parse_unary_op() { for test_case in &["not a", "+ a", "~ a", "-a"] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3031,7 +3073,7 @@ mod tests { "a + b", "a - b", "a * b", "a / b", "a // b", "a % b", "a ** b", "a << b", "a >> b", "a & b", "a ^ b", "a | b", "a @ b", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3047,7 +3089,7 @@ mod tests { fn test_named_expression() { { let test_case = &"(a := b)"; - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3074,7 +3116,7 @@ mod tests { )", "(a, b, c,)", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3089,7 +3131,7 @@ mod tests { #[test] fn test_yield_expression() { for test_case in &["yield", "yield a", "yield from a"] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3105,7 +3147,7 @@ mod tests { fn test_starred() { { let test_case = &"(*a)"; - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3121,7 +3163,7 @@ mod tests { fn test_await_expression() { { let test_case = &"await a"; - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3145,7 +3187,7 @@ mod tests { "a[b, c:d:e, f]", "a[::d,]", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3160,7 +3202,7 @@ mod tests { #[test] fn test_attribute_ref() { for test_case in &["a.b", "a.b.c", "a.b_c", "a.b.c.d"] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3184,7 +3226,7 @@ mod tests { "func(a, b=c, d=e, *f, **g)", "func(a,)", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3209,7 +3251,7 @@ mod tests { "lambda a=1 : a", "lambda a=1 : a,", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3225,7 +3267,7 @@ mod tests { fn test_conditional_expression() { { let test_case = &"a if b else c if d else e"; - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3257,7 +3299,7 @@ mod tests { "'d' f'a' 'b'", "f'a_{1}' 'b' ", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3279,7 +3321,7 @@ mod tests { // unsupported // "f'hello_{f'''{a}'''}'", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3306,7 +3348,7 @@ mod tests { "a not in b", "a < b < c", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3346,7 +3388,7 @@ else: pass ", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3370,7 +3412,7 @@ else: b = 1 ", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3417,7 +3459,7 @@ except *Exception as e: pass ", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3438,7 +3480,7 @@ except *Exception as e: "a = ...", "... + 1", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3469,7 +3511,7 @@ except *Exception as e: pass", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ description => test_case.to_string(), // the template source code @@ -3494,7 +3536,7 @@ except *Exception as e: "@decor class a: pass", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3556,7 +3598,7 @@ class a: pass", case [a, b, c]: pass", ] { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3572,7 +3614,7 @@ class a: pass", fn test_complete() { glob!("../../test_data", "inputs/*.py", |path| { let test_case = fs::read_to_string(path).unwrap(); - let mut parser = Parser::new(test_case.clone()); + let mut parser = Parser::new(test_case.clone(), String::from("")); let program = parser.parse(); insta::with_settings!({ @@ -3598,7 +3640,7 @@ class a: pass", glob!("../../test_data", "inputs/one_liners/*.py", |path| { let input = fs::read_to_string(path).unwrap(); for test_case in input.split("\n\n") { - let mut parser = Parser::new(test_case.to_string()); + let mut parser = Parser::new(test_case.to_string(), String::from("")); let program = parser.parse(); insta::with_settings!({ diff --git a/typechecker/src/build.rs b/typechecker/src/build.rs index 5d88295c..e310ac06 100644 --- a/typechecker/src/build.rs +++ b/typechecker/src/build.rs @@ -85,7 +85,11 @@ impl BuildManager { } pub fn parse_file(build_source: BuildSource) -> EnderpyFile { - let mut parser = Parser::new(build_source.source.clone()); + let file_path = match build_source.path.to_str() { + Some(path) => path, + None => "", + }; + let mut parser = Parser::new(build_source.source.clone(), file_path.into()); let tree = parser.parse(); EnderpyFile::from( tree, diff --git a/typechecker/src/type_check/type_evaluator.rs b/typechecker/src/type_check/type_evaluator.rs index 0bff7bb1..a57a1b72 100755 --- a/typechecker/src/type_check/type_evaluator.rs +++ b/typechecker/src/type_check/type_evaluator.rs @@ -528,7 +528,7 @@ mod tests { use crate::state::State; use enderpy_python_parser::Parser; - let mut parser = Parser::new(source.to_string()); + let mut parser = Parser::new(source.to_string(), "".into()); let ast_module = parser.parse(); let enderpy_file = EnderpyFile::from( From a06bcb72260474c5534c7a3630ede268532af8e8 Mon Sep 17 00:00:00 2001 From: Glyphack Date: Sat, 30 Sep 2023 11:39:55 +0200 Subject: [PATCH 2/2] Use ParsingError enum for all errors in parser --- parser/src/parser/diagnostics.rs | 31 - parser/src/parser/error.rs | 27 + parser/src/parser/mod.rs | 1 - parser/src/parser/parser.rs | 678 ++++++++++-------- ...ts__complete@separate_statements.py-2.snap | 24 +- ...r__tests__one_liners@from_import.py-2.snap | 18 + ...ser__tests__one_liners@from_import.py.snap | 291 ++++++++ ...ser__tests__parse_import_statement-15.snap | 14 +- parser/src/parser/string.rs | 57 +- .../inputs/one_liners/from_import.py | 15 + typechecker/src/settings.rs | 2 +- 11 files changed, 794 insertions(+), 364 deletions(-) delete mode 100644 parser/src/parser/diagnostics.rs create mode 100644 parser/src/parser/error.rs create mode 100644 parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__one_liners@from_import.py-2.snap create mode 100644 parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__one_liners@from_import.py.snap create mode 100644 parser/test_data/inputs/one_liners/from_import.py diff --git a/parser/src/parser/diagnostics.rs b/parser/src/parser/diagnostics.rs deleted file mode 100644 index 309b2470..00000000 --- a/parser/src/parser/diagnostics.rs +++ /dev/null @@ -1,31 +0,0 @@ -use crate::{parser::ast::Node}; -use miette::{self, Diagnostic}; -use thiserror::{self, Error}; - -#[derive(Debug, Error, Diagnostic)] -#[error("Expect token")] -#[diagnostic()] -pub struct ExpectToken( - pub &'static str, - pub &'static str, - #[label("Expect `{0}` here, but found `{1}`")] pub Node, -); - -#[derive(Debug, Error, Diagnostic)] -#[error("Unexpected token")] -#[diagnostic()] -pub struct UnexpectedToken( - pub u32, - pub &'static str, - #[label("line: {0} Unexpected token `{1}`")] pub Node, -); - -#[derive(Debug, Error, Diagnostic)] -#[error("Unknown statement")] -#[diagnostic()] -pub struct UnknownStatement(pub &'static str, #[label("Unknown statement {0}")] pub Node); - -#[derive(Debug, Error, Diagnostic)] -#[error("invalid syntax")] -#[diagnostic()] -pub struct InvalidSyntax(pub String, #[label("invalid syntax {0}")] pub Node); diff --git a/parser/src/parser/error.rs b/parser/src/parser/error.rs new file mode 100644 index 00000000..ea0291e5 --- /dev/null +++ b/parser/src/parser/error.rs @@ -0,0 +1,27 @@ +use miette::Diagnostic; +use thiserror::Error; + +#[derive(Error, Diagnostic, Debug)] +pub enum ParsingError { + #[error(transparent)] + #[diagnostic(code(gen_color::io_error))] + IoError(#[from] std::io::Error), + + #[error( + "Invalid syntax" + )] + #[diagnostic(code( + gen_color::colors_and_steps_mismatch + ))] + InvalidSyntax { + path: Box, + msg: Box, + line: u32, + #[source_code] + input: String, + #[help] + advice: String, + #[label("span")] + span: (usize, usize), + }, +} diff --git a/parser/src/parser/mod.rs b/parser/src/parser/mod.rs index 77d5576d..a0d6fb71 100644 --- a/parser/src/parser/mod.rs +++ b/parser/src/parser/mod.rs @@ -1,6 +1,5 @@ pub mod ast; pub mod error; -mod diagnostics; mod expression; mod operator; pub mod parser; diff --git a/parser/src/parser/parser.rs b/parser/src/parser/parser.rs index ec9f236d..2b5bcc99 100644 --- a/parser/src/parser/parser.rs +++ b/parser/src/parser/parser.rs @@ -4,9 +4,8 @@ use crate::lexer::lexer::Lexer; use crate::parser::ast::*; use crate::parser::string::{extract_string_inside, is_string}; use crate::token::{Kind, Token, TokenValue}; -use miette::{Result, ErrReport, miette, bail}; +use miette::Result; -use super::diagnostics; use super::error::ParsingError; use super::expression::{is_atom, is_iterable}; use super::operator::{ @@ -28,7 +27,7 @@ pub struct Parser { // This is incremented when we see an opening bracket and decremented when we // see a closing bracket. nested_expression_list: usize, - errors: Vec, + pub errors: Vec, curr_line_string: String, curr_line_number: u32, path: String, @@ -75,7 +74,7 @@ impl Parser { if stmt.is_ok() { body.push(stmt.unwrap()); } else { - self.errors.push(stmt.err().unwrap()); + self.errors.push(stmt.err().unwrap().into()); self.bump_any(); } } @@ -90,15 +89,6 @@ impl Parser { } } - pub fn get_errors(&self) -> Vec { - let mut errors = vec![]; - for err in &self.errors { - errors.push(err.to_string()); - } - - errors - } - fn start_node(&self) -> Node { let token = self.cur_token(); Node::new(token.start, 0) @@ -115,12 +105,27 @@ impl Parser { self.cur_token.kind } - fn peek_token(&mut self) -> Result { - self.lexer.peek_token() + fn peek_token(&mut self) -> Result { + match self.lexer.peek_token() { + Ok(token) => Ok(token), + Err(err) => { + let pos = self.cur_token.end; + let line_number = self.get_line_number_of_character_position(pos); + let err = ParsingError::InvalidSyntax { + path: Box::from(self.path.as_str()), + msg: Box::from(format!("Syntax error: {:?}", err)), + line: line_number, + input: self.curr_line_string.clone(), + advice: "".to_string(), + span: (pos, pos), + }; + Err(err) + } + } } - fn peek_kind(&mut self) -> Result { - let token = self.lexer.peek_token()?; + fn peek_kind(&mut self) -> Result { + let token = self.peek_token()?; Ok(token.kind) } @@ -157,7 +162,8 @@ impl Parser { self.curr_line_string.clear(); self.curr_line_number += 1; } else { - self.curr_line_string.push_str(&self.source[self.prev_token_end..self.cur_token.end]); + self.curr_line_string + .push_str(&self.source[self.prev_token_end..self.cur_token.end]); } match token { Err(err) => { @@ -171,12 +177,17 @@ impl Parser { } } + fn advance_to_next_line_or_semicolon(&mut self) { + while !self.eat(Kind::NewLine) && !self.eat(Kind::SemiColon) && !self.at(Kind::Eof) { + self.advance(); + } + } + /// Expect a `Kind` or return error - pub fn expect(&mut self, kind: Kind) -> Result<()> { + pub fn expect(&mut self, kind: Kind) -> Result<(), ParsingError> { if !self.at(kind) { let found = self.cur_token.kind; let node = self.start_node(); - self.bump_any(); let range = self.finish_node(node); let err = ParsingError::InvalidSyntax { path: Box::from(self.path.as_str()), @@ -186,24 +197,51 @@ impl Parser { advice: "maybe you forgot to put this character".to_string(), span: (range.start, range.end), }; - println!("errors until now: {:?}", self.errors); - panic!("{:#?}", err); - self.errors.push(err.into()); + self.advance_to_next_line_or_semicolon(); + return Err(err); } - self.advance(); + self.bump_any(); Ok(()) } - fn unepxted_token(&mut self, node: Node, kind: Kind) -> Result<()> { + fn unepxted_token(&mut self, node: Node, kind: Kind) -> Result<(), ParsingError> { self.bump_any(); let range = self.finish_node(node); let line_number = self.get_line_number_of_character_position(range.start); - Err(miette!( - "Unexpected token {:?} at line {} at position {}", - kind, - line_number, - range.start, - )) + let err = ParsingError::InvalidSyntax { + path: Box::from(self.path.as_str()), + msg: Box::from(format!("Unexpected token {:?}", kind)), + line: line_number, + input: self.curr_line_string.clone(), + advice: "".to_string(), + span: (range.start, range.end), + }; + Err(err) + } + + // write this like the expect function + fn unexpected_token_new(&mut self, node: Node, kinds: Vec, advice: &str) -> ParsingError { + let curr_kind = self.cur_kind(); + self.bump_any(); + let range = self.finish_node(node); + let line_number = self.curr_line_number; + let mut expected = String::new(); + for kind in kinds { + expected.push_str(&format!("{:?}, ", kind)); + } + let err = ParsingError::InvalidSyntax { + path: Box::from(self.path.as_str()), + msg: Box::from(format!( + "Expected one of {:?} but found {:?}", + expected, + self.cur_kind() + )), + line: line_number, + input: self.curr_line_string.clone(), + advice: advice.to_string(), + span: (range.start, range.end), + }; + err } fn get_line_number_of_character_position(&self, pos: usize) -> u32 { @@ -219,7 +257,7 @@ impl Parser { line_number } - fn parse_simple_statement(&mut self) -> Result { + fn parse_simple_statement(&mut self) -> Result { let stmt = match self.cur_kind() { Kind::Assert => self.parse_assert_statement(), Kind::Pass => self.parse_pass_statement(), @@ -254,7 +292,7 @@ impl Parser { Ok(stmt) } - fn parse_compount_statement(&mut self) -> Result { + fn parse_compount_statement(&mut self) -> Result { let stmt = match self.cur_kind() { Kind::If => self.parse_if_statement(), Kind::While => self.parse_while_statement(), @@ -285,17 +323,25 @@ impl Parser { } _ => { let range = self.finish_node(self.start_node()); - Err( - diagnostics::ExpectToken("compound statement", self.cur_kind().to_str(), range) - .into(), - ) + Err(ParsingError::InvalidSyntax { + path: Box::from(self.path.as_str()), + msg: Box::from("Expected compound statement"), + line: self.curr_line_number, + input: self.curr_line_string.clone(), + advice: "maybe you forgot to put this character".to_string(), + span: (range.start, range.end), + }) } }; stmt } - fn err_if_statement_not_ending_in_new_line_or_semicolon(&mut self, node: Node, stmt: Statement) { + fn err_if_statement_not_ending_in_new_line_or_semicolon( + &mut self, + node: Node, + stmt: Statement, + ) { while self.eat(Kind::WhiteSpace) || self.eat(Kind::Comment) {} if !matches!(self.cur_kind(), Kind::NewLine | Kind::SemiColon | Kind::Eof) { @@ -307,14 +353,15 @@ impl Parser { msg: Box::from("Statement does not end in new line or semicolon"), line: self.curr_line_number, input: self.curr_line_string.clone(), - advice: "Split the statements into two seperate lines or add a semicolon".to_string(), + advice: "Split the statements into two seperate lines or add a semicolon" + .to_string(), span: (node.start, node.end), }; - self.errors.push(err.into()); + self.errors.push(err); } } - fn parse_if_statement(&mut self) -> Result { + fn parse_if_statement(&mut self) -> Result { self.bump(Kind::If); let node = self.start_node(); let test = Box::new(self.parse_named_expression()?); @@ -369,7 +416,7 @@ impl Parser { })) } - fn parse_while_statement(&mut self) -> Result { + fn parse_while_statement(&mut self) -> Result { let node = self.start_node(); self.bump(Kind::While); let test = Box::new(self.parse_named_expression()?); @@ -393,12 +440,14 @@ impl Parser { })) } - fn parse_for_statement(&mut self) -> Result { + fn parse_for_statement(&mut self) -> Result { let node = self.start_node(); let is_async = self.eat(Kind::Async); self.bump(Kind::For); let target = Box::new(self.parse_target_list()?); self.expect(Kind::In)?; + // TODO: I think this would not work for: + // for a in [1, 2, 3]: let iter_list = self.parse_starred_list(Kind::Colon)?; let iter = if iter_list.len() > 1 { Box::new(Expression::Tuple(Box::new(Tuple { @@ -408,12 +457,7 @@ impl Parser { } else if iter_list.len() == 1 { Box::new(iter_list.into_iter().next().unwrap()) } else { - return Err(diagnostics::ExpectToken( - "exptected iterator in for loop", - self.cur_kind().to_str(), - self.finish_node(node), - ) - .into()); + return Err(self.unexpected_token_new(node, vec![], "Expected expression")); }; self.expect(Kind::Colon)?; let body = self.parse_suite()?; @@ -445,7 +489,7 @@ impl Parser { } } - fn parse_with_statement(&mut self) -> Result { + fn parse_with_statement(&mut self) -> Result { let node = self.start_node(); let is_async = self.eat(Kind::Async); self.bump(Kind::With); @@ -470,7 +514,7 @@ impl Parser { } } - fn parse_with_items(&mut self) -> Result> { + fn parse_with_items(&mut self) -> Result, ParsingError> { let mut items = vec![]; if self.eat(Kind::LeftParen) { @@ -489,7 +533,7 @@ impl Parser { Ok(items) } - fn parse_with_item(&mut self) -> Result { + fn parse_with_item(&mut self) -> Result { let node = self.start_node(); let context_expr = Box::new(self.parse_expression_2()?); let optional_vars = if self.eat(Kind::As) { @@ -505,7 +549,7 @@ impl Parser { }) } - fn parse_try_statement(&mut self) -> Result { + fn parse_try_statement(&mut self) -> Result { let node = self.start_node(); let mut is_try_star = false; self.bump(Kind::Try); @@ -552,7 +596,7 @@ impl Parser { } } - fn parse_except_clauses(&mut self) -> Result> { + fn parse_except_clauses(&mut self) -> Result, ParsingError> { let mut handlers = vec![]; while self.at(Kind::Except) { let node = self.start_node(); @@ -585,7 +629,10 @@ impl Parser { Ok(handlers) } - fn parse_function_definition(&mut self, decorators: Vec) -> Result { + fn parse_function_definition( + &mut self, + decorators: Vec, + ) -> Result { // TODO: node excludes decorators // later we can extract the node for first decorator // and start the node from there @@ -631,7 +678,7 @@ impl Parser { } } - fn parse_decorated_function_def_or_class_def(&mut self) -> Result { + fn parse_decorated_function_def_or_class_def(&mut self) -> Result { let mut decorators = vec![]; while self.eat(Kind::MatrixMul) { let name = self.parse_named_expression()?; @@ -646,7 +693,10 @@ impl Parser { } } - fn parse_class_definition(&mut self, decorators: Vec) -> Result { + fn parse_class_definition( + &mut self, + decorators: Vec, + ) -> Result { // TODO: node excludes decorators // later we can extract the node for first decorator // and start the node from there @@ -675,7 +725,7 @@ impl Parser { } // https://peps.python.org/pep-0622/#appendix-a-full-grammar - fn parse_match_statement(&mut self) -> Result { + fn parse_match_statement(&mut self) -> Result { let node = self.start_node(); // This identifier is match word // match is a soft keyword @@ -695,14 +745,14 @@ impl Parser { // This is inaccuracy, but I don't know how // the grammar should be - fn parse_subject(&mut self) -> Result { + fn parse_subject(&mut self) -> Result { self.parse_star_named_expressions() } // star named expresison is similar to starred expression // but it does not accept expression as a value // https://docs.python.org/3/reference/grammar.html - fn parse_star_named_expression(&mut self) -> Result { + fn parse_star_named_expression(&mut self) -> Result { if self.at(Kind::Mul) { self.parse_or_expr() } else { @@ -710,7 +760,7 @@ impl Parser { } } - fn parse_star_named_expressions(&mut self) -> Result { + fn parse_star_named_expressions(&mut self) -> Result { let node = self.start_node(); let mut exprs = vec![self.parse_star_named_expression()?]; loop { @@ -730,7 +780,7 @@ impl Parser { } } - fn parse_cases(&mut self) -> Result> { + fn parse_cases(&mut self) -> Result, ParsingError> { let mut cases = vec![]; loop { if self.at(Kind::Dedent) || self.at(Kind::Eof) { @@ -759,7 +809,7 @@ impl Parser { Ok(cases) } - fn parse_guard(&mut self) -> Result { + fn parse_guard(&mut self) -> Result { self.expect(Kind::If)?; self.parse_named_expression() } @@ -767,7 +817,7 @@ impl Parser { // https://docs.python.org/3/reference/compound_stmts.html#grammar-token-python-grammar-patterns // The open sequence pattern is either a pattern or maybe star pattern // Here we expect at least one ( pattern or maybe star pattern ) - fn parse_patterns(&mut self) -> Result { + fn parse_patterns(&mut self) -> Result { let mut patterns = self.parse_open_sequence_pattern()?; if patterns.len() == 1 { @@ -777,7 +827,7 @@ impl Parser { } } - fn parse_pattern(&mut self) -> Result { + fn parse_pattern(&mut self) -> Result { let or_pattern = self.parse_or_pattern()?; if self.at(Kind::As) { @@ -794,7 +844,7 @@ impl Parser { } } - fn parse_or_pattern(&mut self) -> Result { + fn parse_or_pattern(&mut self) -> Result { let mut patterns = vec![]; patterns.push(self.parse_closed_pattern()?); loop { @@ -810,7 +860,7 @@ impl Parser { } } - fn parse_closed_pattern(&mut self) -> Result { + fn parse_closed_pattern(&mut self) -> Result { match self.cur_kind() { Kind::LeftParen => self.parse_sequence_pattern(), Kind::LeftBrace => self.parse_sequence_pattern(), @@ -852,18 +902,40 @@ impl Parser { }, _ => { let node = self.start_node(); - let msg = format!("unexpected token {:?}", self.cur_token().value); - self.bump_any(); - Err(diagnostics::InvalidSyntax( - msg, - self.finish_node(node), - ).into()) + Err(self.unexpected_token_new( + node, + vec![ + Kind::LeftParen, + Kind::LeftBrace, + Kind::LeftBracket, + Kind::Identifier, + Kind::Integer, + Kind::Binary, + Kind::Octal, + Kind::Hexadecimal, + Kind::PointFloat, + Kind::ExponentFloat, + Kind::ImaginaryInteger, + Kind::ImaginaryPointFloat, + Kind::ImaginaryExponentFloat, + Kind::None, + Kind::True, + Kind::False, + Kind::StringLiteral, + Kind::RawBytes, + Kind::Bytes, + Kind::RawString, + Kind::Minus, + Kind::Plus, + ], + "A match pattern starts with these characters", + )) }, } } // https://docs.python.org/3/reference/compound_stmts.html#literal-patterns - fn parse_literal_pattern(&mut self) -> Result { + fn parse_literal_pattern(&mut self) -> Result { let node = self.start_node(); let value = Box::new(self.parse_binary_arithmetic_operation()?); Ok(MatchPattern::MatchValue(MatchValue { @@ -872,7 +944,7 @@ impl Parser { })) } - fn parse_capture_or_wildcard_pattern(&mut self) -> Result { + fn parse_capture_or_wildcard_pattern(&mut self) -> Result { let capture_value = self.cur_token().value.to_string().clone(); let node = self.start_node(); self.expect(Kind::Identifier)?; @@ -896,7 +968,11 @@ impl Parser { // https://docs.python.org/3/reference/compound_stmts.html#value-patterns // This pattern shares the value logic with class pattern // so we pass that part to this method - fn parse_value_pattern(&mut self, value: Expression, node: Node) -> Result { + fn parse_value_pattern( + &mut self, + value: Expression, + node: Node, + ) -> Result { Ok(MatchPattern::MatchValue(MatchValue { node: self.finish_node(node), value: Box::new(value), @@ -905,7 +981,7 @@ impl Parser { // This parse attr does not allow anything other than names // in contrast to attribute parsing in primary expression - fn parse_attr(&mut self) -> Result { + fn parse_attr(&mut self) -> Result { let node = self.start_node(); let value = self.cur_token().value.to_string().clone(); let mut expr = Ok(Expression::Name(Box::new(Name { @@ -927,14 +1003,14 @@ impl Parser { // TODO: This has precedence over sequence pattern but I'm not sure // what is the right way to use it. - fn parse_group_pattern(&mut self) -> Result { + fn parse_group_pattern(&mut self) -> Result { self.expect(Kind::LeftParen)?; let pattern = self.parse_pattern()?; self.expect(Kind::RightParen)?; Ok(pattern) } - fn parse_mapping_pattern(&mut self) -> Result { + fn parse_mapping_pattern(&mut self) -> Result { let node = self.start_node(); self.expect(Kind::LeftBracket)?; let mut keys = vec![]; @@ -974,7 +1050,7 @@ impl Parser { })) } - fn parse_literal_or_value_pattern(&mut self) -> Result { + fn parse_literal_or_value_pattern(&mut self) -> Result { if self.cur_kind() == Kind::Identifier && !matches!(self.peek_kind(), Ok(Kind::Colon)) { let node = self.start_node(); let value = self.parse_attr()?; @@ -984,7 +1060,10 @@ impl Parser { } } - fn parse_class_pattern(&mut self, class_name: Expression) -> Result { + fn parse_class_pattern( + &mut self, + class_name: Expression, + ) -> Result { let node = self.start_node(); let class = Box::new(class_name); self.expect(Kind::LeftParen)?; @@ -1005,11 +1084,14 @@ impl Parser { kwd_patterns.push(self.parse_pattern()?); } else { if seen_keyword_pattern { - return Err(diagnostics::InvalidSyntax( - "positional pattern cannot follow keyword pattern".to_string(), - self.finish_node(node), - ) - .into()); + return Err(ParsingError::InvalidSyntax { + path: Box::from(self.path.as_str()), + msg: Box::from("Positional arguments cannot come after keyword arguments."), + line: self.curr_line_number, + input: self.curr_line_string.clone(), + advice: "you can only use arguments in form a=b here.".to_string(), + span: (node.start, node.end), + }); } patterns.push(self.parse_pattern()?); } @@ -1026,7 +1108,7 @@ impl Parser { })) } - fn parse_sequence_pattern(&mut self) -> Result { + fn parse_sequence_pattern(&mut self) -> Result { let node = self.start_node(); if self.eat(Kind::LeftBrace) { let pattern = self.parse_maybe_sequence_pattern()?; @@ -1037,15 +1119,15 @@ impl Parser { self.expect(Kind::RightParen)?; Ok(MatchPattern::MatchSequence(pattern)) } else { - return Err(diagnostics::InvalidSyntax( - "Expected a sequence pattern".to_string(), - self.finish_node(node), - ) - .into()); + Err(self.unexpected_token_new( + node, + vec![Kind::LeftBrace, Kind::LeftParen], + "Write a sequence pattern here", + )) } } - fn parse_open_sequence_pattern(&mut self) -> Result> { + fn parse_open_sequence_pattern(&mut self) -> Result, ParsingError> { let mut patterns = vec![]; patterns.push(self.parse_maybe_star_patern()?); loop { @@ -1057,7 +1139,7 @@ impl Parser { Ok(patterns) } - fn parse_maybe_sequence_pattern(&mut self) -> Result> { + fn parse_maybe_sequence_pattern(&mut self) -> Result, ParsingError> { let mut patterns = vec![]; loop { if self.at(Kind::RightBrace) { @@ -1070,7 +1152,7 @@ impl Parser { } Ok(patterns) } - fn parse_maybe_star_patern(&mut self) -> Result { + fn parse_maybe_star_patern(&mut self) -> Result { if self.eat(Kind::Mul) { self.parse_capture_or_wildcard_pattern() } else { @@ -1078,28 +1160,14 @@ impl Parser { } } - fn parse_assignment_or_expression_statement(&mut self) -> Result { + fn parse_assignment_or_expression_statement(&mut self) -> Result { let node = self.start_node(); let lhs = self.parse_expression()?; if self.cur_kind() == Kind::Assign { self.parse_assignment_statement(node, lhs) - } else if matches!( - self.cur_kind(), - Kind::AddAssign - | Kind::SubAssign - | Kind::MulAssign - | Kind::DivAssign - | Kind::IntDivAssign - | Kind::ModAssign - | Kind::PowAssign - | Kind::BitAndAssign - | Kind::BitOrAssign - | Kind::BitXorAssign - | Kind::ShiftLeftAssign - | Kind::ShiftRightAssign - ) { - self.parse_aug_assignment_statement(node, lhs) + } else if let Some(op) = self.parse_aug_assign_op() { + self.parse_aug_assignment_statement(node, lhs, op) } else if self.at(Kind::Colon) { self.parse_ann_assign_statement(node, lhs) } else { @@ -1108,7 +1176,7 @@ impl Parser { } // https://docs.python.org/3/reference/compound_stmts.html#grammar-token-python-grammar-suite - fn parse_suite(&mut self) -> Result> { + fn parse_suite(&mut self) -> Result, ParsingError> { let stmts = if self.eat(Kind::NewLine) { self.consume_whitespace_and_newline(); self.expect(Kind::Indent)?; @@ -1130,7 +1198,7 @@ impl Parser { } // https://docs.python.org/3/reference/compound_stmts.html#grammar-token-python-grammar-statement - fn parse_statement(&mut self) -> Result> { + fn parse_statement(&mut self) -> Result, ParsingError> { if is_at_compound_statement(self.cur_token()) { let comp_stmt = self.parse_compount_statement()?; Ok(vec![comp_stmt]) @@ -1140,7 +1208,7 @@ impl Parser { } // https://docs.python.org/3/reference/simple_stmts.html#grammar-token-python-grammar-stmt-list - fn parse_statement_list(&mut self) -> Result> { + fn parse_statement_list(&mut self) -> Result, ParsingError> { let mut stmts = vec![]; let stmt = self.parse_simple_statement()?; stmts.push(stmt); @@ -1151,7 +1219,7 @@ impl Parser { Ok(stmts) } - fn parse_del_statement(&mut self) -> Result { + fn parse_del_statement(&mut self) -> Result { let node = self.start_node(); self.bump(Kind::Del); let expr = self.parse_target_list()?; @@ -1169,7 +1237,11 @@ impl Parser { })) } - fn parse_assignment_statement(&mut self, start: Node, lhs: Expression) -> Result { + fn parse_assignment_statement( + &mut self, + start: Node, + lhs: Expression, + ) -> Result { let mut targets = vec![lhs]; self.bump(Kind::Assign); let value = loop { @@ -1195,8 +1267,8 @@ impl Parser { &mut self, start: Node, lhs: Expression, - ) -> Result { - let op = self.parse_aug_assign_op()?; + op: AugAssignOp, + ) -> Result { let value = self.parse_assignment_value()?; Ok(Statement::AugAssignStatement(AugAssign { @@ -1207,7 +1279,11 @@ impl Parser { })) } - fn parse_ann_assign_statement(&mut self, start: Node, lhs: Expression) -> Result { + fn parse_ann_assign_statement( + &mut self, + start: Node, + lhs: Expression, + ) -> Result { self.bump(Kind::Colon); let annotation = self.parse_expression_2()?; let value = if self.eat(Kind::Assign) { @@ -1227,14 +1303,14 @@ impl Parser { // The value is either expression list or yield expression // https://docs.python.org/3/reference/simple_stmts.html#assignment-statements - fn parse_assignment_value(&mut self) -> Result { + fn parse_assignment_value(&mut self) -> Result { if self.cur_kind() == Kind::Yield { return self.parse_yield_expression(); } self.parse_expression_list() } - fn parse_aug_assign_op(&mut self) -> Result { + fn parse_aug_assign_op(&mut self) -> Option { let op = match self.cur_kind() { Kind::AddAssign => AugAssignOp::Add, Kind::SubAssign => AugAssignOp::Sub, @@ -1248,20 +1324,13 @@ impl Parser { Kind::BitXorAssign => AugAssignOp::BitXor, Kind::ShiftLeftAssign => AugAssignOp::LShift, Kind::ShiftRightAssign => AugAssignOp::RShift, - _ => { - return Err(diagnostics::ExpectToken( - "augmented assignment", - self.cur_kind().to_str(), - self.finish_node(self.start_node()), - ) - .into()); - } + _ => return None, }; self.bump_any(); - Ok(op) + Some(op) } - fn parse_assert_statement(&mut self) -> Result { + fn parse_assert_statement(&mut self) -> Result { let node = self.start_node(); self.bump(Kind::Assert); let test = self.parse_expression_2()?; @@ -1278,7 +1347,7 @@ impl Parser { })) } - fn parse_pass_statement(&mut self) -> Result { + fn parse_pass_statement(&mut self) -> Result { let node = self.start_node(); self.bump(Kind::Pass); Ok(Statement::Pass(Pass { @@ -1286,7 +1355,7 @@ impl Parser { })) } - fn parse_return_statement(&mut self) -> Result { + fn parse_return_statement(&mut self) -> Result { let node = self.start_node(); self.bump(Kind::Return); let value = if self.at(Kind::NewLine) { @@ -1301,7 +1370,7 @@ impl Parser { } // https://docs.python.org/3/reference/simple_stmts.html#the-raise-statement - fn parse_raise_statement(&mut self) -> Result { + fn parse_raise_statement(&mut self) -> Result { let node = self.start_node(); self.bump(Kind::Raise); let exc = if matches!(self.cur_kind(), Kind::NewLine | Kind::Eof) { @@ -1322,7 +1391,7 @@ impl Parser { } // https://docs.python.org/3/reference/simple_stmts.html#the-break-statement - fn parse_break_statement(&mut self) -> Result { + fn parse_break_statement(&mut self) -> Result { let node = self.start_node(); self.bump(Kind::Break); Ok(Statement::Break(Break { @@ -1331,7 +1400,7 @@ impl Parser { } // https://docs.python.org/3/reference/simple_stmts.html#the-continue-statement - fn parse_continue_statement(&mut self) -> Result { + fn parse_continue_statement(&mut self) -> Result { let node = self.start_node(); self.bump(Kind::Continue); Ok(Statement::Continue(Continue { @@ -1340,7 +1409,7 @@ impl Parser { } // https://docs.python.org/3/reference/simple_stmts.html#the-global-statement - fn parse_global_statement(&mut self) -> Result { + fn parse_global_statement(&mut self) -> Result { let node = self.start_node(); self.bump(Kind::Global); let mut names = vec![]; @@ -1359,7 +1428,7 @@ impl Parser { } // https://docs.python.org/3/reference/simple_stmts.html#the-nonlocal-statement - fn parse_nonlocal_statement(&mut self) -> Result { + fn parse_nonlocal_statement(&mut self) -> Result { let node = self.start_node(); self.bump(Kind::Nonlocal); let mut names = vec![]; @@ -1378,13 +1447,13 @@ impl Parser { } // https://docs.python.org/3/reference/simple_stmts.html#the-import-statement - fn parse_import_statement(&mut self) -> Result { + fn parse_import_statement(&mut self) -> Result { let node = self.start_node(); self.bump(Kind::Import); let mut aliases = vec![]; while self.at(Kind::Identifier) { let node = self.start_node(); - let (module, _) = self.parse_module_name(); + let (module, _) = self.parse_module_name()?; let alias = self.parse_alias(module, node); aliases.push(alias); @@ -1399,10 +1468,10 @@ impl Parser { } // https://docs.python.org/3/reference/simple_stmts.html#the-from-import-statement - fn parse_from_import_statement(&mut self) -> Result { + fn parse_from_import_statement(&mut self) -> Result { let import_node = self.start_node(); self.bump(Kind::From); - let (module, level) = self.parse_module_name(); + let (module, level) = self.parse_module_name()?; self.bump(Kind::Import); let mut aliases = vec![]; if self.eat(Kind::LeftParen) { @@ -1432,7 +1501,11 @@ impl Parser { aliases.push(self.parse_alias("*".to_string(), self.start_node())); self.bump(Kind::Mul); } else { - return Err(self.unepxted_token(import_node, self.cur_kind()).unwrap_err()); + return Err(self.unexpected_token_new( + import_node, + vec![Kind::Identifier, Kind::Mul], + "Use * for importing everthing or use () to specify names to import or specify the name you want to import" + )); } Ok(Statement::ImportFrom(ImportFrom { node: self.finish_node(import_node), @@ -1457,7 +1530,7 @@ impl Parser { } } - fn parse_module_name(&mut self) -> (String, usize) { + fn parse_module_name(&mut self) -> Result<(String, usize), ParsingError> { let mut level = 0; while self.at(Kind::Dot) | self.at(Kind::Ellipsis) { match self.cur_kind() { @@ -1467,22 +1540,28 @@ impl Parser { Kind::Ellipsis => { level += 3; } - _ => unreachable!(), + _ => { + return Err(self.unexpected_token_new( + self.start_node(), + vec![Kind::Dot, Kind::Ellipsis], + "use . or ... to specify relative import", + )) + } } self.bump_any(); } let mut module = self.cur_token().value.to_string(); - self.bump(Kind::Identifier); + self.expect(Kind::Identifier); while self.eat(Kind::Dot) { module.push('.'); module.push_str(self.cur_token().value.to_string().as_str()); - self.bump(Kind::Identifier); + self.expect(Kind::Identifier); } - (module, level) + Ok((module, level)) } // https://docs.python.org/3/library/ast.html#ast.Expr - fn parse_expression(&mut self) -> Result { + fn parse_expression(&mut self) -> Result { let node = self.start_node(); let expr = self.parse_expression_2()?; @@ -1506,7 +1585,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#conditional-expressions - fn parse_conditional_expression(&mut self) -> Result { + fn parse_conditional_expression(&mut self) -> Result { let or_test = self.parse_or_test(); if self.eat(Kind::If) { let test = self.parse_or_test()?; @@ -1524,7 +1603,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#assignment-expressions - fn parse_named_expression(&mut self) -> Result { + fn parse_named_expression(&mut self) -> Result { let node = self.start_node(); if self.at(Kind::Identifier) && matches!(self.peek_kind()?, Kind::Walrus) { let identifier = self.cur_token().value.to_string(); @@ -1553,7 +1632,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#list-displays - fn parse_list(&mut self) -> Result { + fn parse_list(&mut self) -> Result { let node = self.start_node(); self.bump(Kind::LeftBrace); if self.eat(Kind::RightBrace) { @@ -1564,7 +1643,11 @@ impl Parser { } let started_with_star = self.at(Kind::Mul); let first_elm = self.parse_star_named_expression()?; - if !started_with_star && self.at(Kind::For) && (self.at(Kind::For) || self.at(Kind::Async) && matches!(self.peek_kind(), Ok(Kind::For))) { + if !started_with_star + && self.at(Kind::For) + && (self.at(Kind::For) + || self.at(Kind::Async) && matches!(self.peek_kind(), Ok(Kind::For))) + { let generators = self.parse_comp_for()?; self.expect(Kind::RightBrace)?; return Ok(Expression::ListComp(Box::new(ListComp { @@ -1585,7 +1668,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#parenthesized-forms - fn parse_paren_form_or_generator(&mut self) -> Result { + fn parse_paren_form_or_generator(&mut self) -> Result { let node = self.start_node(); self.expect(Kind::LeftParen)?; if self.at(Kind::RightParen) { @@ -1632,7 +1715,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#displays-for-lists-sets-and-dictionaries - fn parse_comp_for(&mut self) -> Result> { + fn parse_comp_for(&mut self) -> Result, ParsingError> { // if current token is async let is_async = self.eat(Kind::Async); @@ -1670,7 +1753,7 @@ impl Parser { } // https://docs.python.org/3/reference/simple_stmts.html#grammar-token-python-grammar-target_list - fn parse_target_list(&mut self) -> Result { + fn parse_target_list(&mut self) -> Result { let node = self.start_node(); let mut targets = vec![]; loop { @@ -1690,7 +1773,7 @@ impl Parser { } // https://docs.python.org/3/reference/simple_stmts.html#grammar-token-python-grammar-target - fn parse_target(&mut self) -> Result { + fn parse_target(&mut self) -> Result { let node = self.start_node(); let mut targets = vec![]; let target = match self.cur_kind() { @@ -1779,7 +1862,7 @@ impl Parser { } } - fn parse_dict_or_set(&mut self) -> Result { + fn parse_dict_or_set(&mut self) -> Result { let node = self.start_node(); self.bump(Kind::LeftBracket); if self.eat(Kind::RightBracket) { @@ -1801,8 +1884,12 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#set-displays - fn parse_set(&mut self, node: Node, first_elm: Expression) -> Result { - if !matches!(first_elm, Expression::Starred(_)) && self.at(Kind::For) && (self.at(Kind::For) || self.at(Kind::Async) && matches!(self.peek_kind(), Ok(Kind::For))) { + fn parse_set(&mut self, node: Node, first_elm: Expression) -> Result { + if !matches!(first_elm, Expression::Starred(_)) + && self.at(Kind::For) + && (self.at(Kind::For) + || self.at(Kind::Async) && matches!(self.peek_kind(), Ok(Kind::For))) + { let generators = self.parse_comp_for()?; self.consume_whitespace_and_newline(); self.expect(Kind::RightBracket)?; @@ -1824,7 +1911,11 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#dictionary-displays - fn parse_dict(&mut self, node: Node, first_key: Expression) -> Result { + fn parse_dict( + &mut self, + node: Node, + first_key: Expression, + ) -> Result { self.expect(Kind::Colon)?; let first_val = self.parse_expression_2()?; if self.at(Kind::For) || self.at(Kind::Async) && matches!(self.peek_kind(), Ok(Kind::For)) { @@ -1877,7 +1968,10 @@ impl Parser { // termination_kind is used to know when to stop parsing the list // for example to parse a tuple the termination_kind is Kind::RightParen // caller is responsible to consume the first & last occurrence of the termination_kind - fn parse_starred_list(&mut self, termination_kind: Kind) -> Result> { + fn parse_starred_list( + &mut self, + termination_kind: Kind, + ) -> Result, ParsingError> { let mut expressions = vec![]; while !self.at(Kind::Eof) && !self.at(termination_kind) { if self.eat(Kind::Comment) || self.consume_whitespace_and_newline() { @@ -1893,14 +1987,14 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#expression-lists - fn parse_starred_item(&mut self) -> Result { + fn parse_starred_item(&mut self) -> Result { let mut node = self.start_node(); if self.eat(Kind::Mul) { let starred_value_kind = self.cur_kind(); let expr = self.parse_or_expr()?; node = self.finish_node(node); if !is_iterable(&expr) { - self.unepxted_token(node, starred_value_kind, ); + self.unepxted_token(node, starred_value_kind); } return Ok(Expression::Starred(Box::new(Starred { node: self.finish_node(node), @@ -1911,7 +2005,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#conditional-expressions - fn parse_expression_2(&mut self) -> Result { + fn parse_expression_2(&mut self) -> Result { let node = self.start_node(); if self.eat(Kind::Lambda) { let params_list = self.parse_parameters(true).expect("lambda params"); @@ -1928,7 +2022,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#boolean-operations - fn parse_or_test(&mut self) -> Result { + fn parse_or_test(&mut self) -> Result { let node = self.start_node(); let lhs = self.parse_and_test()?; if self.eat(Kind::Or) { @@ -1943,7 +2037,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#boolean-operations - fn parse_and_test(&mut self) -> Result { + fn parse_and_test(&mut self) -> Result { let node = self.start_node(); let lhs = self.parse_not_test()?; if self.at(Kind::And) { @@ -1959,7 +2053,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#boolean-operations - fn parse_not_test(&mut self) -> Result { + fn parse_not_test(&mut self) -> Result { let node = self.start_node(); if self.at(Kind::Not) { self.bump(Kind::Not); @@ -1974,7 +2068,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#comparisons - fn parse_comparison(&mut self) -> Result { + fn parse_comparison(&mut self) -> Result { let node = self.start_node(); let or_expr = self.parse_or_expr()?; let mut ops = vec![]; @@ -1998,7 +2092,7 @@ impl Parser { // Binary bitwise operations // https://docs.python.org/3/reference/expressions.html#binary-bitwise-operations - fn parse_or_expr(&mut self) -> Result { + fn parse_or_expr(&mut self) -> Result { let node = self.start_node(); let xor_expr = self.parse_xor_expr()?; if self.eat(Kind::BitOr) { @@ -2014,7 +2108,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#binary-bitwise-operations - fn parse_xor_expr(&mut self) -> Result { + fn parse_xor_expr(&mut self) -> Result { let node = self.start_node(); let and_expr = self.parse_and_expr()?; if self.eat(Kind::BitXor) { @@ -2030,7 +2124,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#binary-bitwise-operations - fn parse_and_expr(&mut self) -> Result { + fn parse_and_expr(&mut self) -> Result { let node = self.start_node(); let shift_expr = self.parse_shift_expr()?; @@ -2047,7 +2141,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#shifting-operations - fn parse_shift_expr(&mut self) -> Result { + fn parse_shift_expr(&mut self) -> Result { let node = self.start_node(); let arith_expr = self.parse_binary_arithmetic_operation()?; if self.at(Kind::LeftShift) || self.at(Kind::RightShift) { @@ -2069,7 +2163,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#binary-arithmetic-operations - fn parse_binary_arithmetic_operation(&mut self) -> Result { + fn parse_binary_arithmetic_operation(&mut self) -> Result { let node = self.start_node(); let lhs = self.parse_unary_arithmetric_operation()?; if is_bin_arithmetic_op(&self.cur_kind()) { @@ -2086,7 +2180,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#unary-arithmetic-and-bitwise-operations - fn parse_unary_arithmetric_operation(&mut self) -> Result { + fn parse_unary_arithmetric_operation(&mut self) -> Result { let node = self.start_node(); if is_unary_op(&self.cur_kind()) { let op = map_unary_operator(&self.cur_kind()); @@ -2102,7 +2196,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#the-power-operator - fn parse_power_expression(&mut self) -> Result { + fn parse_power_expression(&mut self) -> Result { let node = self.start_node(); let base = if self.at(Kind::Await) { self.bump(Kind::Await); @@ -2129,7 +2223,7 @@ impl Parser { // https://docs.python.org/3/reference/expressions.html#primaries // primaries can be chained together, when they are chained the base is the previous primary - fn parse_primary(&mut self, base: Option) -> Result { + fn parse_primary(&mut self, base: Option) -> Result { let node = self.start_node(); let mut atom_or_primary = if base.is_some() { base.unwrap() @@ -2139,7 +2233,6 @@ impl Parser { return Err(self.unepxted_token(node, self.cur_kind()).err().unwrap()); }; - let mut primary = if self.at(Kind::Dot) { // TODO: does not handle cases like a.b[0].c self.parse_atribute_ref(node, atom_or_primary) @@ -2159,12 +2252,7 @@ impl Parser { } if self.at(Kind::Identifier) && matches!(self.peek_kind(), Ok(Kind::Assign)) { seen_keyword = true; - let keyword_arg = match self.parse_keyword_item() { - Ok(keyword_arg) => keyword_arg, - Err(_) => { - bail!("Expected keyword argument but found {}, at {:?}", self.cur_kind().to_str(), self.cur_token().start); - } - }; + let keyword_arg = self.parse_keyword_item()?; keyword_args.push(keyword_arg); } else if self.at(Kind::Mul) { let star_arg_node = self.start_node(); @@ -2186,13 +2274,17 @@ impl Parser { keyword_args.push(kwarg); } else { if seen_keyword { - // TODO change to synatx error - return Err(diagnostics::ExpectToken( - "Positional argument after keyword argument", - self.cur_kind().to_str(), - self.finish_node(self.start_node()), - ) - .into()); + let node_end = self.finish_node(node); + return Err(ParsingError::InvalidSyntax { + path: Box::from(self.path.as_str()), + msg: Box::from( + "Positional arguments cannot come after keyword arguments.", + ), + line: self.curr_line_number, + input: self.curr_line_string.clone(), + advice: "you can only use arguments in form a=b here.".to_string(), + span: (node_end.start, node_end.end), + }); } let arg = self.parse_named_expression()?; positional_args.push(arg); @@ -2217,7 +2309,10 @@ impl Parser { Ok(atom_or_primary) }; - if matches!(self.cur_kind(), Kind::LeftBrace | Kind::LeftParen | Kind::Dot) { + if matches!( + self.cur_kind(), + Kind::LeftBrace | Kind::LeftParen | Kind::Dot + ) { primary = self.parse_primary(Some(primary?)); } @@ -2226,27 +2321,18 @@ impl Parser { // https://docs.python.org/3/reference/expressions.html#grammar-token-python-grammar-argument_list // returns args, keywords - fn parse_argument_list(&mut self) -> Result<(Vec, Vec)> { + fn parse_argument_list(&mut self) -> Result<(Vec, Vec), ParsingError> { let mut seen_keyword = false; let mut positional_args = vec![]; let mut keyword_args = vec![]; loop { + let node = self.start_node(); if self.at(Kind::RightParen) { break; } if self.at(Kind::Identifier) && matches!(self.peek_kind(), Ok(Kind::Assign)) { seen_keyword = true; - let keyword_arg = match self.parse_keyword_item() { - Ok(keyword_arg) => keyword_arg, - Err(_) => { - return Err(diagnostics::ExpectToken( - "Keyword argument", - self.cur_kind().to_str(), - self.finish_node(self.start_node()), - ) - .into()); - } - }; + let keyword_arg = self.parse_keyword_item()?; keyword_args.push(keyword_arg); } else if self.at(Kind::Mul) { let star_arg_node = self.start_node(); @@ -2268,13 +2354,15 @@ impl Parser { keyword_args.push(kwarg); } else { if seen_keyword { - // TODO change to synatx error - return Err(diagnostics::ExpectToken( - "Positional argument after keyword argument", - self.cur_kind().to_str(), - self.finish_node(self.start_node()), - ) - .into()); + let node_end = self.finish_node(node); + return Err(ParsingError::InvalidSyntax { + path: Box::from(self.path.as_str()), + msg: Box::from("Positional arguments cannot come after keyword arguments."), + line: self.curr_line_number, + input: self.curr_line_string.clone(), + advice: "you can only use arguments in form a=b here.".to_string(), + span: (node_end.start, node_end.end), + }); } let arg = self.parse_named_expression()?; positional_args.push(arg); @@ -2286,7 +2374,11 @@ impl Parser { Ok((positional_args, keyword_args)) } - fn parse_atribute_ref(&mut self, node: Node, value: Expression) -> Result { + fn parse_atribute_ref( + &mut self, + node: Node, + value: Expression, + ) -> Result { let mut expr = Ok(value); while self.eat(Kind::Dot) { let attr_val = self.cur_token().value.to_string(); @@ -2300,7 +2392,11 @@ impl Parser { expr } - fn parse_subscript(&mut self, node: Node, value: Expression) -> Result { + fn parse_subscript( + &mut self, + node: Node, + value: Expression, + ) -> Result { let mut expr = Ok(value); while self.eat(Kind::LeftBrace) { let slice = self.parse_slice_list()?; @@ -2314,7 +2410,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#atoms - fn parse_atom(&mut self) -> Result { + fn parse_atom(&mut self) -> Result { let node = self.start_node(); if self.at(Kind::Yield) { self.parse_yield_expression() @@ -2381,7 +2477,7 @@ impl Parser { } } - fn parse_identifier(&mut self) -> Result { + fn parse_identifier(&mut self) -> Result { let node = self.start_node(); let value = self.cur_token().value.to_string(); self.expect(Kind::Identifier)?; @@ -2392,7 +2488,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#yield-expressions - fn parse_yield_expression(&mut self) -> Result { + fn parse_yield_expression(&mut self) -> Result { let yield_node = self.start_node(); self.expect(Kind::Yield)?; @@ -2420,7 +2516,7 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#expression-lists - fn parse_expression_list(&mut self) -> Result { + fn parse_expression_list(&mut self) -> Result { let node = self.start_node(); let mut expressions = vec![]; expressions.push(self.parse_expression_2()?); @@ -2442,7 +2538,7 @@ impl Parser { &mut self, node: Node, first_elm: Expression, - ) -> Result { + ) -> Result { let mut elements = vec![]; // if tuple has one element but there's a comma after // it, it's a tuple @@ -2468,7 +2564,7 @@ impl Parser { // https://docs.python.org/3/reference/expressions.html#slicings // Clsoing will be consumed by this function - fn parse_slice_list(&mut self) -> Result { + fn parse_slice_list(&mut self) -> Result { let node = self.start_node(); let mut elements = vec![]; while !self.at(Kind::Eof) && !self.at(Kind::RightBrace) { @@ -2497,7 +2593,10 @@ impl Parser { } // https://docs.python.org/3/reference/expressions.html#slicings - fn parse_proper_slice(&mut self, lower: Option) -> Result { + fn parse_proper_slice( + &mut self, + lower: Option, + ) -> Result { let node = self.start_node(); let slice_lower = if lower.is_some() { @@ -2533,7 +2632,12 @@ impl Parser { }))) } - fn map_to_atom(&mut self, start: Node, kind: &Kind, value: TokenValue) -> Result { + fn map_to_atom( + &mut self, + start: Node, + kind: &Kind, + value: TokenValue, + ) -> Result { let atom = match kind { Kind::Identifier => Expression::Name(Box::new(Name { node: self.finish_node(start), @@ -2635,17 +2739,13 @@ impl Parser { value: ConstantValue::Ellipsis, })), _ => { - return Err(diagnostics::InvalidSyntax( - format!("unexpected token {:?}", kind), - self.finish_node(start), - ) - .into()) + return Err(self.unepxted_token(start, self.cur_kind()).err().unwrap()); } }; Ok(atom) } - fn parse_comp_operator(&mut self) -> Result { + fn parse_comp_operator(&mut self) -> Result { let node = self.start_node(); let op = match self.cur_kind() { Kind::Less => ComparisonOperator::Lt, @@ -2679,7 +2779,7 @@ impl Parser { Ok(op) } - fn parse_bin_arithmetic_op(&mut self) -> Result { + fn parse_bin_arithmetic_op(&mut self) -> Result { let op = match self.cur_kind() { Kind::Plus => Ok(BinaryOperator::Add), Kind::Minus => Ok(BinaryOperator::Sub), @@ -2689,17 +2789,20 @@ impl Parser { Kind::Mod => Ok(BinaryOperator::Mod), Kind::Pow => Ok(BinaryOperator::Pow), Kind::MatrixMul => Ok(BinaryOperator::MatMult), - _ => Err(self.unepxted_token(self.start_node(), self.cur_kind()).err().unwrap()), + _ => Err(self + .unepxted_token(self.start_node(), self.cur_kind()) + .err() + .unwrap()), }; self.bump_any(); - op + Ok(op?) } - fn parse_keyword_item(&mut self) -> Result { + fn parse_keyword_item(&mut self) -> Result { let node = self.start_node(); let arg = self.cur_token().value.to_string(); - self.bump(Kind::Identifier); - self.bump(Kind::Assign); + self.expect(Kind::Identifier); + self.expect(Kind::Assign); let value = Box::new(self.parse_expression_2()?); Ok(Keyword { node: self.finish_node(node), @@ -2708,7 +2811,7 @@ impl Parser { }) } - fn parse_parameters(&mut self, is_lambda: bool) -> Result { + fn parse_parameters(&mut self, is_lambda: bool) -> Result { let node = self.start_node(); let mut seen_vararg = false; let mut seen_kwarg = false; @@ -2728,11 +2831,14 @@ impl Parser { if seen_vararg { kwonlyargs.push(param); } else if seen_kwarg { - return Err(diagnostics::InvalidSyntax( - "parameter after kwarg".to_string(), - self.finish_node(node), - ) - .into()); + return Err(ParsingError::InvalidSyntax { + path: Box::from(self.path.as_str()), + msg: Box::from("positional argument follows keyword argument"), + line: self.curr_line_number, + input: self.curr_line_string.clone(), + advice: "you can only use arguments in form a=b here.".to_string(), + span: (self.cur_token().start, self.cur_token().end), + }); } else { args.push(param); } @@ -2744,11 +2850,14 @@ impl Parser { must_have_default = true; defaults.push(default_value); } else if must_have_default { - return Err(diagnostics::InvalidSyntax( - "non-default argument follows default argument".to_string(), - self.finish_node(node), - ) - .into()); + return Err(ParsingError::InvalidSyntax { + path: Box::from(self.path.as_str()), + msg: Box::from("non-default argument follows default argument"), + line: self.curr_line_number, + input: self.curr_line_string.clone(), + advice: "you can only use arguments with default value here.".to_string(), + span: (self.cur_token().start, self.cur_token().end), + }); } // If a parameter has a default value, all following parameters up until the “*” // must also have a default value — this is a syntactic restriction that is not expressed by the grammar. @@ -2760,11 +2869,14 @@ impl Parser { let (param, default) = self.parse_parameter(is_lambda)?; // default is not allowed for vararg if default.is_some() { - return Err(diagnostics::InvalidSyntax( - "var-positional argument cannot have default value".to_string(), - self.finish_node(node), - ) - .into()); + return Err(ParsingError::InvalidSyntax { + path: Box::from(self.path.as_str()), + msg: Box::from("var-positional argument cannot have default value"), + line: self.curr_line_number, + input: self.curr_line_string.clone(), + advice: "remove the default value of this argument".to_string(), + span: (self.cur_token().start, self.cur_token().end), + }); } vararg = Some(param); } else if self.eat(Kind::Pow) { @@ -2772,11 +2884,14 @@ impl Parser { let (param, default) = self.parse_parameter(is_lambda)?; // default is not allowed for kwarg if default.is_some() { - return Err(diagnostics::InvalidSyntax( - "var-keyword argument cannot have default value".to_string(), - self.finish_node(node), - ) - .into()); + return Err(ParsingError::InvalidSyntax { + path: Box::from(self.path.as_str()), + msg: Box::from("var-keyword argument cannot have default value"), + line: self.curr_line_number, + input: self.curr_line_string.clone(), + advice: "remove the default value of this argument".to_string(), + span: (self.cur_token().start, self.cur_token().end), + }); } kwarg = Some(param); } else if self.eat(Kind::Comma) { @@ -2809,7 +2924,10 @@ impl Parser { // || matches!(self.peek_kind(), Ok(Kind::Colon)) } - fn parse_parameter(&mut self, is_lambda: bool) -> Result<(Arg, Option)> { + fn parse_parameter( + &mut self, + is_lambda: bool, + ) -> Result<(Arg, Option), ParsingError> { let node = self.start_node(); let arg = self.cur_token().value.to_string(); self.bump(Kind::Identifier); @@ -2836,7 +2954,7 @@ impl Parser { } // the FStringStart token is consumed by the caller - fn parse_fstring(&mut self) -> Result> { + fn parse_fstring(&mut self) -> Result, ParsingError> { let mut expressions = vec![]; while self.cur_kind() != Kind::FStringEnd { match self.cur_kind() { @@ -2854,7 +2972,10 @@ impl Parser { self.expect(Kind::RightBracket)?; } _ => { - return Err(self.unepxted_token(self.start_node(), self.cur_kind()).err().unwrap()); + return Err(self + .unepxted_token(self.start_node(), self.cur_kind()) + .err() + .unwrap()); } } } @@ -2868,7 +2989,7 @@ mod tests { use std::fs; use super::*; - use insta::{assert_debug_snapshot, glob}; + use insta::{assert_debug_snapshot, glob, assert_display_snapshot}; #[test] fn test_parse_assignment() { @@ -3006,37 +3127,6 @@ mod tests { } } - #[test] - fn test_parse_import_statement() { - for test_case in &[ - "import a", - "import a as b", - "import a.b", - "import a.b as c", - "import a.b.c", - "from a import b", - "from a import b as c", - "from a.b import c", - "from a.b import c as d", - "from ...a import b", - "from ....a import b", - "from .....a import b", - "from ......a import b", - "from .......a import b", - "from ...", - ] { - let mut parser = Parser::new(test_case.to_string(), String::from("")); - let program = parser.parse(); - - insta::with_settings!({ - description => test_case.to_string(), // the template source code - omit_expression => true // do not include the default expression - }, { - assert_debug_snapshot!(program); - }); - } - } - #[test] fn test_parse_bool_op() { for test_case in &["a or b", "a and b", "a or b or c", "a and b or c"] { @@ -3624,12 +3714,12 @@ class a: pass", assert_debug_snapshot!(program); }); - if !parser.get_errors().is_empty() { + if !parser.errors.is_empty() { insta::with_settings!({ description => test_case, omit_expression => true }, { - assert_debug_snapshot!(parser.get_errors()); + assert_debug_snapshot!(parser.errors); }); } }); @@ -3649,12 +3739,12 @@ class a: pass", }, { assert_debug_snapshot!(program); }); - if !parser.get_errors().is_empty() { + if !parser.errors.is_empty() { insta::with_settings!({ description => test_case, omit_expression => true }, { - assert_debug_snapshot!(parser.get_errors()); + assert_debug_snapshot!(parser.errors); }); } } diff --git a/parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__complete@separate_statements.py-2.snap b/parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__complete@separate_statements.py-2.snap index 6f98ce5c..251512a7 100644 --- a/parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__complete@separate_statements.py-2.snap +++ b/parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__complete@separate_statements.py-2.snap @@ -4,6 +4,26 @@ description: "# Test case to check that we return correct error when two python input_file: parser/test_data/inputs/separate_statements.py --- [ - "Statement must be seperated with new line or semicolon but found Token { kind: Identifier, value: Str(\"y\"), start: 166, end: 167 }", - "Statement must be seperated with new line or semicolon but found Token { kind: Identifier, value: Str(\"b\"), start: 197, end: 198 }", + InvalidSyntax { + path: "", + msg: "Statement does not end in new line or semicolon", + line: 8, + input: " x = 1", + advice: "Split the statements into two seperate lines or add a semicolon", + span: ( + 160, + 165, + ), + }, + InvalidSyntax { + path: "", + msg: "Statement does not end in new line or semicolon", + line: 11, + input: "a = 1", + advice: "Split the statements into two seperate lines or add a semicolon", + span: ( + 190, + 195, + ), + }, ] diff --git a/parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__one_liners@from_import.py-2.snap b/parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__one_liners@from_import.py-2.snap new file mode 100644 index 00000000..4bc0dc48 --- /dev/null +++ b/parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__one_liners@from_import.py-2.snap @@ -0,0 +1,18 @@ +--- +source: parser/src/parser/parser.rs +description: "import a\nimport a as b\nimport a.b\nimport a.b as c\nimport a.b.c\nfrom a import b\nfrom a import b as c\nfrom a.b import c\nfrom a.b import c as d\nfrom ...a import b\nfrom ....a import b\nfrom .....a import b\nfrom ......a import b\nfrom .......a import b\nfrom ...\n" +input_file: parser/test_data/inputs/one_liners/from_import.py +--- +[ + InvalidSyntax { + path: "", + msg: "Expected one of \"Identifier, Mul, \" but found Eof", + line: 16, + input: "", + advice: "Use * for importing everthing or use () to specify names to import or specify the name you want to import", + span: ( + 246, + 255, + ), + }, +] diff --git a/parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__one_liners@from_import.py.snap b/parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__one_liners@from_import.py.snap new file mode 100644 index 00000000..6408affd --- /dev/null +++ b/parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__one_liners@from_import.py.snap @@ -0,0 +1,291 @@ +--- +source: parser/src/parser/parser.rs +description: "import a\nimport a as b\nimport a.b\nimport a.b as c\nimport a.b.c\nfrom a import b\nfrom a import b as c\nfrom a.b import c\nfrom a.b import c as d\nfrom ...a import b\nfrom ....a import b\nfrom .....a import b\nfrom ......a import b\nfrom .......a import b\nfrom ...\n" +input_file: parser/test_data/inputs/one_liners/from_import.py +--- +Module { + node: Node { + start: 0, + end: 255, + }, + body: [ + Import( + Import { + node: Node { + start: 0, + end: 8, + }, + names: [ + Alias { + node: Node { + start: 7, + end: 8, + }, + name: "a", + asname: None, + }, + ], + }, + ), + Import( + Import { + node: Node { + start: 9, + end: 22, + }, + names: [ + Alias { + node: Node { + start: 16, + end: 22, + }, + name: "a", + asname: Some( + "b", + ), + }, + ], + }, + ), + Import( + Import { + node: Node { + start: 23, + end: 33, + }, + names: [ + Alias { + node: Node { + start: 30, + end: 33, + }, + name: "a.b", + asname: None, + }, + ], + }, + ), + Import( + Import { + node: Node { + start: 34, + end: 49, + }, + names: [ + Alias { + node: Node { + start: 41, + end: 49, + }, + name: "a.b", + asname: Some( + "c", + ), + }, + ], + }, + ), + Import( + Import { + node: Node { + start: 50, + end: 62, + }, + names: [ + Alias { + node: Node { + start: 57, + end: 62, + }, + name: "a.b.c", + asname: None, + }, + ], + }, + ), + ImportFrom( + ImportFrom { + node: Node { + start: 63, + end: 78, + }, + module: "a", + names: [ + Alias { + node: Node { + start: 77, + end: 78, + }, + name: "b", + asname: None, + }, + ], + level: 0, + }, + ), + ImportFrom( + ImportFrom { + node: Node { + start: 79, + end: 99, + }, + module: "a", + names: [ + Alias { + node: Node { + start: 93, + end: 99, + }, + name: "b", + asname: Some( + "c", + ), + }, + ], + level: 0, + }, + ), + ImportFrom( + ImportFrom { + node: Node { + start: 100, + end: 117, + }, + module: "a.b", + names: [ + Alias { + node: Node { + start: 116, + end: 117, + }, + name: "c", + asname: None, + }, + ], + level: 0, + }, + ), + ImportFrom( + ImportFrom { + node: Node { + start: 118, + end: 140, + }, + module: "a.b", + names: [ + Alias { + node: Node { + start: 134, + end: 140, + }, + name: "c", + asname: Some( + "d", + ), + }, + ], + level: 0, + }, + ), + ImportFrom( + ImportFrom { + node: Node { + start: 141, + end: 159, + }, + module: "a", + names: [ + Alias { + node: Node { + start: 158, + end: 159, + }, + name: "b", + asname: None, + }, + ], + level: 3, + }, + ), + ImportFrom( + ImportFrom { + node: Node { + start: 160, + end: 179, + }, + module: "a", + names: [ + Alias { + node: Node { + start: 178, + end: 179, + }, + name: "b", + asname: None, + }, + ], + level: 4, + }, + ), + ImportFrom( + ImportFrom { + node: Node { + start: 180, + end: 200, + }, + module: "a", + names: [ + Alias { + node: Node { + start: 199, + end: 200, + }, + name: "b", + asname: None, + }, + ], + level: 5, + }, + ), + ImportFrom( + ImportFrom { + node: Node { + start: 201, + end: 222, + }, + module: "a", + names: [ + Alias { + node: Node { + start: 221, + end: 222, + }, + name: "b", + asname: None, + }, + ], + level: 6, + }, + ), + ImportFrom( + ImportFrom { + node: Node { + start: 223, + end: 245, + }, + module: "a", + names: [ + Alias { + node: Node { + start: 244, + end: 245, + }, + name: "b", + asname: None, + }, + ], + level: 7, + }, + ), + ], +} diff --git a/parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__parse_import_statement-15.snap b/parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__parse_import_statement-15.snap index 1c06e551..be92f832 100644 --- a/parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__parse_import_statement-15.snap +++ b/parser/src/parser/snapshots/enderpy_python_parser__parser__parser__tests__parse_import_statement-15.snap @@ -7,17 +7,5 @@ Module { start: 0, end: 8, }, - body: [ - ImportFrom( - ImportFrom { - node: Node { - start: 0, - end: 8, - }, - module: "None", - names: [], - level: 3, - }, - ), - ], + body: [], } diff --git a/parser/src/parser/string.rs b/parser/src/parser/string.rs index ae256561..8bcb811c 100644 --- a/parser/src/parser/string.rs +++ b/parser/src/parser/string.rs @@ -1,10 +1,11 @@ use miette::Result; +use crate::parser::ast::Expression; use crate::parser::ast::JoinedStr; -use crate::parser::{ast::Expression, diagnostics}; use crate::token::Kind; use super::ast::Node; +use super::error::ParsingError; pub fn extract_string_inside(val: String) -> String { if let Some(val) = val.strip_prefix("\"\"\"") { val.strip_suffix("\"\"\"") @@ -41,7 +42,7 @@ pub fn is_string(kind: &Kind) -> bool { } } -pub fn concat_string_exprs(lhs: Expression, rhs: Expression) -> Result { +pub fn concat_string_exprs(lhs: Expression, rhs: Expression) -> Result { use crate::parser::ast::{Constant, ConstantValue}; match (lhs, rhs) { (Expression::Constant(lhs), Expression::Constant(rhs)) => { @@ -64,18 +65,24 @@ pub fn concat_string_exprs(lhs: Expression, rhs: Expression) -> Result { - return Err(diagnostics::InvalidSyntax( - "Cannot concat bytes and string".to_string(), - node, - ) - .into()) + return Err(ParsingError::InvalidSyntax { + path: "test".into(), + msg: "Cannot concat bytes and string".into(), + line: 0, + input: "test".into(), + advice: "test".into(), + span: (0, 0), + }) } (_, ConstantValue::Bytes(_rhs)) => { - return Err(diagnostics::InvalidSyntax( - "Cannot concat string and bytes".to_string(), - node, - ) - .into()) + return Err(ParsingError::InvalidSyntax { + path: "test".into(), + msg: "Can only concat bytes with other bytes".into(), + line: 0, + input: "test".into(), + advice: "test".into(), + span: (0, 0), + }); } _ => panic!("Cannot concat string"), }; @@ -102,11 +109,14 @@ pub fn concat_string_exprs(lhs: Expression, rhs: Expression) -> Result { - return Err(diagnostics::InvalidSyntax( - "Cannot concat string and bytes".to_string(), - const_rhs.node, - ) - .into()); + return Err(ParsingError::InvalidSyntax { + path: "test".into(), + msg: "Cannot concat string and bytes".into(), + line: 0, + input: "test".into(), + advice: "test".into(), + span: (0, 0), + }); } _ => panic!("Cannot concat string"), } @@ -125,11 +135,14 @@ pub fn concat_string_exprs(lhs: Expression, rhs: Expression) -> Result { - return Err(diagnostics::InvalidSyntax( - "Cannot concat string and bytes".to_string(), - const_lhs.node, - ) - .into()); + return Err(ParsingError::InvalidSyntax { + path: "test".into(), + msg: "Cannot concat string and bytes".into(), + line: 0, + input: "test".into(), + advice: "test".into(), + span: (0, 0), + }); } _ => panic!("Cannot concat string"), }; diff --git a/parser/test_data/inputs/one_liners/from_import.py b/parser/test_data/inputs/one_liners/from_import.py new file mode 100644 index 00000000..44b6f451 --- /dev/null +++ b/parser/test_data/inputs/one_liners/from_import.py @@ -0,0 +1,15 @@ +import a +import a as b +import a.b +import a.b as c +import a.b.c +from a import b +from a import b as c +from a.b import c +from a.b import c as d +from ...a import b +from ....a import b +from .....a import b +from ......a import b +from .......a import b +from ... diff --git a/typechecker/src/settings.rs b/typechecker/src/settings.rs index f804f7fb..fb40b314 100644 --- a/typechecker/src/settings.rs +++ b/typechecker/src/settings.rs @@ -29,7 +29,7 @@ impl Settings { pub fn test_settings() -> Self { Settings { - debug: true, + debug: false, root: PathBuf::from(""), import_discovery: ImportDiscovery { python_executable: None,