From 822399e5abde028d574d90baba3ef4bc2d7e819e Mon Sep 17 00:00:00 2001 From: Shaygan Hooshyari Date: Thu, 17 Oct 2024 17:43:28 +0200 Subject: [PATCH] Add string interner (#269) * Add string interner * Switch to fxhash * Use interning for function names * Make interner global * Buffer interner * Store symbol table in Arc * Remove redundant stuff * Cache imports --- Cargo.toml | 1 + enderpy/src/main.rs | 2 +- parser/Cargo.toml | 1 + parser/src/ast.rs | 719 +++++++++++++++++- parser/src/intern.rs | 71 ++ parser/src/lib.rs | 1 + parser/src/parser/parser.rs | 74 +- ..._parser__parser__parser__tests__class.snap | 100 +-- ...rser__parser__parser__tests__comments.snap | 4 +- ...__parser__tests__ellipsis_statement-2.snap | 2 +- ...er__parser__tests__ellipsis_statement.snap | 2 +- ...__parser__parser__tests__function_def.snap | 52 +- ...ser__parser__parser__tests__functions.snap | 6 +- ...rser__parser__parser__tests__newlines.snap | 7 +- ...r__parser__tests__separate_statements.snap | 4 +- typechecker/src/build.rs | 31 +- typechecker/src/checker.rs | 83 +- typechecker/src/file.rs | 12 +- typechecker/src/semantic_analyzer.rs | 56 +- typechecker/src/type_evaluator.rs | 117 ++- 20 files changed, 1028 insertions(+), 317 deletions(-) create mode 100644 parser/src/intern.rs diff --git a/Cargo.toml b/Cargo.toml index de309e77..c073e9e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ reqwest = { version = "0.12.7", features = ["blocking"] } dashmap = "6.0" tracing = "0.1.40" tracing-subscriber = {version = "0.3.18", features = ["env-filter"]} +fxhash = "0.2.1" [profile.dev.package.insta] opt-level = 3 diff --git a/enderpy/src/main.rs b/enderpy/src/main.rs index c4211183..11fe7b3c 100644 --- a/enderpy/src/main.rs +++ b/enderpy/src/main.rs @@ -33,7 +33,7 @@ fn symbols(path: &Path) -> Result<()> { manager.build(root); manager.build_one(root, path); - println!("{}", manager.get_symbol_table(path)); + println!("{}", manager.get_symbol_table_by_path(path)); Ok(()) } diff --git a/parser/Cargo.toml b/parser/Cargo.toml index 8c5ba0c8..dbce2a68 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -20,6 +20,7 @@ unicode-id-start = "1.2" miette.workspace = true thiserror = "1.0.40" is-macro = "0.3.6" +fxhash.workspace = true [dev-dependencies] codspeed-criterion-compat.workspace = true diff --git a/parser/src/ast.rs b/parser/src/ast.rs index 5d5eb6af..9feb1129 100644 --- a/parser/src/ast.rs +++ b/parser/src/ast.rs @@ -5,6 +5,8 @@ use std::sync::Arc; use miette::{SourceOffset, SourceSpan}; +use crate::intern::StrId; + #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] // #[serde(tag = "type")] pub struct Node { /// Start offset in source @@ -921,7 +923,7 @@ pub struct ExceptHandler { #[derive(Debug, Clone)] pub struct FunctionDef { pub node: Node, - pub name: String, + pub name: StrId, pub args: Arguments, pub body: Vec, pub decorator_list: Vec, @@ -934,7 +936,7 @@ pub struct FunctionDef { #[derive(Debug, Clone)] pub struct AsyncFunctionDef { pub node: Node, - pub name: String, + pub name: StrId, pub args: Arguments, pub body: Vec, pub decorator_list: Vec, @@ -944,10 +946,32 @@ pub struct AsyncFunctionDef { } impl AsyncFunctionDef { + #[allow(clippy::too_many_arguments)] + pub fn new( + node: Node, + name: StrId, + args: Arguments, + body: Vec, + decorator_list: Vec, + returns: Option, + type_comment: Option<&str>, + type_params: Vec, + ) -> Self { + Self { + node, + name, + args, + body, + decorator_list, + returns, + type_comment: type_comment.map(|s| s.to_owned()), + type_params, + } + } pub fn to_function_def(&self) -> FunctionDef { FunctionDef { node: self.node, - name: self.name.clone(), + name: self.name, args: self.args.clone(), body: self.body.clone(), decorator_list: self.decorator_list.clone(), @@ -962,7 +986,7 @@ impl AsyncFunctionDef { #[derive(Debug, Clone)] pub struct ClassDef { pub node: Node, - pub name: TextRange, + pub name: StrId, pub bases: Vec, pub keywords: Vec, pub body: Vec, @@ -970,12 +994,6 @@ pub struct ClassDef { pub type_params: Vec, } -impl ClassDef { - pub fn name<'a>(&self, source: &'a str) -> &'a str { - &source[(self.name.start) as usize..(self.name.end) as usize] - } -} - // https://docs.python.org/3/library/ast.html#ast.Match #[derive(Debug, Clone)] pub struct Match { @@ -1094,11 +1112,678 @@ pub struct TypeAlias { pub value: Expression, } -#[cfg(target_pointer_width = "64")] -#[test] -fn no_bloat_enum_sizes() { - use crate::ast::*; - use std::mem::size_of; - assert_eq!(size_of::(), 16); - assert_eq!(size_of::(), 16); +impl Module { + pub fn new(node: Node, body: Vec) -> Self { + Self { node, body } + } +} +impl Assign { + pub fn new(node: Node, targets: Vec, value: Expression) -> Self { + Self { + node, + targets, + value, + } + } +} +impl AnnAssign { + pub fn new( + node: Node, + target: Expression, + annotation: Expression, + value: Option, + simple: bool, + ) -> Self { + Self { + node, + target, + annotation, + value, + simple, + } + } +} +impl AugAssign { + pub fn new(node: Node, target: Expression, op: AugAssignOp, value: Expression) -> Self { + Self { + node, + target, + op, + value, + } + } +} +impl Assert { + pub fn new(node: Node, test: Expression, msg: Option) -> Self { + Self { node, test, msg } + } +} +impl Pass { + pub fn new(node: Node) -> Self { + Self { node } + } +} +impl Delete { + pub fn new(node: Node, targets: Vec) -> Self { + Self { node, targets } + } +} +impl Return { + pub fn new(node: Node, value: Option) -> Self { + Self { node, value } + } +} +impl Raise { + pub fn new(node: Node, exc: Option, cause: Option) -> Self { + Self { node, exc, cause } + } +} +impl Break { + pub fn new(node: Node) -> Self { + Self { node } + } +} +impl Continue { + pub fn new(node: Node) -> Self { + Self { node } + } +} +impl Import { + pub fn new(node: Node, names: Vec) -> Self { + Self { node, names } + } +} +impl Alias { + pub fn new(node: Node, name: &str, asname: Option<&str>) -> Self { + Self { + node, + name: name.to_owned(), + asname: asname.map(|s| s.to_owned()), + } + } +} +impl ImportFrom { + pub fn new(node: Node, module: &str, names: Vec, level: usize) -> Self { + Self { + node, + module: module.to_owned(), + names, + level, + } + } +} +impl Global { + pub fn new(node: Node, names: Vec) -> Self { + Self { node, names } + } +} +impl Nonlocal { + pub fn new(node: Node, names: Vec) -> Self { + Self { node, names } + } +} +impl Name { + pub fn new(node: Node, id: &str, parenthesized: bool) -> Self { + Self { + node, + id: id.to_owned(), + parenthesized, + } + } +} +impl Constant { + pub fn new(node: Node, value: ConstantValue) -> Self { + Self { node, value } + } +} +impl List { + pub fn new(node: Node, elements: Vec) -> Self { + Self { node, elements } + } +} +impl Tuple { + pub fn new(node: Node, elements: Vec) -> Self { + Self { node, elements } + } +} +impl Dict { + pub fn new(node: Node, keys: Vec, values: Vec) -> Self { + Self { node, keys, values } + } +} +impl Set { + pub fn new(node: Node, elements: Vec) -> Self { + Self { node, elements } + } +} +impl BoolOperation { + pub fn new(node: Node, op: BooleanOperator, values: Vec) -> Self { + Self { node, op, values } + } +} +impl UnaryOperation { + pub fn new(node: Node, op: UnaryOperator, operand: Expression) -> Self { + Self { node, op, operand } + } +} +impl BinOp { + pub fn new(node: Node, op: BinaryOperator, left: Expression, right: Expression) -> Self { + Self { + node, + op, + left, + right, + } + } +} +impl NamedExpression { + pub fn new(node: Node, target: Expression, value: Expression) -> Self { + Self { + node, + target, + value, + } + } +} +impl Yield { + pub fn new(node: Node, value: Option) -> Self { + Self { node, value } + } +} +impl YieldFrom { + pub fn new(node: Node, value: Expression) -> Self { + Self { node, value } + } +} +impl Starred { + pub fn new(node: Node, value: Expression) -> Self { + Self { node, value } + } +} +impl Generator { + pub fn new(node: Node, element: Expression, generators: Vec) -> Self { + Self { + node, + element, + generators, + } + } +} +impl ListComp { + pub fn new(node: Node, element: Expression, generators: Vec) -> Self { + Self { + node, + element, + generators, + } + } +} +impl SetComp { + pub fn new(node: Node, element: Expression, generators: Vec) -> Self { + Self { + node, + element, + generators, + } + } +} +impl DictComp { + pub fn new( + node: Node, + key: Expression, + value: Expression, + generators: Vec, + ) -> Self { + Self { + node, + key, + value, + generators, + } + } +} +impl Comprehension { + pub fn new( + node: Node, + target: Expression, + iter: Expression, + ifs: Vec, + is_async: bool, + ) -> Self { + Self { + node, + target, + iter, + ifs, + is_async, + } + } +} +impl Attribute { + pub fn new(node: Node, value: Expression, attr: &str) -> Self { + Self { + node, + value, + attr: attr.to_owned(), + } + } +} +impl Subscript { + pub fn new(node: Node, value: Expression, slice: Expression) -> Self { + Self { node, value, slice } + } +} +impl Slice { + pub fn new( + node: Node, + lower: Option, + upper: Option, + step: Option, + ) -> Self { + Self { + node, + lower, + upper, + step, + } + } +} +impl Call { + pub fn new( + node: Node, + func: Expression, + args: Vec, + keywords: Vec, + starargs: Option, + kwargs: Option, + ) -> Self { + Self { + node, + func, + args, + keywords, + starargs, + kwargs, + } + } +} +impl Keyword { + pub fn new(node: Node, arg: Option<&str>, value: Expression) -> Self { + Self { + node, + arg: arg.map(|s| s.to_owned()), + value, + } + } +} +impl Await { + pub fn new(node: Node, value: Expression) -> Self { + Self { node, value } + } +} +impl Compare { + pub fn new( + node: Node, + left: Expression, + ops: Vec, + comparators: Vec, + ) -> Self { + Self { + node, + left, + ops, + comparators, + } + } +} +impl Lambda { + pub fn new(node: Node, args: Arguments, body: Expression) -> Self { + Self { node, args, body } + } +} +impl Arguments { + #[allow(clippy::too_many_arguments)] + pub fn new( + node: Node, + posonlyargs: Vec, + args: Vec, + vararg: Option, + kwonlyargs: Vec, + kw_defaults: Vec>, + kwarg: Option, + defaults: Vec, + ) -> Self { + Self { + node, + posonlyargs, + args, + vararg, + kwonlyargs, + kw_defaults, + kwarg, + defaults, + } + } +} +impl Arg { + pub fn new(node: Node, arg: &str, annotation: Option) -> Self { + Self { + node, + arg: arg.to_owned(), + annotation, + } + } +} +impl IfExp { + pub fn new(node: Node, test: Expression, body: Expression, orelse: Expression) -> Self { + Self { + node, + test, + body, + orelse, + } + } +} +impl FormattedValue { + pub fn new( + node: Node, + value: Expression, + conversion: i32, + format_spec: Option, + ) -> Self { + Self { + node, + value, + conversion, + format_spec, + } + } +} +impl JoinedStr { + pub fn new(node: Node, values: Vec) -> Self { + Self { node, values } + } +} +impl If { + pub fn new(node: Node, test: Expression, body: Vec, orelse: Vec) -> Self { + Self { + node, + test, + body, + orelse, + } + } +} +impl While { + pub fn new(node: Node, test: Expression, body: Vec, orelse: Vec) -> Self { + Self { + node, + test, + body, + orelse, + } + } +} +impl For { + pub fn new( + node: Node, + target: Expression, + iter: Expression, + body: Vec, + orelse: Vec, + ) -> Self { + Self { + node, + target, + iter, + body, + orelse, + } + } +} +impl AsyncFor { + pub fn new( + node: Node, + target: Expression, + iter: Expression, + body: Vec, + orelse: Vec, + ) -> Self { + Self { + node, + target, + iter, + body, + orelse, + } + } +} +impl With { + pub fn new(node: Node, items: Vec, body: Vec) -> Self { + Self { node, items, body } + } +} +impl AsyncWith { + pub fn new(node: Node, items: Vec, body: Vec) -> Self { + Self { node, items, body } + } +} +impl WithItem { + pub fn new(node: Node, context_expr: Expression, optional_vars: Option) -> Self { + Self { + node, + context_expr, + optional_vars, + } + } +} +impl Try { + pub fn new( + node: Node, + body: Vec, + handlers: Vec, + orelse: Vec, + finalbody: Vec, + ) -> Self { + Self { + node, + body, + handlers, + orelse, + finalbody, + } + } +} +impl TryStar { + pub fn new( + node: Node, + body: Vec, + handlers: Vec, + orelse: Vec, + finalbody: Vec, + ) -> Self { + Self { + node, + body, + handlers, + orelse, + finalbody, + } + } +} +impl ExceptHandler { + pub fn new( + node: Node, + typ: Option, + name: Option<&str>, + body: Vec, + ) -> Self { + Self { + node, + typ, + name: name.map(|s| s.to_owned()), + body, + } + } +} +impl FunctionDef { + #[allow(clippy::too_many_arguments)] + pub fn new( + node: Node, + name: StrId, + args: Arguments, + body: Vec, + decorator_list: Vec, + returns: Option, + type_comment: Option<&str>, + type_params: Vec, + ) -> Self { + Self { + node, + name, + args, + body, + decorator_list, + returns, + type_comment: type_comment.map(|s| s.to_owned()), + type_params, + } + } +} +impl ClassDef { + pub fn new( + node: Node, + name: StrId, + bases: Vec, + keywords: Vec, + body: Vec, + decorator_list: Vec, + type_params: Vec, + ) -> Self { + Self { + node, + name, + bases, + keywords, + body, + decorator_list, + type_params, + } + } +} +impl Match { + pub fn new(node: Node, subject: Expression, cases: Vec) -> Self { + Self { + node, + subject, + cases, + } + } +} +impl MatchCase { + pub fn new( + node: Node, + pattern: MatchPattern, + guard: Option, + body: Vec, + ) -> Self { + Self { + node, + pattern, + guard, + body, + } + } +} +impl MatchValue { + pub fn new(node: Node, value: Expression) -> Self { + Self { node, value } + } +} +impl MatchAs { + pub fn new(node: Node, name: Option<&str>, pattern: Option) -> Self { + Self { + node, + name: name.map(|s| s.to_owned()), + pattern, + } + } +} +impl MatchMapping { + pub fn new( + node: Node, + keys: Vec, + patterns: Vec, + rest: Option<&str>, + ) -> Self { + Self { + node, + keys, + patterns, + rest: rest.map(|s| s.to_owned()), + } + } +} +impl MatchClass { + pub fn new( + node: Node, + cls: Expression, + patterns: Vec, + kwd_attrs: Vec, + kwd_patterns: Vec, + ) -> Self { + Self { + node, + cls, + patterns, + kwd_attrs, + kwd_patterns, + } + } +} +impl TypeVar { + pub fn new(node: Node, name: &str, bound: Option) -> Self { + Self { + node, + name: name.to_owned(), + bound, + } + } +} +impl ParamSpec { + pub fn new(node: Node, name: &str) -> Self { + Self { + node, + name: name.to_owned(), + } + } +} +impl TypeVarTuple { + pub fn new(node: Node, name: &str) -> Self { + Self { + node, + name: name.to_owned(), + } + } +} +impl TypeAlias { + pub fn new(node: Node, name: &str, type_params: Vec, value: Expression) -> Self { + Self { + node, + name: name.to_owned(), + type_params, + value, + } + } +} + +mod tests { + #[cfg(target_pointer_width = "64")] + #[test] + fn no_bloat_enum_sizes() { + use crate::ast::*; + use std::mem::size_of; + assert_eq!(size_of::(), 16); + assert_eq!(size_of::(), 16); + } } diff --git a/parser/src/intern.rs b/parser/src/intern.rs new file mode 100644 index 00000000..68a8ddb1 --- /dev/null +++ b/parser/src/intern.rs @@ -0,0 +1,71 @@ +use std::fmt::{Debug, Display}; + +use fxhash::FxHashMap; +use serde::Serialize; +use std::mem; + +use crate::parser::parser::intern_lookup; + +#[derive(Clone, Copy, Eq, PartialEq, Hash, Serialize)] +pub struct StrId(pub u32); + +impl Debug for StrId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", intern_lookup(*self)) + } +} + +impl Display for StrId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +pub struct Interner { + map: FxHashMap<&'static str, StrId>, + vec: Vec<&'static str>, + buf: String, + full: Vec, +} +impl Interner { + pub fn with_capacity(cap: usize) -> Interner { + let cap = cap.next_power_of_two(); + Interner { + map: FxHashMap::default(), + vec: Vec::new(), + buf: String::with_capacity(cap), + full: Vec::new(), + } + } + pub fn intern(&mut self, name: &str) -> StrId { + if let Some(&id) = self.map.get(name) { + return id; + } + let name = unsafe { self.alloc(name) }; + let id = self.map.len() as u32; + self.map.insert(name, StrId(id)); + self.vec.push(name); + debug_assert!(self.lookup(StrId(id)) == name); + debug_assert!(self.intern(name) == StrId(id)); + + StrId(id) + } + pub fn lookup(&self, id: StrId) -> &'static str { + self.vec[id.0 as usize] + } + unsafe fn alloc(&mut self, name: &str) -> &'static str { + let cap = self.buf.capacity(); + if cap < self.buf.len() + name.len() { + let new_cap = (cap.max(name.len()) + 1).next_power_of_two(); + let new_buf = String::with_capacity(new_cap); + let old_buf = mem::replace(&mut self.buf, new_buf); + self.full.push(old_buf); + } + let interned = { + let start = self.buf.len(); + self.buf.push_str(name); + &self.buf[start..] + }; + &*(interned as *const str) + } +} diff --git a/parser/src/lib.rs b/parser/src/lib.rs index be73b2e0..b3a13203 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -5,6 +5,7 @@ pub mod parser; pub use crate::lexer::Lexer; pub mod error; +pub mod intern; pub mod token; pub fn get_row_col_position(start: u32, end: u32, line_starts: &[u32]) -> (u32, u32, u32, u32) { diff --git a/parser/src/parser/parser.rs b/parser/src/parser/parser.rs index 771d4d6b..808ad452 100644 --- a/parser/src/parser/parser.rs +++ b/parser/src/parser/parser.rs @@ -4,7 +4,10 @@ use core::panic; /// For example star expressions are defined slightly differently in python grammar and references. /// So there might be duplicates of both. Try to migrate the wrong names to how they are called in: /// https://docs.python.org/3/reference/grammar.html -use std::{sync::Arc, vec}; +use std::{ + sync::{Arc, OnceLock, RwLock}, + vec, +}; use miette::Result; @@ -12,12 +15,26 @@ use super::{concat_string_exprs, is_at_compound_statement, is_iterable, map_unar use crate::{ error::ParsingError, get_row_col_position, + intern::{Interner, StrId}, lexer::Lexer, parser::{ast::*, extract_string_inside}, token::{Kind, Token}, }; +static INTERNER: OnceLock> = OnceLock::new(); + +pub fn intern_lookup(s: StrId) -> &'static str { + let val = INTERNER.get_or_init(|| RwLock::new(Interner::with_capacity(100))); + let v2 = val.read().unwrap(); + v2.lookup(s) +} + +pub fn intern(s: &str) -> StrId { + let val = INTERNER.get_or_init(|| RwLock::new(Interner::with_capacity(100))); + let mut v2 = val.write().unwrap(); + v2.intern(s) +} -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct Parser<'a> { pub identifiers_start_offset: Vec<(u32, u32, String)>, pub source: &'a str, @@ -86,7 +103,7 @@ impl<'a> Parser<'a> { // Remove the EOF offset node.end.saturating_sub(1); - Ok(Module { node, body }) + Ok(Module::new(node, body)) } fn start_node(&self) -> Node { @@ -578,7 +595,7 @@ impl<'a> Parser<'a> { decorators: Vec, is_async: bool, ) -> Result { - let name = self.cur_token().to_string(self.source); + let name = intern(self.cur_token().as_str(self.source)); self.expect(Kind::Identifier)?; let type_params = if self.at(Kind::LeftBrace) { self.parse_type_parameters()? @@ -598,28 +615,30 @@ impl<'a> Parser<'a> { self.expect(Kind::Colon)?; let body = self.parse_suite()?; if is_async { - Ok(Statement::AsyncFunctionDef(Arc::new(AsyncFunctionDef { - node: self.finish_node_chomped(node), - name, - args, - body, - decorator_list: decorators, - returns: return_type, - type_comment: None, - type_params, - }))) + Ok(Statement::AsyncFunctionDef(Arc::new( + AsyncFunctionDef::new( + self.finish_node_chomped(node), + name, + args, + body, + decorators, + return_type, + None, + type_params, + ), + ))) } else { - Ok(Statement::FunctionDef(Arc::new(FunctionDef { - node: self.finish_node_chomped(node), + Ok(Statement::FunctionDef(Arc::new(FunctionDef::new( + self.finish_node_chomped(node), name, args, body, - decorator_list: decorators, - returns: return_type, + decorators, + return_type, // TODO: type comment - type_comment: None, + None, type_params, - }))) + )))) } } @@ -656,10 +675,7 @@ impl<'a> Parser<'a> { self.start_node() }; self.expect(Kind::Class)?; - let name_range = TextRange { - start: self.cur_token().start, - end: self.cur_token().end, - }; + let name = intern(self.cur_token().as_str(self.source)); self.expect(Kind::Identifier)?; let type_params = if self.at(Kind::LeftBrace) { self.parse_type_parameters()? @@ -676,15 +692,15 @@ impl<'a> Parser<'a> { self.expect(Kind::Colon)?; let body = self.parse_suite()?; - Ok(Statement::ClassDef(Arc::new(ClassDef { - node: self.finish_node(node), - name: name_range, + Ok(Statement::ClassDef(Arc::new(ClassDef::new( + self.finish_node(node), + name, bases, keywords, body, - decorator_list: decorators, + decorators, type_params, - }))) + )))) } // https://peps.python.org/pep-0622/#appendix-a-full-grammar diff --git a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__class.snap b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__class.snap index 3efd51ad..78b874a6 100644 --- a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__class.snap +++ b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__class.snap @@ -14,10 +14,7 @@ Module { start: 0, end: 13, }, - name: TextRange { - start: 6, - end: 7, - }, + name: a, bases: [], keywords: [], body: [ @@ -40,10 +37,7 @@ Module { start: 15, end: 36, }, - name: TextRange { - start: 21, - end: 22, - }, + name: a, bases: [], keywords: [], body: [ @@ -66,10 +60,7 @@ Module { start: 36, end: 55, }, - name: TextRange { - start: 42, - end: 43, - }, + name: a, bases: [ Name( Name { @@ -111,10 +102,7 @@ Module { start: 57, end: 82, }, - name: TextRange { - start: 63, - end: 64, - }, + name: a, bases: [ Name( Name { @@ -181,10 +169,7 @@ Module { start: 84, end: 106, }, - name: TextRange { - start: 90, - end: 91, - }, + name: a, bases: [ Name( Name { @@ -235,10 +220,7 @@ Module { start: 108, end: 128, }, - name: TextRange { - start: 121, - end: 122, - }, + name: a, bases: [], keywords: [], body: [ @@ -271,10 +253,7 @@ Module { start: 130, end: 146, }, - name: TextRange { - start: 136, - end: 137, - }, + name: a, bases: [], keywords: [], body: [ @@ -308,10 +287,7 @@ Module { start: 148, end: 167, }, - name: TextRange { - start: 154, - end: 155, - }, + name: a, bases: [], keywords: [], body: [ @@ -355,10 +331,7 @@ Module { start: 169, end: 188, }, - name: TextRange { - start: 175, - end: 176, - }, + name: a, bases: [], keywords: [], body: [ @@ -402,10 +375,7 @@ Module { start: 190, end: 215, }, - name: TextRange { - start: 196, - end: 197, - }, + name: a, bases: [], keywords: [], body: [ @@ -469,10 +439,7 @@ Module { start: 217, end: 234, }, - name: TextRange { - start: 223, - end: 224, - }, + name: a, bases: [], keywords: [], body: [ @@ -505,10 +472,7 @@ Module { start: 236, end: 256, }, - name: TextRange { - start: 242, - end: 243, - }, + name: a, bases: [], keywords: [], body: [ @@ -551,10 +515,7 @@ Module { start: 258, end: 281, }, - name: TextRange { - start: 264, - end: 265, - }, + name: a, bases: [], keywords: [], body: [ @@ -607,10 +568,7 @@ Module { start: 283, end: 312, }, - name: TextRange { - start: 289, - end: 290, - }, + name: a, bases: [], keywords: [], body: [ @@ -683,10 +641,7 @@ Module { start: 314, end: 332, }, - name: TextRange { - start: 320, - end: 321, - }, + name: a, bases: [], keywords: [], body: [ @@ -719,10 +674,7 @@ Module { start: 334, end: 355, }, - name: TextRange { - start: 340, - end: 341, - }, + name: a, bases: [], keywords: [], body: [ @@ -765,10 +717,7 @@ Module { start: 357, end: 381, }, - name: TextRange { - start: 363, - end: 364, - }, + name: a, bases: [], keywords: [], body: [ @@ -821,10 +770,7 @@ Module { start: 383, end: 413, }, - name: TextRange { - start: 389, - end: 390, - }, + name: a, bases: [], keywords: [], body: [ @@ -897,10 +843,7 @@ Module { start: 415, end: 440, }, - name: TextRange { - start: 421, - end: 422, - }, + name: a, bases: [], keywords: [], body: [ @@ -952,10 +895,7 @@ Module { start: 442, end: 470, }, - name: TextRange { - start: 448, - end: 449, - }, + name: a, bases: [], keywords: [], body: [ diff --git a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__comments.snap b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__comments.snap index 9306300a..4c230bdd 100644 --- a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__comments.snap +++ b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__comments.snap @@ -14,7 +14,7 @@ Module { start: 17, end: 29, }, - name: "a", + name: a, args: Arguments { node: Node { start: 23, @@ -53,7 +53,7 @@ Module { start: 35, end: 64, }, - name: "b", + name: b, args: Arguments { node: Node { start: 41, diff --git a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__ellipsis_statement-2.snap b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__ellipsis_statement-2.snap index a7e849dd..2006e04e 100644 --- a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__ellipsis_statement-2.snap +++ b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__ellipsis_statement-2.snap @@ -14,7 +14,7 @@ Module { start: 0, end: 16, }, - name: "a", + name: a, args: Arguments { node: Node { start: 6, diff --git a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__ellipsis_statement.snap b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__ellipsis_statement.snap index 10a7a1cb..98467525 100644 --- a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__ellipsis_statement.snap +++ b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__ellipsis_statement.snap @@ -14,7 +14,7 @@ Module { start: 0, end: 12, }, - name: "a", + name: a, args: Arguments { node: Node { start: 6, diff --git a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__function_def.snap b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__function_def.snap index bc5dc6a2..252c97ca 100644 --- a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__function_def.snap +++ b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__function_def.snap @@ -14,7 +14,7 @@ Module { start: 0, end: 13, }, - name: "a", + name: a, args: Arguments { node: Node { start: 6, @@ -50,7 +50,7 @@ Module { start: 15, end: 33, }, - name: "a", + name: a, args: Arguments { node: Node { start: 21, @@ -86,7 +86,7 @@ Module { start: 34, end: 54, }, - name: "a", + name: a, args: Arguments { node: Node { start: 40, @@ -147,7 +147,7 @@ Module { start: 56, end: 79, }, - name: "a", + name: a, args: Arguments { node: Node { start: 62, @@ -210,7 +210,7 @@ Module { start: 81, end: 101, }, - name: "a", + name: a, args: Arguments { node: Node { start: 87, @@ -271,7 +271,7 @@ Module { start: 103, end: 123, }, - name: "a", + name: a, args: Arguments { node: Node { start: 116, @@ -317,7 +317,7 @@ Module { start: 125, end: 187, }, - name: "f", + name: f, args: Arguments { node: Node { start: 138, @@ -467,7 +467,7 @@ Module { start: 189, end: 213, }, - name: "func", + name: func, args: Arguments { node: Node { start: 198, @@ -513,7 +513,7 @@ Module { start: 215, end: 234, }, - name: "a", + name: a, args: Arguments { node: Node { start: 227, @@ -549,7 +549,7 @@ Module { start: 236, end: 260, }, - name: "a", + name: a, args: Arguments { node: Node { start: 248, @@ -585,7 +585,7 @@ Module { start: 261, end: 287, }, - name: "a", + name: a, args: Arguments { node: Node { start: 273, @@ -646,7 +646,7 @@ Module { start: 289, end: 305, }, - name: "a", + name: a, args: Arguments { node: Node { start: 298, @@ -693,7 +693,7 @@ Module { start: 307, end: 326, }, - name: "a", + name: a, args: Arguments { node: Node { start: 319, @@ -750,7 +750,7 @@ Module { start: 328, end: 347, }, - name: "a", + name: a, args: Arguments { node: Node { start: 340, @@ -807,7 +807,7 @@ Module { start: 349, end: 374, }, - name: "a", + name: a, args: Arguments { node: Node { start: 367, @@ -884,7 +884,7 @@ Module { start: 376, end: 393, }, - name: "a", + name: a, args: Arguments { node: Node { start: 386, @@ -930,7 +930,7 @@ Module { start: 395, end: 415, }, - name: "a", + name: a, args: Arguments { node: Node { start: 408, @@ -986,7 +986,7 @@ Module { start: 417, end: 440, }, - name: "a", + name: a, args: Arguments { node: Node { start: 433, @@ -1052,7 +1052,7 @@ Module { start: 442, end: 471, }, - name: "a", + name: a, args: Arguments { node: Node { start: 464, @@ -1138,7 +1138,7 @@ Module { start: 473, end: 491, }, - name: "a", + name: a, args: Arguments { node: Node { start: 484, @@ -1184,7 +1184,7 @@ Module { start: 493, end: 514, }, - name: "a", + name: a, args: Arguments { node: Node { start: 507, @@ -1240,7 +1240,7 @@ Module { start: 516, end: 540, }, - name: "a", + name: a, args: Arguments { node: Node { start: 533, @@ -1306,7 +1306,7 @@ Module { start: 542, end: 572, }, - name: "a", + name: a, args: Arguments { node: Node { start: 565, @@ -1392,7 +1392,7 @@ Module { start: 574, end: 599, }, - name: "a", + name: a, args: Arguments { node: Node { start: 592, @@ -1457,7 +1457,7 @@ Module { start: 601, end: 629, }, - name: "a", + name: a, args: Arguments { node: Node { start: 622, @@ -1532,7 +1532,7 @@ Module { start: 631, end: 1012, }, - name: "dataclass_transform", + name: dataclass_transform, args: Arguments { node: Node { start: 660, diff --git a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__functions.snap b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__functions.snap index 40819c0d..e9716447 100644 --- a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__functions.snap +++ b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__functions.snap @@ -14,7 +14,7 @@ Module { start: 0, end: 340, }, - name: "_handle_ticker_index", + name: _handle_ticker_index, args: Arguments { node: Node { start: 25, @@ -342,7 +342,7 @@ Module { start: 342, end: 609, }, - name: "_extract_ticker_client_types_data", + name: _extract_ticker_client_types_data, args: Arguments { node: Node { start: 380, @@ -675,7 +675,7 @@ Module { start: 610, end: 1152, }, - name: "common_process", + name: common_process, args: Arguments { node: Node { start: 629, diff --git a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__newlines.snap b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__newlines.snap index a3c7c6d3..ef8bbe44 100644 --- a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__newlines.snap +++ b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__newlines.snap @@ -155,7 +155,7 @@ Module { start: 68, end: 99, }, - name: "hello", + name: hello, args: Arguments { node: Node { start: 83, @@ -211,10 +211,7 @@ Module { start: 102, end: 135, }, - name: TextRange { - start: 108, - end: 109, - }, + name: A, bases: [ Name( Name { diff --git a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__separate_statements.snap b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__separate_statements.snap index 20db982f..51696045 100644 --- a/parser/test_data/output/enderpy_python_parser__parser__parser__tests__separate_statements.snap +++ b/parser/test_data/output/enderpy_python_parser__parser__parser__tests__separate_statements.snap @@ -14,7 +14,7 @@ Module { start: 99, end: 144, }, - name: "foo", + name: foo, args: Arguments { node: Node { start: 107, @@ -135,7 +135,7 @@ Module { start: 145, end: 239, }, - name: "bar", + name: bar, args: Arguments { node: Node { start: 153, diff --git a/typechecker/src/build.rs b/typechecker/src/build.rs index 15ac5832..582ba51f 100755 --- a/typechecker/src/build.rs +++ b/typechecker/src/build.rs @@ -13,8 +13,8 @@ use crate::{ checker::TypeChecker, file::{EnderpyFile, ImportKinds}, ruff_python_import_resolver::{ - self as ruff_python_resolver, config::Config, execution_environment, - import_result::ImportResult, module_descriptor::ImportModuleDescriptor, resolver, + self as ruff_python_resolver, execution_environment, import_result::ImportResult, + module_descriptor::ImportModuleDescriptor, resolver, }, settings::Settings, symbol_table::{Id, SymbolTable}, @@ -23,10 +23,10 @@ use crate::{ #[derive(Debug)] pub struct BuildManager { pub files: DashMap, - pub symbol_tables: DashMap, + pub symbol_tables: DashMap>, pub paths: DashMap, pub settings: Settings, - import_config: Config, + import_config: ruff_python_resolver::config::Config, host: ruff_python_resolver::host::StaticHost, } #[allow(unused)] @@ -39,7 +39,7 @@ impl<'a> BuildManager { .try_init(); let mut modules = DashMap::new(); - let import_config = Config { + let import_config = ruff_python_resolver::config::Config { typeshed_path: Some(settings.typeshed_path.clone()), stub_path: None, venv_path: None, @@ -67,7 +67,7 @@ impl<'a> BuildManager { log::debug!("Imports resolved"); for mut module in new_modules { let sym_table = module.populate_symbol_table(&imports); - self.symbol_tables.insert(module.id, sym_table); + self.symbol_tables.insert(module.id, Arc::new(sym_table)); self.paths.insert(module.path.to_path_buf(), module.id); self.files.insert(module.id, module); } @@ -83,7 +83,7 @@ impl<'a> BuildManager { log::debug!("Imports resolved"); for mut module in new_modules { let sym_table = module.populate_symbol_table(&imports); - self.symbol_tables.insert(module.id, sym_table); + self.symbol_tables.insert(module.id, Arc::new(sym_table)); self.paths.insert(module.path.to_path_buf(), module.id); self.files.insert(module.id, module); } @@ -105,10 +105,13 @@ impl<'a> BuildManager { checker } - pub fn get_symbol_table(&self, path: &Path) -> SymbolTable { + pub fn get_symbol_table_by_path(&'a self, path: &Path) -> Arc { let module_id = self.paths.get(path).expect("incorrect ID"); - let symbol_table = self.symbol_tables.get(module_id.value()); + return self.get_symbol_table_by_id(&module_id); + } + pub fn get_symbol_table_by_id(&'a self, id: &Id) -> Arc { + let symbol_table = self.symbol_tables.get_mut(id); return symbol_table .expect("symbol table not found") .value() @@ -118,7 +121,7 @@ impl<'a> BuildManager { pub fn get_hover_information(&self, path: &Path, line: u32, column: u32) -> String { let file = self.files.get(&self.paths.get(path).unwrap()).unwrap(); let checker = self.type_check(path, &file); - let symbol_table = self.get_symbol_table(path); + let symbol_table = self.get_symbol_table_by_path(path); let hovered_offset = file.line_starts[line as usize] + column; let hovered_offset_start = hovered_offset.saturating_sub(1); @@ -170,7 +173,7 @@ fn gather_imports<'a>( execution_environment, import_config, host, - // &import_results, + &import_results, ); new_modules.insert(module); for (import_desc, mut resolved) in resolved_imports { @@ -237,6 +240,7 @@ fn resolve_file_imports( execution_environment: &ruff_python_resolver::execution_environment::ExecutionEnvironment, import_config: &ruff_python_resolver::config::Config, host: &ruff_python_resolver::host::StaticHost, + resolved_imports: &ResolvedImports, ) -> HashMap { let mut imports = HashMap::new(); debug!("resolving imports for file {:?}", file.path); @@ -253,6 +257,9 @@ fn resolve_file_imports( }; for import_desc in import_descriptions { + if resolved_imports.contains_key(&import_desc) { + continue; + } // TODO: Cache non relative imports let resolved = match false { true => continue, @@ -291,7 +298,7 @@ mod tests { manager.build(root); manager.build_one(root, &path); - let symbol_table = manager.get_symbol_table(&path); + let symbol_table = manager.get_symbol_table_by_path(&path); let result = format!("{}", symbol_table); let mut settings = insta::Settings::clone_current(); diff --git a/typechecker/src/checker.rs b/typechecker/src/checker.rs index e635676a..c58c8a84 100644 --- a/typechecker/src/checker.rs +++ b/typechecker/src/checker.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use ast::{Expression, Statement}; use enderpy_python_parser as parser; use enderpy_python_parser::ast::{self, *}; +use enderpy_python_parser::parser::parser::intern_lookup; use super::{type_evaluator::TypeEvaluator, types::PythonType}; use crate::build::BuildManager; @@ -17,6 +18,8 @@ pub struct TypeChecker<'a> { id: Id, type_evaluator: TypeEvaluator<'a>, build_manager: &'a BuildManager, + current_scope: u32, + prev_scope: u32, } #[derive(Debug, PartialEq, Eq, Clone)] @@ -33,6 +36,8 @@ impl<'a> TypeChecker<'a> { id, build_manager, types: Lapper::new(vec![]), + current_scope: 0, + prev_scope: 0, } } @@ -41,18 +46,17 @@ impl<'a> TypeChecker<'a> { } fn infer_expr_type(&mut self, expr: &Expression) -> PythonType { - let symbol_table = self.build_manager.symbol_tables.get(&self.id).unwrap(); - let t = - match self - .type_evaluator - .get_type(expr, &symbol_table, symbol_table.current_scope_id) - { - Ok(t) => t, - Err(e) => { - log::error!("type evaluator error: {} for expr {expr:?}", e); - PythonType::Unknown - } - }; + let symbol_table = self.build_manager.get_symbol_table_by_id(&self.id); + let t = match self + .type_evaluator + .get_type(expr, &symbol_table, self.current_scope) + { + Ok(t) => t, + Err(e) => { + log::error!("type evaluator error: {} for expr {expr:?}", e); + PythonType::Unknown + } + }; self.types.insert(Interval { start: expr.get_node().start, @@ -63,12 +67,10 @@ impl<'a> TypeChecker<'a> { } fn infer_annotation_type(&mut self, expr: &Expression) -> PythonType { - let symbol_table = self.build_manager.symbol_tables.get(&self.id).unwrap(); - let t = self.type_evaluator.get_annotation_type( - expr, - &symbol_table, - symbol_table.current_scope_id, - ); + let symbol_table = self.build_manager.get_symbol_table_by_id(&self.id); + let t = self + .type_evaluator + .get_annotation_type(expr, &symbol_table, self.current_scope); self.types.insert(Interval { start: expr.get_node().start, @@ -79,13 +81,10 @@ impl<'a> TypeChecker<'a> { } fn infer_name_type(&mut self, name: &str, start: u32, stop: u32) { - let symbol_table = self.build_manager.symbol_tables.get(&self.id).unwrap(); - let name_type = self.type_evaluator.get_name_type( - name, - None, - &symbol_table, - symbol_table.current_scope_id, - ); + let symbol_table = self.build_manager.get_symbol_table_by_id(&self.id); + let name_type = + self.type_evaluator + .get_name_type(name, None, &symbol_table, self.current_scope); self.types.insert(Interval { start, stop, @@ -93,14 +92,14 @@ impl<'a> TypeChecker<'a> { }); } - fn enter_scope(&self, pos: u32) { - let mut symbol_table = self.build_manager.symbol_tables.get_mut(&self.id).unwrap(); - symbol_table.set_scope(pos); + fn enter_scope(&mut self, pos: u32) { + let symbol_table = self.build_manager.get_symbol_table_by_id(&self.id); + self.prev_scope = self.current_scope; + self.current_scope = symbol_table.get_scope(pos); } - fn leave_scope(&self) { - let mut symbol_table = self.build_manager.symbol_tables.get_mut(&self.id).unwrap(); - symbol_table.revert_scope(); + fn leave_scope(&mut self) { + self.current_scope = self.prev_scope; } pub fn dump_types(&self) -> String { @@ -311,12 +310,10 @@ impl<'a> TraversalVisitor for TypeChecker<'a> { } fn visit_function_def(&mut self, f: &Arc) { + let file = &self.build_manager.files.get(&self.id).unwrap(); self.enter_scope(f.node.start); - self.infer_name_type( - &f.name, - f.node.start + 4, - f.node.start + 4 + f.name.len() as u32, - ); + let name = intern_lookup(f.name); + self.infer_name_type(name, f.node.start + 4, f.node.start + 4 + name.len() as u32); if let Some(ret_type) = &f.returns { self.visit_expr(ret_type); } @@ -334,12 +331,10 @@ impl<'a> TraversalVisitor for TypeChecker<'a> { } fn visit_async_function_def(&mut self, f: &Arc) { + let file = &self.build_manager.files.get(&self.id).unwrap(); self.enter_scope(f.node.start); - self.infer_name_type( - &f.name, - f.node.start + 9, - f.node.start + 9 + f.name.len() as u32, - ); + let name = intern_lookup(f.name); + self.infer_name_type(name, f.node.start + 9, f.node.start + 9 + name.len() as u32); for stmt in &f.body { self.visit_stmt(stmt); } @@ -347,9 +342,9 @@ impl<'a> TraversalVisitor for TypeChecker<'a> { } fn visit_class_def(&mut self, c: &Arc) { - let source = &self.build_manager.files.get(&self.id).unwrap().source; - let name = c.name(source); - self.infer_name_type(name, c.name.start, c.name.end); + let file = &self.build_manager.files.get(&self.id).unwrap(); + let name = intern_lookup(c.name); + self.infer_name_type(name, c.node.start + 6, c.node.start + 6 + name.len() as u32); self.enter_scope(c.node.start); for base in &c.bases { diff --git a/typechecker/src/file.rs b/typechecker/src/file.rs index 95a3065e..26354be6 100755 --- a/typechecker/src/file.rs +++ b/typechecker/src/file.rs @@ -11,8 +11,8 @@ use parser::{ast, get_row_col_position, parser::parser::Parser}; use std::sync::atomic::Ordering; use crate::build::ResolvedImports; +use crate::symbol_table; use crate::{diagnostic::Position, semantic_analyzer::SemanticAnalyzer, symbol_table::SymbolTable}; -use crate::{get_module_name, symbol_table}; #[derive(Clone, Debug)] pub enum ImportKinds<'a> { @@ -25,7 +25,6 @@ pub enum ImportKinds<'a> { #[derive(Clone, Debug)] pub struct EnderpyFile { pub id: symbol_table::Id, - pub module: String, // if this source is found by following an import pub followed: bool, pub path: Arc, @@ -58,7 +57,6 @@ impl<'a> EnderpyFile { pub fn new(path: PathBuf, followed: bool) -> Self { let source = std::fs::read_to_string(&path).unwrap_or_else(|_| panic!("cannot read file {path:?}")); - let module = get_module_name(&path); let mut parser = Parser::new(&source); let parse_result = catch_unwind(AssertUnwindSafe(|| parser.parse())); @@ -71,7 +69,7 @@ impl<'a> EnderpyFile { panic!("Cannot parse file : {path:?}"); } }; - let line_starts = parser.lexer.line_starts.clone(); + let line_starts = parser.lexer.line_starts; let id = if path.ends_with("builtins.pyi") { symbol_table::Id(0) @@ -84,14 +82,10 @@ impl<'a> EnderpyFile { source, line_starts, followed, - module, tree, path: Arc::new(path), } } - pub fn module_name(&self) -> String { - self.module.clone() - } pub fn path(&self) -> PathBuf { self.path.to_path_buf() @@ -134,7 +128,7 @@ impl<'a> EnderpyFile { /// entry point to fill up the symbol table from the global definitions pub fn populate_symbol_table(&mut self, imports: &ResolvedImports) -> SymbolTable { let mut sem_anal = SemanticAnalyzer::new(self, imports); - for stmt in &self.tree.body { + for stmt in self.tree.body.iter() { sem_anal.visit_stmt(stmt) } let mut sym_table = sem_anal.symbol_table; diff --git a/typechecker/src/semantic_analyzer.rs b/typechecker/src/semantic_analyzer.rs index 04c57a56..7432eaf6 100644 --- a/typechecker/src/semantic_analyzer.rs +++ b/typechecker/src/semantic_analyzer.rs @@ -1,7 +1,8 @@ use std::sync::Arc; -use enderpy_python_parser as parser; use enderpy_python_parser::ast::Expression; +use enderpy_python_parser::parser::parser::intern_lookup; +use enderpy_python_parser::{self as parser}; use parser::ast::{self, GetNode, Name, Statement}; @@ -109,15 +110,24 @@ impl<'a> SemanticAnalyzer<'a> { } } Expression::Attribute(a) => { - let member_access_info = - get_member_access_info(&self.symbol_table, &a.value, &self.file.source); + let member_access_info = get_member_access_info(&self.symbol_table, &a.value); let symbol_flags = if member_access_info.is_some_and(|x| x) { SymbolFlags::INSTANCE_MEMBER } else { SymbolFlags::CLASS_MEMBER }; - if self.function_assigns_attribute(&self.symbol_table) { + let function_assigns_attribute = if let Some(function_def) = + self.symbol_table.current_scope().kind.as_function() + { + // TODO: some python usual names to be interned + intern_lookup(function_def.name) == "__init__" + || intern_lookup(function_def.name) == "__new__" + } else { + false + }; + + if function_assigns_attribute { let declaration_path = DeclarationPath::new( self.symbol_table.id, a.node, @@ -251,17 +261,6 @@ impl<'a> SemanticAnalyzer<'a> { ); } } - - /// Returns true if the current function assigns an attribute to an object - /// Functions like __init__ and __new__ are considered to assign attributes - fn function_assigns_attribute(&self, symbol_table: &SymbolTable) -> bool { - if let Some(function_def) = symbol_table.current_scope().kind.as_function() { - if function_def.name == "__init__" || function_def.name == "__new__" { - return true; - } - } - false - } } impl<'a> TraversalVisitor for SemanticAnalyzer<'a> { @@ -506,7 +505,7 @@ impl<'a> TraversalVisitor for SemanticAnalyzer<'a> { } self.symbol_table.push_scope(SymbolTableScope::new( crate::symbol_table::SymbolTableType::Function(Arc::clone(f)), - f.name.clone(), + intern_lookup(f.name).to_owned(), f.node.start, self.symbol_table.current_scope_id, )); @@ -553,7 +552,11 @@ impl<'a> TraversalVisitor for SemanticAnalyzer<'a> { raise_statements: vec![], }); let flags = SymbolFlags::empty(); - self.create_symbol(f.name.clone(), function_declaration, flags); + self.create_symbol( + intern_lookup(f.name).to_owned(), + function_declaration, + flags, + ); } fn visit_async_function_def(&mut self, f: &Arc) { @@ -565,7 +568,7 @@ impl<'a> TraversalVisitor for SemanticAnalyzer<'a> { self.symbol_table.push_scope(SymbolTableScope::new( SymbolTableType::Function(Arc::new(f.to_function_def())), - f.name.clone(), + intern_lookup(f.name).to_owned(), f.node.start, self.symbol_table.current_scope_id, )); @@ -611,7 +614,11 @@ impl<'a> TraversalVisitor for SemanticAnalyzer<'a> { raise_statements: vec![], }); let flags = SymbolFlags::empty(); - self.create_symbol(f.name.clone(), function_declaration, flags); + self.create_symbol( + intern_lookup(f.name).to_string(), + function_declaration, + flags, + ); } fn visit_type_alias(&mut self, t: &parser::ast::TypeAlias) { @@ -631,11 +638,11 @@ impl<'a> TraversalVisitor for SemanticAnalyzer<'a> { ); } + // TODO: here I'm looking up the name 3 times because of immutable borrow fn visit_class_def(&mut self, c: &Arc) { - let name = c.name(&self.file.source); self.symbol_table.push_scope(SymbolTableScope::new( SymbolTableType::Class(c.clone()), - name.to_string(), + intern_lookup(c.name).to_owned(), c.node.start, self.symbol_table.current_scope_id, )); @@ -680,10 +687,10 @@ impl<'a> TraversalVisitor for SemanticAnalyzer<'a> { Arc::clone(c), class_declaration_path, class_body_scope_id, - name, + intern_lookup(c.name), )); let flags = SymbolFlags::empty(); - self.create_symbol(name.to_string(), class_declaration, flags); + self.create_symbol(intern_lookup(c.name).to_string(), class_declaration, flags); } fn visit_match(&mut self, m: &parser::ast::Match) { @@ -829,7 +836,6 @@ pub struct MemberAccessInfo {} pub fn get_member_access_info( symbol_table: &SymbolTable, value: &parser::ast::Expression, - source: &str, //TODO: This is option to use the `?` operator. Remove it ) -> Option { let name = value.as_name()?; @@ -863,7 +869,7 @@ pub fn get_member_access_info( } // e.g. "MyClass.x = 1" - if value_name == enclosing_class.name(source) || is_class_member { + if value_name == intern_lookup(enclosing_class.name) || is_class_member { Some(false) } else { Some(true) diff --git a/typechecker/src/type_evaluator.rs b/typechecker/src/type_evaluator.rs index 3b82e142..3adda5bf 100755 --- a/typechecker/src/type_evaluator.rs +++ b/typechecker/src/type_evaluator.rs @@ -2,12 +2,13 @@ #![allow(unused_variables)] use core::panic; -use enderpy_python_parser::{self as parser}; +use enderpy_python_parser::{self as parser, parser::parser::intern_lookup}; use parser::ast; use parser::parser::parser::Parser; use std::{ cell::Cell, panic::{catch_unwind, AssertUnwindSafe}, + sync::Arc, }; use tracing::{error, instrument, span, trace, Level}; @@ -23,7 +24,7 @@ use super::{ use crate::{ build::BuildManager, semantic_analyzer::get_member_access_info, - symbol_table::{self, Class, Declaration, Id, SymbolTable, SymbolTableNode}, + symbol_table::{self, Class, Declaration, DeclarationPath, Id, SymbolTable, SymbolTableNode}, types::CallableArgs, }; @@ -325,13 +326,8 @@ impl<'a> TypeEvaluator<'a> { // } // Case 1 // This is self or cls - let source = &self - .build_manager - .files - .get(&symbol_table.id) - .unwrap() - .source; - if get_member_access_info(symbol_table, &a.value, source).is_some() { + let file = &self.build_manager.files.get(&symbol_table.id).unwrap(); + if get_member_access_info(symbol_table, &a.value).is_some() { let enclosing_parent_class = symbol_table.get_enclosing_class_scope(); if let Some(enclosing_parent_class) = enclosing_parent_class { let symbol_table_node = @@ -363,11 +359,7 @@ impl<'a> TypeEvaluator<'a> { } } PythonType::Module(module) => { - let module_sym_table = self - .build_manager - .symbol_tables - .get(&module.module_id) - .unwrap(); + let module_sym_table = self.get_symbol_table(&module.module_id); Ok(self.get_name_type(&a.attr, None, &module_sym_table, 0)) } // Anything you perform a get attribute on should at least resolve using object @@ -727,7 +719,7 @@ impl<'a> TypeEvaluator<'a> { for star_import in symbol_table.star_imports.iter() { trace!("checking star imports {:?}", star_import); for id in star_import.resolved_ids.iter() { - let star_import_sym_table = self.build_manager.symbol_tables.get(id).unwrap(); + let star_import_sym_table = self.get_symbol_table(id); // In the star import we can only lookup the global scope let res = star_import_sym_table.lookup_in_scope(name, 0); match res { @@ -786,16 +778,12 @@ impl<'a> TypeEvaluator<'a> { decl }; let decl_scope = decl.declaration_path().scope_id; - let symbol_table = &self - .build_manager - .symbol_tables - .get(&decl.declaration_path().symbol_table_id) - .unwrap(); + let symbol_table = self.get_dec_symbol_table(decl.declaration_path()); let result = match decl { Declaration::Variable(v) => { if let Some(type_annotation) = &v.type_annotation { let var_type = - self.get_annotation_type(type_annotation, symbol_table, decl_scope); + self.get_annotation_type(type_annotation, &symbol_table, decl_scope); if type_annotation .as_name() @@ -829,7 +817,7 @@ impl<'a> TypeEvaluator<'a> { if let Some(b_type) = builtin_type { b_type } else { - self.get_type(source, symbol_table, decl_scope) + self.get_type(source, &symbol_table, decl_scope) .unwrap_or(PythonType::Unknown) } // If the variable was created using a for statement e.g. `a` in: for a in []: @@ -841,12 +829,12 @@ impl<'a> TypeEvaluator<'a> { } } let iter_type = self - .get_type(&for_stmt.iter, symbol_table, decl_scope) + .get_type(&for_stmt.iter, &symbol_table, decl_scope) .unwrap_or_else(|_| panic!("iterating over unknown {:?}", for_stmt)); match iter_type { PythonType::Instance(instance_type) => { let iter_method = match self.lookup_on_class( - symbol_table, + &symbol_table, &instance_type.class_type, "__iter__", ) { @@ -867,7 +855,7 @@ impl<'a> TypeEvaluator<'a> { }; let next_method = match self.lookup_on_class( - symbol_table, + &symbol_table, &iter_method_type, "__next__", ) { @@ -893,7 +881,7 @@ impl<'a> TypeEvaluator<'a> { } Declaration::Function(f) => { self.get_function_type( - symbol_table, + &symbol_table, f, // to be able to get the function signature correctly we use the scope id of the // function. Since parameters are defined in that scope. @@ -901,14 +889,14 @@ impl<'a> TypeEvaluator<'a> { ) } Declaration::AsyncFunction(f) => self.get_async_function_type( - symbol_table, + &symbol_table, f, symbol_table.get_scope(f.function_node.node.start), ), Declaration::Parameter(p) => { if let Some(type_annotation) = &p.type_annotation { let annotated_type = - self.get_annotation_type(type_annotation, symbol_table, decl_scope); + self.get_annotation_type(type_annotation, &symbol_table, decl_scope); if let PythonType::Class(ref c) = annotated_type { let instance_type = InstanceType::new(c.clone(), c.specialized.clone()); PythonType::Instance(instance_type) @@ -928,7 +916,7 @@ impl<'a> TypeEvaluator<'a> { let class_def = symbol_table .lookup_in_scope(&class_scope.name, parent_scope.id) .expect("class def not found"); - return self.get_symbol_type(class_def, symbol_table, position); + return self.get_symbol_type(class_def, &symbol_table, position); } PythonType::Unknown } @@ -946,8 +934,7 @@ impl<'a> TypeEvaluator<'a> { trace!("import result {:?}", import_result); for id in import_result.resolved_ids.iter() { trace!("checking path {:?}", id); - let symbol_table_with_alias_def = - self.build_manager.symbol_tables.get(id).unwrap(); + let symbol_table_with_alias_def = self.get_symbol_table(id); if let Some(symbol_table_file_name) = symbol_table_with_alias_def.file_path.file_stem() @@ -994,19 +981,19 @@ impl<'a> TypeEvaluator<'a> { trace!("checking star imports {:?}", star_import); for id in star_import.resolved_ids.iter() { trace!("checking path {:?}", id); - let star_import_sym_table = - self.build_manager.symbol_tables.get(id); - let Some(sym_table) = star_import_sym_table else { - panic!("symbol table of star import not found at {:?}", id); - }; - let res = sym_table.lookup_in_scope(name, 0); + let star_import_sym_table = self.get_symbol_table(id); + let res = star_import_sym_table.lookup_in_scope(name, 0); // TODO: if an import in the other module imports the previous // module again as * import then don't come back to the module // that started the import. Don't know the correct way to // handle this. match res { Some(res) => { - return self.get_symbol_type(res, &sym_table, None); + return self.get_symbol_type( + res, + &star_import_sym_table, + None, + ); } None => continue, }; @@ -1032,7 +1019,7 @@ impl<'a> TypeEvaluator<'a> { Declaration::TypeParameter(_) => PythonType::Unknown, Declaration::TypeAlias(_) => PythonType::Unknown, Declaration::Class(c) => self - .get_class_declaration_type(c, symbol_table, decl_scope) + .get_class_declaration_type(c, &symbol_table, decl_scope) .unwrap_or(PythonType::Unknown), }; @@ -1175,26 +1162,22 @@ impl<'a> TypeEvaluator<'a> { if name == "function" { return None; } - let builtins_symbol_table = &self - .build_manager - .symbol_tables - .get(&Id(0)) - .expect("Builtins must exist"); + let builtins_symbol_table = self.get_symbol_table(&Id(0)); let builtin_symbol = builtins_symbol_table.lookup_in_scope(name, 0)?; let decl = builtin_symbol.last_declaration(); let found_declaration = match decl { Declaration::Class(c) => { let decl_scope = decl.declaration_path().scope_id; - self.get_class_declaration_type(c, builtins_symbol_table, decl_scope) + self.get_class_declaration_type(c, &builtins_symbol_table, decl_scope) .unwrap_or_else(|_| { panic!("Error getting type for builtin class: {:?}", c.class_node) }) } Declaration::Function(f) => { - self.get_function_type(builtins_symbol_table, f, decl.declaration_path().scope_id) + self.get_function_type(&builtins_symbol_table, f, decl.declaration_path().scope_id) } Declaration::AsyncFunction(f) => self.get_async_function_type( - builtins_symbol_table, + &builtins_symbol_table, f, decl.declaration_path().scope_id, ), @@ -1508,12 +1491,7 @@ impl<'a> TypeEvaluator<'a> { c: &ClassType, method_name: &str, ) -> Option { - let class_symbol_table_id = c.details.declaration_path.symbol_table_id; - let class_symbol_table = self - .build_manager - .symbol_tables - .get(&class_symbol_table_id) - .unwrap(); + let class_symbol_table = self.get_dec_symbol_table(&c.details.declaration_path); let class_scope = c.details.class_scope_id; // TODO: Probably should implement mro here // Try to find on the class and it's base classes. @@ -1522,9 +1500,8 @@ impl<'a> TypeEvaluator<'a> { if symbol.is_none() { for base in bases { let base_class = base.expect_class(); - let class_symbol_table_id = base_class.details.declaration_path.symbol_table_id; - let get_symbol_table = self.build_manager.symbol_tables.get(&class_symbol_table_id); - let class_symbol_table = get_symbol_table.unwrap(); + let class_symbol_table = + self.get_dec_symbol_table(&base_class.details.declaration_path); if let Some(attribute_on_base) = class_symbol_table .lookup_attribute(method_name, base_class.details.class_scope_id) { @@ -1627,7 +1604,7 @@ impl<'a> TypeEvaluator<'a> { ) -> PythonType { // TODO: handle default values - let name = f.function_node.name.clone(); + let name = f.function_node.name; let signature = self.get_function_signature(&f.function_node.args, symbol_table, arguments_scope_id); let return_type = @@ -1637,8 +1614,13 @@ impl<'a> TypeEvaluator<'a> { .map_or(PythonType::Unknown, |type_annotation| { self.get_annotation_type(&type_annotation, symbol_table, arguments_scope_id) }); + let file = self + .build_manager + .files + .get(&f.declaration_path.symbol_table_id) + .unwrap(); PythonType::Callable(Box::new(CallableType::new( - name, + intern_lookup(name).to_string(), signature, return_type, false, @@ -1652,7 +1634,7 @@ impl<'a> TypeEvaluator<'a> { scope_id: u32, ) -> PythonType { let arguments = f.function_node.args.clone(); - let name = f.function_node.name.clone(); + let name = f.function_node.name; let signature = self.get_function_signature(&f.function_node.args, symbol_table, scope_id); let return_type = f .function_node @@ -1661,8 +1643,14 @@ impl<'a> TypeEvaluator<'a> { .map_or(PythonType::Unknown, |type_annotation| { self.get_annotation_type(&type_annotation, symbol_table, scope_id) }); + + let file = self + .build_manager + .files + .get(&f.declaration_path.symbol_table_id) + .unwrap(); PythonType::Callable(Box::new(CallableType::new( - name, + intern_lookup(name).to_string(), signature, PythonType::Coroutine(Box::new(types::CoroutineType { return_type, @@ -1737,4 +1725,13 @@ impl<'a> TypeEvaluator<'a> { } } } + + fn get_dec_symbol_table(&self, decl_path: &DeclarationPath) -> Arc { + let table_id = decl_path.symbol_table_id; + return self.build_manager.get_symbol_table_by_id(&table_id); + } + + fn get_symbol_table(&self, id: &Id) -> Arc { + return self.build_manager.get_symbol_table_by_id(id); + } }