From 9b0c14a9910146cba8a059555d470685824a793c Mon Sep 17 00:00:00 2001 From: Ryan Daum Date: Mon, 12 Aug 2024 17:19:01 -0400 Subject: [PATCH] Introduce fully lexically scoped variables Adds a new notion of block scoped variables, declared using new syntax "let x =". Adds new block scope with "begin" and "end" keywords. Existing block structures like while/if-elseif-else/try/for are also lexically scopable. Also adds explicit global variable assignment / declaration via "global" By default variables are global, to keep backwards compatibility with LambdaMOO. --- crates/compiler/src/ast.rs | 46 ++- crates/compiler/src/codegen.rs | 95 +++++- crates/compiler/src/codegen_tests.rs | 69 +++- crates/compiler/src/decompile.rs | 115 ++++++- crates/compiler/src/moo.pest | 13 +- crates/compiler/src/names.rs | 102 ++++-- crates/compiler/src/opcode.rs | 68 +++- crates/compiler/src/parse.rs | 347 +++++++++++++++++--- crates/compiler/src/program.rs | 2 +- crates/compiler/src/unparse.rs | 113 ++++++- crates/db-wiredtiger/src/bindings/cursor.rs | 4 + crates/kernel/src/builtins/bf_server.rs | 2 +- crates/kernel/src/tasks/task.rs | 3 + crates/kernel/src/textdump/load_db.rs | 10 +- crates/kernel/src/vm/mod.rs | 2 +- crates/kernel/src/vm/moo_execute.rs | 65 +++- crates/kernel/src/vm/moo_frame.rs | 48 ++- crates/kernel/src/vm/vm_call.rs | 3 +- crates/kernel/src/vm/vm_test.rs | 237 ++++++++++++- crates/kernel/src/vm/vm_unwind.rs | 4 + 20 files changed, 1145 insertions(+), 203 deletions(-) diff --git a/crates/compiler/src/ast.rs b/crates/compiler/src/ast.rs index ef4062f5..fa80cc6a 100644 --- a/crates/compiler/src/ast.rs +++ b/crates/compiler/src/ast.rs @@ -173,6 +173,13 @@ pub enum Expr { pub struct CondArm { pub condition: Expr, pub statements: Vec, + pub environment_width: usize, +} + +#[derive(Debug, Eq, PartialEq, Clone)] +pub struct ElseArm { + pub statements: Vec, + pub environment_width: usize, } #[derive(Debug, Eq, PartialEq, Clone)] @@ -210,23 +217,26 @@ impl Stmt { pub enum 
StmtNode { Cond { arms: Vec, - otherwise: Vec, + otherwise: Option, }, ForList { id: UnboundName, expr: Expr, body: Vec, + environment_width: usize, }, ForRange { id: UnboundName, from: Expr, to: Expr, body: Vec, + environment_width: usize, }, While { id: Option, condition: Expr, body: Vec, + environment_width: usize, }, Fork { id: Option, @@ -236,10 +246,18 @@ pub enum StmtNode { TryExcept { body: Vec, excepts: Vec, + environment_width: usize, }, TryFinally { body: Vec, handler: Vec, + environment_width: usize, + }, + Scope { + /// The number of non-upfront variables in the scope (e.g. let statements) + num_bindings: usize, + /// The body of the let scope, which is evaluated with the bindings in place. + body: Vec, }, Break { exit: Option, @@ -276,7 +294,19 @@ pub fn assert_trees_match_recursive(a: &[Stmt], b: &[Stmt]) { .. }, ) => { - assert_trees_match_recursive(otherwise1, otherwise2); + match (otherwise1, otherwise2) { + ( + Some(ElseArm { statements, .. }), + Some(ElseArm { + statements: statements2, + .. + }), + ) => { + assert_trees_match_recursive(statements, statements2); + } + (None, None) => {} + _ => panic!("Mismatched otherwise: {:?} vs {:?}", otherwise1, otherwise2), + } for arms in arms1.iter().zip(arms2.iter()) { assert_eq!(arms.0.condition, arms.1.condition); assert_trees_match_recursive(&arms.0.statements, &arms.1.statements); @@ -286,24 +316,34 @@ pub fn assert_trees_match_recursive(a: &[Stmt], b: &[Stmt]) { StmtNode::TryFinally { body: body1, handler: handler1, + environment_width: ew1, }, StmtNode::TryFinally { body: body2, handler: handler2, + environment_width: ew2, }, ) => { assert_trees_match_recursive(body1, body2); assert_trees_match_recursive(handler1, handler2); + assert_eq!(ew1, ew2); } (StmtNode::TryExcept { body: body1, .. }, StmtNode::TryExcept { body: body2, .. }) | (StmtNode::ForList { body: body1, .. }, StmtNode::ForList { body: body2, .. }) | (StmtNode::ForRange { body: body1, .. }, StmtNode::ForRange { body: body2, .. 
}) | (StmtNode::Fork { body: body1, .. }, StmtNode::Fork { body: body2, .. }) + | (StmtNode::Scope { body: body1, .. }, StmtNode::Scope { body: body2, .. }) | (StmtNode::While { body: body1, .. }, StmtNode::While { body: body2, .. }) => { assert_trees_match_recursive(body1, body2); } _ => { - panic!("Mismatched statements: {:?} vs {:?}", left, right); + panic!( + "Mismatched statements:\n\ + {:?}\n\ + vs\n\ + {:?}", + left, right + ); } } } diff --git a/crates/compiler/src/codegen.rs b/crates/compiler/src/codegen.rs index 52aee60f..81fd08cf 100644 --- a/crates/compiler/src/codegen.rs +++ b/crates/compiler/src/codegen.rs @@ -428,7 +428,7 @@ impl CodegenState { }; let builtin = *builtin; self.generate_arg_list(args)?; - self.emit(Op::FuncCall { id: builtin }); + self.emit(Op::FuncCall { id: builtin as u16 }); } Expr::Verb { args, @@ -515,29 +515,48 @@ impl CodegenState { self.generate_expr(&arm.condition)?; let otherwise_label = self.make_jump_label(None); self.emit(if !is_else { - Op::If(otherwise_label) + Op::If(otherwise_label, arm.environment_width as u16) } else { - Op::Eif(otherwise_label) + Op::Eif(otherwise_label, arm.environment_width as u16) }); is_else = true; self.pop_stack(1); for stmt in &arm.statements { self.generate_stmt(stmt)?; } + self.emit(Op::EndScope { + num_bindings: arm.environment_width as u16, + }); self.emit(Op::Jump { label: end_label }); // This is where we jump to if the condition is false; either the end of the // if statement, or the start of the next ('else or elseif') arm. self.commit_jump_label(otherwise_label); } - if !otherwise.is_empty() { - for stmt in otherwise { + if let Some(otherwise) = otherwise { + let end_label = self.make_jump_label(None); + // Decompilation has to elide this begin/end scope pair, as it's not actually + // present in the source code. 
+ self.emit(Op::BeginScope { + num_bindings: otherwise.environment_width as u16, + end_label, + }); + for stmt in &otherwise.statements { self.generate_stmt(stmt)?; } + self.emit(Op::EndScope { + num_bindings: otherwise.environment_width as u16, + }); + self.commit_jump_label(end_label); } self.commit_jump_label(end_label); } - StmtNode::ForList { id, expr, body } => { + StmtNode::ForList { + id, + expr, + body, + environment_width, + } => { self.generate_expr(expr)?; // Note that MOO is 1-indexed, so this is counter value is 1 in LambdaMOO; @@ -550,6 +569,7 @@ impl CodegenState { self.emit(Op::ForList { id: self.binding_mappings[id], end_label, + environment_width: *environment_width as u16, }); self.loops.push(Loop { loop_name: Some(self.binding_mappings[id]), @@ -563,10 +583,19 @@ impl CodegenState { } self.emit(Op::Jump { label: loop_top }); self.commit_jump_label(end_label); + self.emit(Op::EndScope { + num_bindings: *environment_width as u16, + }); self.pop_stack(2); self.loops.pop(); } - StmtNode::ForRange { from, to, id, body } => { + StmtNode::ForRange { + from, + to, + id, + body, + environment_width, + } => { self.generate_expr(from)?; self.generate_expr(to)?; let loop_top = self.make_jump_label(Some(self.binding_mappings[id])); @@ -575,6 +604,7 @@ impl CodegenState { self.emit(Op::ForRange { id: self.binding_mappings[id], end_label, + environment_width: *environment_width as u16, }); self.loops.push(Loop { loop_name: Some(self.binding_mappings[id]), @@ -588,6 +618,9 @@ impl CodegenState { } self.emit(Jump { label: loop_top }); self.commit_jump_label(end_label); + self.emit(Op::EndScope { + num_bindings: *environment_width as u16, + }); self.pop_stack(2); self.loops.pop(); } @@ -595,6 +628,7 @@ impl CodegenState { id, condition, body, + environment_width, } => { let loop_start_label = self.make_jump_label(id.as_ref().map(|id| self.binding_mappings[id])); @@ -604,10 +638,14 @@ impl CodegenState { self.make_jump_label(id.as_ref().map(|id| 
self.binding_mappings[id])); self.generate_expr(condition)?; match id { - None => self.emit(Op::While(loop_end_label)), + None => self.emit(Op::While { + jump_label: loop_end_label, + environment_width: *environment_width as u16, + }), Some(id) => self.emit(Op::WhileId { id: self.binding_mappings[id], end_label: loop_end_label, + environment_width: *environment_width as u16, }), } self.pop_stack(1); @@ -625,6 +663,9 @@ impl CodegenState { label: loop_start_label, }); self.commit_jump_label(loop_end_label); + self.emit(Op::EndScope { + num_bindings: *environment_width as u16, + }); self.loops.pop(); } StmtNode::Fork { id, body, time } => { @@ -645,7 +686,11 @@ impl CodegenState { }); self.pop_stack(1); } - StmtNode::TryExcept { body, excepts } => { + StmtNode::TryExcept { + body, + excepts, + environment_width, + } => { let mut labels = vec![]; let num_excepts = excepts.len(); for ex in excepts { @@ -655,7 +700,10 @@ impl CodegenState { labels.push(push_label); } self.pop_stack(num_excepts); - self.emit(Op::TryExcept { num_excepts }); + self.emit(Op::TryExcept { + num_excepts: num_excepts as u16, + environment_width: *environment_width as u16, + }); for stmt in body { self.generate_stmt(stmt)?; } @@ -676,12 +724,20 @@ impl CodegenState { self.emit(Op::Jump { label: end_label }); } } + self.emit(Op::EndScope { + num_bindings: *environment_width as u16, + }); self.commit_jump_label(end_label); } - StmtNode::TryFinally { body, handler } => { + StmtNode::TryFinally { + body, + handler, + environment_width, + } => { let handler_label = self.make_jump_label(None); self.emit(Op::TryFinally { end_label: handler_label, + environment_width: *environment_width as u16, }); for stmt in body { self.generate_stmt(stmt)?; @@ -695,6 +751,23 @@ impl CodegenState { self.emit(Op::FinallyContinue); self.pop_stack(2); } + StmtNode::Scope { num_bindings, body } => { + let end_label = self.make_jump_label(None); + self.emit(Op::BeginScope { + num_bindings: *num_bindings as u16, + 
end_label, + }); + + // And then the body within which the bindings are in scope. + for stmt in body { + self.generate_stmt(stmt)?; + } + + self.emit(Op::EndScope { + num_bindings: *num_bindings as u16, + }); + self.commit_jump_label(end_label); + } StmtNode::Break { exit: None } => { let l = self.loops.last().expect("No loop to break/continue from"); self.emit(Op::Exit { diff --git a/crates/compiler/src/codegen_tests.rs b/crates/compiler/src/codegen_tests.rs index 5efcb66f..6d5dd5c9 100644 --- a/crates/compiler/src/codegen_tests.rs +++ b/crates/compiler/src/codegen_tests.rs @@ -109,19 +109,26 @@ mod tests { ImmInt(1), ImmInt(2), Eq, - If(1.into()), + If(1.into(), 0), ImmInt(5), Return, + EndScope { num_bindings: 0 }, Jump { label: 0.into() }, ImmInt(2), ImmInt(3), Eq, - Eif(2.into()), + Eif(2.into(), 0), ImmInt(3), Return, + EndScope { num_bindings: 0 }, Jump { label: 0.into() }, + BeginScope { + num_bindings: 0, + end_label: 3.into() + }, ImmInt(6), Return, + EndScope { num_bindings: 0 }, Done ] ); @@ -148,13 +155,17 @@ mod tests { *binary.main_vector.as_ref(), vec![ ImmInt(1), - While(1.into()), + While { + jump_label: 1.into(), + environment_width: 0, + }, Push(x), ImmInt(1), Add, Put(x), Pop, Jump { label: 0.into() }, + EndScope { num_bindings: 0 }, Done ] ); @@ -192,7 +203,8 @@ mod tests { ImmInt(1), WhileId { id: chuckles, - end_label: 1.into() + end_label: 1.into(), + environment_width: 0, }, Push(x), ImmInt(1), @@ -202,15 +214,17 @@ mod tests { Push(x), ImmInt(5), Gt, - If(3.into()), + If(3.into(), 0), ExitId(1.into()), + EndScope { num_bindings: 0 }, Jump { label: 2.into() }, Jump { label: 0.into() }, + EndScope { num_bindings: 0 }, Done, ] ); assert_eq!(binary.jump_labels[0].position.0, 0); - assert_eq!(binary.jump_labels[1].position.0, 14); + assert_eq!(binary.jump_labels[1].position.0, 15); } #[test] fn test_while_break_continue_stmt() { @@ -240,7 +254,10 @@ mod tests { *binary.main_vector.as_ref(), vec![ ImmInt(1), - While(1.into()), + While { + 
jump_label: 1.into(), + environment_width: 0, + }, Push(x), ImmInt(1), Add, @@ -249,22 +266,29 @@ mod tests { Push(x), ImmInt(5), Eq, - If(3.into()), + If(3.into(), 0), Exit { stack: 0.into(), label: 1.into() }, + EndScope { num_bindings: 0 }, Jump { label: 2.into() }, + BeginScope { + num_bindings: 0, + end_label: 4.into() + }, Exit { stack: 0.into(), label: 0.into() }, + EndScope { num_bindings: 0 }, Jump { label: 0.into() }, + EndScope { num_bindings: 0 }, Done ] ); assert_eq!(binary.jump_labels[0].position.0, 0); - assert_eq!(binary.jump_labels[1].position.0, 15); + assert_eq!(binary.jump_labels[1].position.0, 18); } #[test] fn test_for_in_list_stmt() { @@ -303,7 +327,8 @@ mod tests { ImmInt(0), ForList { id: x, - end_label: 1.into() + end_label: 1.into(), + environment_width: 0, }, Push(x), ImmInt(5), @@ -311,6 +336,7 @@ mod tests { Put(b), Pop, Jump { label: 0.into() }, + EndScope { num_bindings: 0 }, Done ] ); @@ -347,7 +373,8 @@ mod tests { ImmInt(5), ForRange { id: n, - end_label: 1.into() + end_label: 1.into(), + environment_width: 0, }, Push(player), Imm(tell), @@ -356,6 +383,7 @@ mod tests { CallVerb, Pop, Jump { label: 0.into() }, + EndScope { num_bindings: 0 }, Done ] ); @@ -774,7 +802,8 @@ mod tests { *binary.main_vector.as_ref(), vec![ TryFinally { - end_label: 0.into() + end_label: 0.into(), + environment_width: 0, }, ImmInt(1), Put(a), @@ -831,7 +860,10 @@ mod tests { ImmErr(E_PROPNF), MakeSingletonList, PushCatchLabel(1.into()), - TryExcept { num_excepts: 2 }, + TryExcept { + num_excepts: 2, + environment_width: 0 + }, ImmInt(1), Put(a), Pop, @@ -847,6 +879,7 @@ mod tests { ImmInt(3), Put(a), Pop, + EndScope { num_bindings: 0 }, Done ] ); @@ -930,7 +963,9 @@ mod tests { }, ImmErr(E_INVARG), MakeSingletonList, - FuncCall { id: raise_num }, + FuncCall { + id: raise_num as u16 + }, EndCatch(1.into()), ImmInt(1), Ref, @@ -1449,7 +1484,10 @@ mod tests { ImmErr(E_RANGE), MakeSingletonList, PushCatchLabel(Label(0)), - TryExcept { num_excepts: 1 }, + 
TryExcept { + num_excepts: 1, + environment_width: 0 + }, Imm(Label(0)), ImmInt(2), // Our offset is different because we don't count PushLabel in the stack. @@ -1458,6 +1496,7 @@ mod tests { Return, EndExcept(Label(1)), Pop, + EndScope { num_bindings: 0 }, Done ] ); diff --git a/crates/compiler/src/decompile.rs b/crates/compiler/src/decompile.rs index ba316bac..b4c50f12 100644 --- a/crates/compiler/src/decompile.rs +++ b/crates/compiler/src/decompile.rs @@ -18,8 +18,8 @@ use moor_values::var::{v_float, Variant}; use std::collections::{HashMap, VecDeque}; use crate::ast::{ - Arg, BinaryOp, CatchCodes, CondArm, ExceptArm, Expr, ScatterItem, ScatterKind, Stmt, StmtNode, - UnaryOp, + Arg, BinaryOp, CatchCodes, CondArm, ElseArm, ExceptArm, Expr, ScatterItem, ScatterKind, Stmt, + StmtNode, UnaryOp, }; use crate::builtins::make_offsets_builtins; use crate::decompile::DecompileError::{BuiltinNotFound, MalformedProgram}; @@ -192,7 +192,7 @@ impl Decompile { let line_num = self.line_num_for_position(); match opcode { - Op::If(otherwise_label) => { + Op::If(otherwise_label, environment_width) => { let cond = self.pop_expr()?; // decompile statements until the position marked in `label`, which is the @@ -204,18 +204,40 @@ impl Decompile { let cond_arm = CondArm { condition: cond, statements: arm, + environment_width: environment_width as usize, }; self.statements.push(Stmt::new( StmtNode::Cond { arms: vec![cond_arm], - otherwise: vec![], + otherwise: None, }, line_num, )); // Decompile to the 'end_of_otherwise' label to get the statements for the // otherwise branch. - let otherwise_stmts = self.decompile_statements_until(&end_of_otherwise)?; + let mut otherwise_stmts = self.decompile_statements_until(&end_of_otherwise)?; + + // Resulting thing should be a Scope, or empty. + let else_arm = if otherwise_stmts.is_empty() { + None + } else { + let Some(Stmt { + node: StmtNode::Scope { num_bindings, body }, + .. 
+ }) = otherwise_stmts.pop() + else { + return Err(MalformedProgram( + "expected Scope as otherwise branch".to_string(), + )); + }; + + Some(ElseArm { + statements: body, + environment_width: num_bindings, + }) + }; + let Some(Stmt { node: StmtNode::Cond { arms: _, otherwise }, .. @@ -225,9 +247,9 @@ impl Decompile { "expected Cond as working tree".to_string(), )); }; - *otherwise = otherwise_stmts; + *otherwise = else_arm; } - Op::Eif(end_label) => { + Op::Eif(end_label, environment_width) => { let cond = self.pop_expr()?; // decompile statements until the position marked in `label`, which is the // end of the branch statement @@ -235,6 +257,7 @@ impl Decompile { let cond_arm = CondArm { condition: cond, statements: cond_statements, + environment_width: environment_width as usize, }; // Add the arm let Some(Stmt { @@ -251,6 +274,7 @@ impl Decompile { Op::ForList { id, end_label: label, + environment_width, } => { let one = self.pop_expr()?; let Expr::Value(v) = one else { @@ -271,21 +295,35 @@ impl Decompile { id, expr: list, body, + environment_width: environment_width as usize, }, line_num, )); } - Op::ForRange { id, end_label } => { + Op::ForRange { + id, + end_label, + environment_width, + } => { let to = self.pop_expr()?; let from = self.pop_expr()?; let (body, _) = self.decompile_until_branch_end(&end_label)?; let id = self.decompile_name(&id)?; self.statements.push(Stmt::new( - StmtNode::ForRange { id, from, to, body }, + StmtNode::ForRange { + id, + from, + to, + body, + environment_width: environment_width as usize, + }, line_num, )); } - Op::While(loop_end_label) => { + Op::While { + jump_label: loop_end_label, + environment_width, + } => { // A "while" is actually a: // a conditional expression // this While opcode (with end label) @@ -298,6 +336,7 @@ impl Decompile { id: None, condition: cond, body, + environment_width: environment_width as usize, }, line_num, )); @@ -307,6 +346,7 @@ impl Decompile { Op::WhileId { id, end_label: loop_end_label, + 
environment_width, } => { // A "while" is actually a: // a conditional expression @@ -321,6 +361,7 @@ impl Decompile { id: Some(id), condition: cond, body, + environment_width: environment_width as usize, }, line_num, )); @@ -520,8 +561,8 @@ impl Decompile { } Op::FuncCall { id } => { let args = self.pop_expr()?; - let Some(builtin) = self.builtins.get(&id) else { - return Err(BuiltinNotFound(id)); + let Some(builtin) = self.builtins.get(&(id as usize)) else { + return Err(BuiltinNotFound(id as usize)); }; // Have to reconstruct arg list ... @@ -656,8 +697,11 @@ impl Decompile { Op::PushCatchLabel(_) => { // ignore and consume, we don't need it. } - Op::TryExcept { num_excepts } => { - let mut except_arms = Vec::with_capacity(num_excepts); + Op::TryExcept { + num_excepts, + environment_width, + } => { + let mut except_arms = Vec::with_capacity(num_excepts as usize); for _ in 0..num_excepts { let codes_expr = self.pop_expr()?; let catch_codes = match codes_expr { @@ -726,18 +770,28 @@ impl Decompile { StmtNode::TryExcept { body, excepts: except_arms, + environment_width: environment_width as usize, }, line_num, )); } - Op::TryFinally { end_label: _label } => { + Op::TryFinally { + end_label: _label, + environment_width, + } => { // decompile body up until the EndFinally let (body, _) = self.decompile_statements_until_match(|_, op| matches!(op, Op::EndFinally))?; let (handler, _) = self .decompile_statements_until_match(|_, op| matches!(op, Op::FinallyContinue))?; - self.statements - .push(Stmt::new(StmtNode::TryFinally { body, handler }, line_num)); + self.statements.push(Stmt::new( + StmtNode::TryFinally { + body, + handler, + environment_width: environment_width as usize, + }, + line_num, + )); } Op::TryCatch { handler_label: label, @@ -859,6 +913,22 @@ impl Decompile { Op::ImmObjid(oid) => { self.push_expr(Expr::Value(v_objid(oid))); } + Op::BeginScope { + num_bindings, + end_label, + } => { + let block_statements = self.decompile_statements_until(&end_label)?; + 
self.statements.push(Stmt::new( + StmtNode::Scope { + num_bindings: num_bindings as usize, + body: block_statements, + }, + line_num, + )); + } + Op::EndScope { .. } => { + // Noop. + } } Ok(()) } @@ -867,7 +937,7 @@ impl Decompile { self.names_mapping .get(name) .cloned() - .ok_or_else(|| DecompileError::NameNotFound(name.clone())) + .ok_or(DecompileError::NameNotFound(*name)) } } @@ -996,6 +1066,15 @@ mod tests { assert_trees_match_recursive(&parse.stmts, &decompiled.stmts); } + #[test] + fn test_decompile_lexical_scope_block() { + let program = r#"begin + let a = 5; + end"#; + let (parse, decompiled) = parse_decompile(program); + assert_trees_match_recursive(&parse.stmts, &decompiled.stmts); + } + #[test] // A big verb to verify that decompilation works for more than just simple cases. fn test_a_complicated_function() { diff --git a/crates/compiler/src/moo.pest b/crates/compiler/src/moo.pest index 4d6e5109..0a721a91 100644 --- a/crates/compiler/src/moo.pest +++ b/crates/compiler/src/moo.pest @@ -26,8 +26,11 @@ statement = { | return_statement | try_except_statement | try_finally_statement + | begin_statement | expr_statement -} + | local_assignment + | global_assignment + } if_statement = { ^"if" ~ "(" ~ expr ~ ")" ~ statements ~ (elseif_clause)* ~ (else_clause)? ~ endif_clause } elseif_clause = { ^"elseif" ~ "(" ~ expr ~ ")" ~ statements } @@ -53,6 +56,13 @@ except = { ^"except" ~ (labelled_except | unlabelled_except) ~ st labelled_except = { ident ~ "(" ~ codes ~ ")" } unlabelled_except = { "(" ~ codes ~ ")" } +begin_statement = { ^"begin" ~ statements ~ ^"end" } + +local_assignment = { ^"let" ~ ident ~ (ASSIGN ~ expr)? ~ ";" } + +// globally scoped (same as default in MOO) adds explicitly to global scope. +global_assignment = { ^"global" ~ ident ~ (ASSIGN ~ expr)? 
~ ";" } + codes = { anycode | exprlist } anycode = { ^"any" } @@ -188,6 +198,7 @@ keyword = @{ | ^"endtry" | ^"finally" | ^"in" + | ^"let" | err } diff --git a/crates/compiler/src/names.rs b/crates/compiler/src/names.rs index 77dbdf17..f909eb98 100644 --- a/crates/compiler/src/names.rs +++ b/crates/compiler/src/names.rs @@ -20,13 +20,19 @@ use strum::IntoEnumIterator; #[derive(Debug, Clone, Eq, PartialEq)] pub struct UnboundNames { - unbound_names: Vec, + unbound_names: Vec<(Symbol, usize)>, scope: Vec>, } #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub struct UnboundName(usize); +impl Default for UnboundNames { + fn default() -> Self { + Self::new() + } +} + impl UnboundNames { pub fn new() -> Self { let mut names = Self { @@ -53,7 +59,7 @@ impl UnboundNames { // If the name doesn't exist, add it to the global scope, since that's how // MOO currently works. - let unbound_name = self.new_unbound(name); + let unbound_name = self.new_unbound(name, 0); self.scope[0].insert(name, unbound_name); unbound_name } @@ -64,38 +70,46 @@ impl UnboundNames { } /// Pop the current scope. - pub fn pop_scope(&mut self) { - self.scope.pop(); + pub fn pop_scope(&mut self) -> usize { + let scope = self.scope.pop().unwrap(); + scope.len() } /// Declare a name in the current lexical scope. pub fn declare_name(&mut self, name: &str) -> UnboundName { let name = Symbol::mk_case_insensitive(name); - let unbound_name = self.new_unbound(name); + let unbound_name = self.new_unbound(name, self.scope.len() - 1); self.scope.last_mut().unwrap().insert(name, unbound_name); unbound_name } - /// Find the name in the name table, if it exists. - pub fn find_name(&self, name: &str) -> Option { - self.find_name_offset(Symbol::mk_case_insensitive(name)) - .map(|x| UnboundName(x)) + /// If the same named variable exists in multiple scopes, return them all as a vector. 
+ pub fn find_named(&self, name: &str) -> Vec { + let name = Symbol::mk_case_insensitive(name); + let mut names = vec![]; + for (i, n) in self.unbound_names.iter().enumerate() { + if n.0 == name { + names.push(UnboundName(i)); + } + } + names } - /// Return the environment offset of the name, if it exists. - pub fn find_name_offset(&self, name: Symbol) -> Option { + /// Find the first scoped name in the name table, if it exists. + pub fn find_name(&self, name: &str) -> Option { + let name = Symbol::mk_case_insensitive(name); for scope in self.scope.iter().rev() { if let Some(n) = scope.get(&name) { - return Some(n.0 as usize); + return Some(*n); } } None } /// Create a new unbound variable. - fn new_unbound(&mut self, name: Symbol) -> UnboundName { + fn new_unbound(&mut self, name: Symbol, scope: usize) -> UnboundName { let idx = self.unbound_names.len(); - self.unbound_names.push(name); + self.unbound_names.push((name, scope)); UnboundName(idx) } @@ -103,18 +117,27 @@ impl UnboundNames { /// Run at the end of compilation to produce valid offsets. pub fn bind(&self) -> (Names, HashMap) { let mut mapping = HashMap::new(); - let mut bound = vec![]; + let mut bound = Vec::with_capacity(self.unbound_names.len()); // Walk the scopes, binding all unbound variables. // This will produce offsets for all variables in the order they should appear in the // environment. 
- for _ in self.scope.iter() { - for idx in 0..self.unbound_names.len() { - let offset = bound.len(); - bound.push(self.unbound_names[idx]); - mapping.insert(UnboundName(idx), Name(offset as u16)); - } + let mut scope_depth = Vec::with_capacity(self.unbound_names.len()); + for idx in 0..self.unbound_names.len() { + let offset = bound.len(); + bound.push(self.unbound_names[idx].0); + scope_depth.push(self.unbound_names[idx].1 as u16); + mapping.insert(UnboundName(idx), Name(offset as u16)); } - (Names { bound }, mapping) + + let global_width = self.scope[0].len(); + ( + Names { + bound, + global_width, + scope_depth, + }, + mapping, + ) } } @@ -124,18 +147,22 @@ pub struct Name(u16); #[derive(Clone, Debug, PartialEq, Eq, Encode, Decode)] pub struct Names { + /// The set of bound variables and their names. bound: Vec, -} - -impl Default for Names { - fn default() -> Self { - Self::new() - } + /// The size of the global scope, e.g. the size the environment should be when the frame + /// is first created. + global_width: usize, + /// The scope-depth for each variable. E.g. 0 for global and then 1..N for nested scopes. + scope_depth: Vec, } impl Names { - pub fn new() -> Self { - Self { bound: vec![] } + pub fn new(global_width: usize) -> Self { + Self { + bound: vec![], + global_width, + scope_depth: vec![], + } } /// Find the name in the name table, if it exists. @@ -148,11 +175,17 @@ impl Names { None } - /// Return the width of the name table, to be used as the (total) environment size. + /// Return the total width of the name table, to be used as the (total) environment size. pub fn width(&self) -> usize { self.bound.len() } + /// The size of the global scope section of the environment, e.g. the "environment_width" of the + /// frame's environment when it is first created. + pub fn global_width(&self) -> usize { + self.global_width + } + /// Return the symbol value of the given name offset. 
pub fn name_of(&self, name: &Name) -> Option { if name.0 as usize >= self.bound.len() { @@ -169,6 +202,13 @@ impl Names { (0..self.bound.len() as u16).map(Name).collect() } + pub fn depth_of(&self, name: &Name) -> Option { + if name.0 as usize >= self.scope_depth.len() { + return None; + } + Some(self.scope_depth[name.0 as usize]) + } + /// Get the offset for a bound variable. pub fn offset_for(&self, name: &Name) -> Option { if name.0 as usize >= self.bound.len() { diff --git a/crates/compiler/src/opcode.rs b/crates/compiler/src/opcode.rs index cb439daa..f1931e35 100644 --- a/crates/compiler/src/opcode.rs +++ b/crates/compiler/src/opcode.rs @@ -28,18 +28,33 @@ pub enum Op { FinallyContinue, Div, Done, - Eif(Label), EndCatch(Label), EndExcept(Label), EndFinally, Eq, - Exit { stack: Offset, label: Label }, + Exit { + stack: Offset, + label: Label, + }, ExitId(Label), Exp, - ForList { id: Name, end_label: Label }, - ForRange { id: Name, end_label: Label }, - Fork { fv_offset: Offset, id: Option }, - FuncCall { id: usize }, + ForList { + id: Name, + end_label: Label, + environment_width: u16, + }, + ForRange { + id: Name, + end_label: Label, + environment_width: u16, + }, + Fork { + fv_offset: Offset, + id: Option, + }, + FuncCall { + id: u16, + }, Ge, GetProp, Gt, @@ -54,7 +69,9 @@ pub enum Op { ImmObjid(Objid), In, IndexSet, - Jump { label: Label }, + Jump { + label: Label, + }, Le, Length(Offset), ListAddTail, @@ -83,13 +100,38 @@ pub enum Op { Scatter(Box), Sub, PushCatchLabel(Label), - TryCatch { handler_label: Label }, - TryExcept { num_excepts: usize }, - TryFinally { end_label: Label }, + TryCatch { + handler_label: Label, + }, + TryExcept { + num_excepts: u16, + environment_width: u16, + }, + TryFinally { + end_label: Label, + environment_width: u16, + }, + /// Begin a lexical scope, expanding the Environment by `num_bindings` + BeginScope { + num_bindings: u16, + end_label: Label, + }, + /// End a lexical scope, contracting the Environment by `num_bindings` 
+ EndScope { + num_bindings: u16, + }, UnaryMinus, - While(Label), - WhileId { id: Name, end_label: Label }, - If(Label), + While { + jump_label: Label, + environment_width: u16, + }, + WhileId { + id: Name, + end_label: Label, + environment_width: u16, + }, + If(Label, u16), + Eif(Label, u16), } #[derive(Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Encode, Decode)] diff --git a/crates/compiler/src/parse.rs b/crates/compiler/src/parse.rs index 6724b9b1..a078ca32 100644 --- a/crates/compiler/src/parse.rs +++ b/crates/compiler/src/parse.rs @@ -19,7 +19,7 @@ use std::collections::HashMap; use std::rc::Rc; use std::str::FromStr; -use moor_values::var::Symbol; +use moor_values::var::{v_none, Symbol}; use moor_values::SYSTEM_OBJECT; use pest::pratt_parser::{Assoc, Op, PrattParser}; pub use pest::Parser as PestParser; @@ -33,9 +33,10 @@ use moor_values::var::Objid; use moor_values::var::{v_err, v_float, v_int, v_objid, v_str, v_string}; use crate::ast::Arg::{Normal, Splice}; +use crate::ast::StmtNode::Scope; use crate::ast::{ - Arg, BinaryOp, CatchCodes, CondArm, ExceptArm, Expr, ScatterItem, ScatterKind, Stmt, StmtNode, - UnaryOp, + Arg, BinaryOp, CatchCodes, CondArm, ElseArm, ExceptArm, Expr, ScatterItem, ScatterKind, Stmt, + StmtNode, UnaryOp, }; use crate::names::{Names, UnboundName, UnboundNames}; use crate::parse::moo::{MooParser, Rule}; @@ -523,43 +524,52 @@ fn parse_statement( Ok(None) } Rule::while_statement => { + names.borrow_mut().push_scope(); let mut parts = pair.into_inner(); let condition = parse_expr(names.clone(), parts.next().unwrap().into_inner())?; - let body = parse_statements(names, parts.next().unwrap().into_inner())?; + let body = parse_statements(names.clone(), parts.next().unwrap().into_inner())?; + let environment_width = names.borrow_mut().pop_scope(); Ok(Some(Stmt::new( StmtNode::While { id: None, condition, body, + environment_width, }, line, ))) } Rule::labelled_while_statement => { + names.borrow_mut().push_scope(); let mut parts = 
pair.into_inner(); let id = names .borrow_mut() .find_or_add_name_global(parts.next().unwrap().as_str()); let condition = parse_expr(names.clone(), parts.next().unwrap().into_inner())?; - let body = parse_statements(names, parts.next().unwrap().into_inner())?; + let body = parse_statements(names.clone(), parts.next().unwrap().into_inner())?; + let environment_width = names.borrow_mut().pop_scope(); Ok(Some(Stmt::new( StmtNode::While { id: Some(id), condition, body, + environment_width, }, line, ))) } Rule::if_statement => { + names.borrow_mut().push_scope(); let mut parts = pair.into_inner(); let mut arms = vec![]; - let mut otherwise = vec![]; + let mut otherwise = None; let condition = parse_expr(names.clone(), parts.next().unwrap().into_inner())?; let body = parse_statements(names.clone(), parts.next().unwrap().into_inner())?; + let environment_width = names.borrow_mut().pop_scope(); arms.push(CondArm { condition, statements: body, + environment_width, }); for remainder in parts { match remainder.as_rule() { @@ -567,20 +577,30 @@ fn parse_statement( continue; } Rule::elseif_clause => { + names.borrow_mut().push_scope(); let mut parts = remainder.into_inner(); let condition = parse_expr(names.clone(), parts.next().unwrap().into_inner())?; let body = parse_statements(names.clone(), parts.next().unwrap().into_inner())?; + let environment_width = names.borrow_mut().pop_scope(); arms.push(CondArm { condition, statements: body, + environment_width, }); } Rule::else_clause => { + names.borrow_mut().push_scope(); let mut parts = remainder.into_inner(); - otherwise = + let otherwise_statements = parse_statements(names.clone(), parts.next().unwrap().into_inner())?; + let otherwise_environment_width = names.borrow_mut().pop_scope(); + let otherwise_arm = ElseArm { + statements: otherwise_statements, + environment_width: otherwise_environment_width, + }; + otherwise = Some(otherwise_arm); } _ => panic!("Unimplemented if clause: {:?}", remainder), } @@ -623,6 +643,7 @@ fn 
parse_statement( Ok(Some(Stmt::new(StmtNode::Return(expr), line))) } Rule::for_statement => { + names.borrow_mut().push_scope(); let mut parts = pair.into_inner(); let id = names .borrow_mut() @@ -635,31 +656,54 @@ fn parse_statement( let from_rule = clause_inner.next().unwrap(); let to_rule = clause_inner.next().unwrap(); let from = parse_expr(names.clone(), from_rule.into_inner())?; - let to = parse_expr(names, to_rule.into_inner())?; + let to = parse_expr(names.clone(), to_rule.into_inner())?; + let environment_width = names.borrow_mut().pop_scope(); Ok(Some(Stmt::new( - StmtNode::ForRange { id, from, to, body }, + StmtNode::ForRange { + id, + from, + to, + body, + environment_width, + }, line, ))) } Rule::for_in_clause => { let mut clause_inner = clause.into_inner(); let in_rule = clause_inner.next().unwrap(); - let expr = parse_expr(names, in_rule.into_inner())?; - Ok(Some(Stmt::new(StmtNode::ForList { id, expr, body }, line))) + let expr = parse_expr(names.clone(), in_rule.into_inner())?; + let environment_width = names.borrow_mut().pop_scope(); + Ok(Some(Stmt::new( + StmtNode::ForList { + id, + expr, + body, + environment_width, + }, + line, + ))) } _ => panic!("Unimplemented for clause: {:?}", clause), } } Rule::try_finally_statement => { + names.borrow_mut().push_scope(); let mut parts = pair.into_inner(); let body = parse_statements(names.clone(), parts.next().unwrap().into_inner())?; - let handler = parse_statements(names, parts.next().unwrap().into_inner())?; + let handler = parse_statements(names.clone(), parts.next().unwrap().into_inner())?; + let environment_width = names.borrow_mut().pop_scope(); Ok(Some(Stmt::new( - StmtNode::TryFinally { body, handler }, + StmtNode::TryFinally { + body, + handler, + environment_width, + }, line, ))) } Rule::try_except_statement => { + names.borrow_mut().push_scope(); let mut parts = pair.into_inner(); let body = parse_statements(names.clone(), parts.next().unwrap().into_inner())?; let mut excepts = vec![]; @@ 
-705,7 +749,15 @@ fn parse_statement( _ => panic!("Unimplemented except clause: {:?}", except), } } - Ok(Some(Stmt::new(StmtNode::TryExcept { body, excepts }, line))) + let environment_width = names.borrow_mut().pop_scope(); + Ok(Some(Stmt::new( + StmtNode::TryExcept { + body, + excepts, + environment_width, + }, + line, + ))) } Rule::fork_statement => { let mut parts = pair.into_inner(); @@ -736,6 +788,70 @@ fn parse_statement( line, ))) } + Rule::begin_statement => { + let mut parts = pair.into_inner(); + + names.borrow_mut().push_scope(); + + let body = parse_statements(names.clone(), parts.next().unwrap().into_inner())?; + let num_total_bindings = names.borrow_mut().pop_scope(); + Ok(Some(Stmt::new( + Scope { + num_bindings: num_total_bindings, + body, + }, + line, + ))) + } + Rule::local_assignment => { + // An assignment declaration that introduces a locally lexically scoped variable. + // May be of form `let x = expr` or just `let x` + let mut parts = pair.into_inner(); + let id = names + .borrow_mut() + .declare_name(parts.next().unwrap().as_str()); + let expr = parts + .next() + .map(|e| parse_expr(names.clone(), e.into_inner()).unwrap()); + + // Just becomes an assignment expression. + // But that means the decompiler will need to know what to do with it. + // Which is: if assignment is on its own in statement, and variable assigned to is + // restricted to the scope of the block, then it's a let. + Ok(Some(Stmt::new( + StmtNode::Expr(Expr::Assign { + left: Box::new(Expr::Id(id)), + right: Box::new(expr.unwrap_or(Expr::Value(v_none()))), + }), + line, + ))) + } + Rule::global_assignment => { + // An explicit global-declaration. 
+ // global x, or global x = y + let mut parts = pair.into_inner(); + let id = names + .borrow_mut() + .find_or_add_name_global(parts.next().unwrap().as_str()); + let expr = parts + .next() + .map(|e| parse_expr(names.clone(), e.into_inner()).unwrap()); + + // Produces an assignment expression as usual, but + // the decompiler will need to look and see that + // a) the statement is just an assignment on its own + // b) the variable being assigned to is in scope 0 (global) + // and then it's a global declaration. + // Note that this will have the effect of turning most existing MOO decompilations + // into global declarations, which is fine, if that feature is turned on. + Ok(Some(Stmt::new( + StmtNode::Expr(Expr::Assign { + left: Box::new(Expr::Id(id)), + right: Box::new(expr.unwrap_or(Expr::Value(v_none()))), + }), + line, + ))) + } _ => panic!("Unimplemented statement: {:?}", pair.as_rule()), } } @@ -857,14 +973,14 @@ pub fn unquote_str(s: &str) -> Result { #[cfg(test)] mod tests { use moor_values::var::Error::{E_INVARG, E_PROPNF, E_VARNF}; - use moor_values::var::Symbol; use moor_values::var::{v_err, v_float, v_int, v_obj, v_str}; + use moor_values::var::{v_none, Symbol}; use crate::ast::Arg::{Normal, Splice}; use crate::ast::Expr::{Call, Id, Prop, Value, Verb}; use crate::ast::{ - BinaryOp, CatchCodes, CondArm, ExceptArm, Expr, ScatterItem, ScatterKind, Stmt, StmtNode, - UnaryOp, + BinaryOp, CatchCodes, CondArm, ElseArm, ExceptArm, Expr, ScatterItem, ScatterKind, Stmt, + StmtNode, UnaryOp, }; use crate::parse::{parse_program, unquote_str}; use moor_values::model::CompileError; @@ -968,8 +1084,10 @@ mod tests { parser_line_no: 1, tree_line_no: 2, }], + environment_width: 0, }, CondArm { + environment_width: 0, condition: Expr::Binary( BinaryOp::Eq, Box::new(Value(v_int(2))), @@ -983,11 +1101,14 @@ mod tests { }, ], - otherwise: vec![Stmt { - node: StmtNode::Return(Some(Value(v_int(6)))), - parser_line_no: 1, - tree_line_no: 6, - }], + otherwise: Some(ElseArm { +
statements: vec![Stmt { + node: StmtNode::Return(Some(Value(v_int(6)))), + parser_line_no: 1, + tree_line_no: 6, + }], + environment_width: 0, + }), } ); } @@ -1012,6 +1133,7 @@ mod tests { StmtNode::Cond { arms: vec![ CondArm { + environment_width: 0, condition: Expr::Binary( BinaryOp::Eq, Box::new(Value(v_int(1))), @@ -1024,6 +1146,7 @@ mod tests { }], }, CondArm { + environment_width: 0, condition: Expr::Binary( BinaryOp::Eq, Box::new(Value(v_int(2))), @@ -1036,6 +1159,7 @@ mod tests { }], }, CondArm { + environment_width: 0, condition: Expr::Binary( BinaryOp::Eq, Box::new(Value(v_int(3))), @@ -1049,11 +1173,14 @@ mod tests { }, ], - otherwise: vec![Stmt { - node: StmtNode::Return(Some(Value(v_int(6)))), - parser_line_no: 9, - tree_line_no: 8, - }], + otherwise: Some(ElseArm { + statements: vec![Stmt { + node: StmtNode::Return(Some(Value(v_int(6)))), + parser_line_no: 9, + tree_line_no: 8, + }], + environment_width: 0, + }), } ); } @@ -1090,6 +1217,7 @@ mod tests { stripped_stmts(&parse.stmts)[0], StmtNode::Cond { arms: vec![CondArm { + environment_width: 0, condition: Expr::Unary( UnaryOp::Not, Box::new(Verb { @@ -1107,7 +1235,7 @@ mod tests { tree_line_no: 2, }], }], - otherwise: vec![], + otherwise: None, } ); } @@ -1122,6 +1250,7 @@ mod tests { assert_eq!( stripped_stmts(&parse.stmts)[0], StmtNode::ForList { + environment_width: 0, id: x, expr: Expr::List(vec![ Normal(Value(v_int(1))), @@ -1154,6 +1283,7 @@ mod tests { assert_eq!( stripped_stmts(&parse.stmts)[0], StmtNode::ForRange { + environment_width: 0, id: x, from: Value(v_int(1)), to: Value(v_int(5)), @@ -1213,6 +1343,7 @@ mod tests { assert_eq!( stripped_stmts(&parse.stmts), vec![StmtNode::While { + environment_width: 0, id: None, condition: Value(v_int(1)), body: vec![ @@ -1231,6 +1362,7 @@ mod tests { Stmt { node: StmtNode::Cond { arms: vec![CondArm { + environment_width: 0, condition: Expr::Binary( BinaryOp::Gt, Box::new(Id(x)), @@ -1242,7 +1374,7 @@ mod tests { tree_line_no: 4, }], }], - 
otherwise: vec![], + otherwise: None, }, parser_line_no: 1, tree_line_no: 3, @@ -1262,6 +1394,7 @@ mod tests { assert_eq!( stripped_stmts(&parse.stmts), vec![StmtNode::While { + environment_width: 0, id: Some(chuckles), condition: Value(v_int(1)), body: vec![ @@ -1280,6 +1413,7 @@ mod tests { Stmt { node: StmtNode::Cond { arms: vec![CondArm { + environment_width: 0, condition: Expr::Binary( BinaryOp::Gt, Box::new(Id(x)), @@ -1293,7 +1427,7 @@ mod tests { tree_line_no: 4, }], }], - otherwise: vec![], + otherwise: None, }, parser_line_no: 1, tree_line_no: 3, @@ -1436,6 +1570,7 @@ mod tests { stripped_stmts(&parse.stmts), vec![ StmtNode::ForList { + environment_width: 0, id: i, expr: Expr::List(vec![ Normal(Value(v_int(1))), @@ -1554,6 +1689,7 @@ mod tests { stripped_stmts(&parse.stmts), vec![ StmtNode::ForList { + environment_width: 0, id: a, expr: Expr::List(vec![ Normal(Value(v_int(1))), @@ -1587,6 +1723,7 @@ mod tests { vec![Stmt { node: StmtNode::Cond { arms: vec![CondArm { + environment_width: 0, condition: Expr::Binary( BinaryOp::Eq, Box::new(Value(v_int(5))), @@ -1598,11 +1735,14 @@ mod tests { tree_line_no: 2, }], }], - otherwise: vec![Stmt { - node: StmtNode::Return(Some(Value(v_int(3)))), - parser_line_no: 4, - tree_line_no: 4, - }], + otherwise: Some(ElseArm { + statements: vec![Stmt { + node: StmtNode::Return(Some(Value(v_int(3)))), + parser_line_no: 4, + tree_line_no: 4, + }], + environment_width: 0, + }), }, parser_line_no: 1, tree_line_no: 1, @@ -1625,6 +1765,7 @@ mod tests { vec![StmtNode::Cond { arms: vec![ CondArm { + environment_width: 0, condition: Expr::Binary( BinaryOp::Eq, Box::new(Value(v_int(5))), @@ -1637,6 +1778,7 @@ mod tests { }], }, CondArm { + environment_width: 0, condition: Expr::Binary( BinaryOp::Eq, Box::new(Value(v_int(2))), @@ -1649,11 +1791,14 @@ mod tests { }], }, ], - otherwise: vec![Stmt { - node: StmtNode::Return(Some(Value(v_int(3)))), - parser_line_no: 6, - tree_line_no: 6, - }], + otherwise: Some(ElseArm { + statements: 
vec![Stmt { + node: StmtNode::Return(Some(Value(v_int(3)))), + parser_line_no: 6, + tree_line_no: 6, + }], + environment_width: 0, + }), }] ); } @@ -1667,6 +1812,8 @@ mod tests { stripped_stmts(&parse.stmts), vec![StmtNode::Cond { arms: vec![CondArm { + environment_width: 0, + condition: Expr::Binary( BinaryOp::In, Box::new(Value(v_int(5))), @@ -1678,7 +1825,7 @@ mod tests { ), statements: vec![], }], - otherwise: vec![], + otherwise: None, }] ); } @@ -1695,6 +1842,7 @@ mod tests { assert_eq!( stripped_stmts(&parse.stmts), vec![StmtNode::TryExcept { + environment_width: 0, body: vec![Stmt { node: StmtNode::Expr(Value(v_int(5))), parser_line_no: 2, @@ -1885,6 +2033,7 @@ mod tests { stripped_stmts(&parse.stmts), vec![ StmtNode::ForList { + environment_width: 0, id: parse.unbound_names.find_name("line").unwrap(), expr: Expr::List(vec![ Normal(Value(v_int(1))), @@ -2049,4 +2198,120 @@ mod tests { let parse = parse_program(program); assert!(matches!(parse, Err(CompileError::UnknownLoopLabel(_)))); } + + #[test] + fn test_begin_end() { + let program = r#"begin + return 5; + end + "#; + let parse = parse_program(program).unwrap(); + assert_eq!( + stripped_stmts(&parse.stmts), + vec![StmtNode::Scope { + num_bindings: 0, + body: vec![Stmt { + node: StmtNode::Return(Some(Value(v_int(5)))), + parser_line_no: 2, + tree_line_no: 2, + }], + }] + ); + } + + /// Test that lexical block scopes parse and that the inner scope variables can shadow outer scope + #[test] + fn test_parse_scoped_variables() { + let program = r#"begin + let x = 5; + let y = 6; + x = x + 6; + let z = 7; + let o; + global a = 1; + end + return x;"#; + let parse = parse_program(program).unwrap(); + let x_names = parse.unbound_names.find_named("x"); + let y_names = parse.unbound_names.find_named("y"); + let z_names = parse.unbound_names.find_named("z"); + let o_names = parse.unbound_names.find_named("o"); + let inner_y = y_names[0]; + let inner_z = z_names[0]; + let inner_o = o_names[0]; + let global_a = 
parse.unbound_names.find_named("a")[0]; + assert_eq!(x_names.len(), 2); + let global_x = x_names[1]; + // Declared first, so appears in unbound names first, though in the bound names it will + // appear second. + let inner_x = x_names[0]; + assert_eq!( + stripped_stmts(&parse.stmts), + vec![ + StmtNode::Scope { + num_bindings: 4, + body: vec![ + // Declaration of X + Stmt { + node: StmtNode::Expr(Expr::Assign { + left: Box::new(Id(inner_x)), + right: Box::new(Value(v_int(5))), + }), + parser_line_no: 2, + tree_line_no: 2, + }, + // Declaration of y + Stmt { + node: StmtNode::Expr(Expr::Assign { + left: Box::new(Id(inner_y)), + right: Box::new(Value(v_int(6))), + }), + parser_line_no: 3, + tree_line_no: 3, + }, + Stmt { + node: StmtNode::Expr(Expr::Assign { + left: Box::new(Id(inner_x)), + right: Box::new(Expr::Binary( + BinaryOp::Add, + Box::new(Id(inner_x)), + Box::new(Value(v_int(6))), + )), + }), + parser_line_no: 4, + tree_line_no: 4, + }, + // Assignment to z. + Stmt { + node: StmtNode::Expr(Expr::Assign { + left: Box::new(Id(inner_z)), + right: Box::new(Value(v_int(7))), + }), + parser_line_no: 5, + tree_line_no: 5, + }, + // Declaration of o (o = v_none) + Stmt { + node: StmtNode::Expr(Expr::Assign { + left: Box::new(Id(inner_o)), + right: Box::new(Value(v_none())), + }), + parser_line_no: 6, + tree_line_no: 6, + }, + // Assignment to global a + Stmt { + node: StmtNode::Expr(Expr::Assign { + left: Box::new(Id(global_a)), + right: Box::new(Value(v_int(1))), + }), + parser_line_no: 7, + tree_line_no: 7, + }, + ], + }, + StmtNode::Return(Some(Id(global_x))) + ] + ); + } } diff --git a/crates/compiler/src/program.rs b/crates/compiler/src/program.rs index f4ab550c..34247093 100644 --- a/crates/compiler/src/program.rs +++ b/crates/compiler/src/program.rs @@ -51,7 +51,7 @@ impl Program { Program { literals: Vec::new(), jump_labels: Vec::new(), - var_names: Default::default(), + var_names: Names::new(0), main_vector: Arc::new(Vec::new()), fork_vectors: Vec::new(),
line_number_spans: Vec::new(), diff --git a/crates/compiler/src/unparse.rs b/crates/compiler/src/unparse.rs index 86f5177a..228523e6 100644 --- a/crates/compiler/src/unparse.rs +++ b/crates/compiler/src/unparse.rs @@ -326,21 +326,27 @@ impl<'a> Unparse<'a> { stmt_lines.push(format!("{}elseif ({})", indent_frag, cond_frag)); stmt_lines.append(&mut stmt_frag); } - if !otherwise.is_empty() { - let mut stmt_frag = self.unparse_stmts(otherwise, indent + INDENT_LEVEL)?; + if let Some(otherwise) = otherwise { + let mut stmt_frag = + self.unparse_stmts(&otherwise.statements, indent + INDENT_LEVEL)?; stmt_lines.push(format!("{}else", indent_frag)); stmt_lines.append(&mut stmt_frag); } stmt_lines.push(format!("{}endif", indent_frag)); Ok(stmt_lines) } - StmtNode::ForList { id, expr, body } => { + StmtNode::ForList { + id, + expr, + body, + environment_width: _, + } => { let mut stmt_lines = Vec::with_capacity(body.len() + 3); let expr_frag = self.unparse_expr(expr)?; let mut stmt_frag = self.unparse_stmts(body, indent + INDENT_LEVEL)?; - let name = self.unparse_name(&id); + let name = self.unparse_name(id); stmt_lines.push(format!( "{}for {} in ({})", @@ -355,13 +361,19 @@ impl<'a> Unparse<'a> { stmt_lines.push(format!("{}endfor", indent_frag)); Ok(stmt_lines) } - StmtNode::ForRange { id, from, to, body } => { + StmtNode::ForRange { + id, + from, + to, + body, + environment_width: _, + } => { let mut stmt_lines = Vec::with_capacity(body.len() + 3); let from_frag = self.unparse_expr(from)?; let to_frag = self.unparse_expr(to)?; let mut stmt_frag = self.unparse_stmts(body, indent + INDENT_LEVEL)?; - let name = self.unparse_name(&id); + let name = self.unparse_name(id); stmt_lines.push(format!( "{}for {} in [{}..{}]", @@ -381,6 +393,7 @@ impl<'a> Unparse<'a> { id, condition, body, + environment_width: _, } => { let mut stmt_lines = Vec::with_capacity(body.len() + 3); @@ -389,7 +402,7 @@ impl<'a> Unparse<'a> { let mut base_str = "while ".to_string(); if let Some(id) = id { - let 
id = self.unparse_name(&id); + let id = self.unparse_name(id); base_str.push_str( self.tree @@ -412,7 +425,7 @@ impl<'a> Unparse<'a> { let mut base_str = format!("{}fork", indent_frag); if let Some(id) = id { base_str.push(' '); - let id = self.unparse_name(&id); + let id = self.unparse_name(id); base_str.push_str( self.tree @@ -427,7 +440,11 @@ impl<'a> Unparse<'a> { stmt_lines.push(format!("{}endfork", indent_frag)); Ok(stmt_lines) } - StmtNode::TryExcept { body, excepts } => { + StmtNode::TryExcept { + body, + excepts, + environment_width: _, + } => { let mut stmt_lines = Vec::with_capacity(body.len() + 3); let mut stmt_frag = self.unparse_stmts(body, indent + INDENT_LEVEL)?; @@ -438,7 +455,7 @@ impl<'a> Unparse<'a> { self.unparse_stmts(&except.statements, indent + INDENT_LEVEL)?; let mut base_str = "except ".to_string(); if let Some(id) = &except.id { - let id = self.unparse_name(&id); + let id = self.unparse_name(id); base_str.push_str( self.tree @@ -457,7 +474,11 @@ impl<'a> Unparse<'a> { stmt_lines.push(format!("{}endtry", indent_frag)); Ok(stmt_lines) } - StmtNode::TryFinally { body, handler } => { + StmtNode::TryFinally { + body, + handler, + environment_width: _, + } => { let mut stmt_lines = Vec::with_capacity(body.len() + 3); let mut stmt_frag = self.unparse_stmts(body, indent + INDENT_LEVEL)?; @@ -473,7 +494,7 @@ impl<'a> Unparse<'a> { let mut base_str = format!("{}break", indent_frag); if let Some(exit) = &exit { base_str.push(' '); - let exit = self.unparse_name(&exit); + let exit = self.unparse_name(exit); base_str.push_str( self.tree @@ -490,7 +511,7 @@ impl<'a> Unparse<'a> { let mut base_str = format!("{}continue", indent_frag); if let Some(exit) = &exit { base_str.push(' '); - let exit = self.unparse_name(&exit); + let exit = self.unparse_name(exit); base_str.push_str( self.tree @@ -511,11 +532,47 @@ impl<'a> Unparse<'a> { vec![format!("{}return {};", indent_frag, self.unparse_expr(e)?)] } }), + StmtNode::Expr(Expr::Assign { left, right }) => { + 
let left_frag = match left.as_ref() { + Expr::Id(id) => { + // If this Id is in non-zero scope, we need to prefix with "let" + let bound_name = self.tree.names_mapping[id]; + let scope_depth = self.tree.names.depth_of(&bound_name).unwrap(); + let prefix = if scope_depth > 0 { + "let " + } else { + // TODO: could have 'global' prefix here when in a certain mode. + // instead of having it implied. + "" + }; + let suffix = self.tree.names.name_of(&self.unparse_name(id)).unwrap(); + format!("{}{}", prefix, suffix) + } + _ => self.unparse_expr(left)?, + }; + let right_frag = self.unparse_expr(right)?; + Ok(vec![format!( + "{}{} = {};", + indent_frag, left_frag, right_frag + )]) + } StmtNode::Expr(expr) => Ok(vec![format!( "{}{};", indent_frag, self.unparse_expr(expr)? )]), + StmtNode::Scope { + num_bindings: _, + body, + } => { + // Begin/End + let mut stmt_lines = Vec::with_capacity(body.len() + 3); + stmt_lines.push(format!("{}begin", indent_frag)); + let mut stmt_frag = self.unparse_stmts(body, indent + INDENT_LEVEL)?; + stmt_lines.append(&mut stmt_frag); + stmt_lines.push(format!("{}end", indent_frag)); + Ok(stmt_lines) + } } } @@ -532,7 +589,7 @@ impl<'a> Unparse<'a> { } fn unparse_name(&self, name: &UnboundName) -> Name { - self.tree.names_mapping.get(name).unwrap().clone() + *self.tree.names_mapping.get(name).unwrap() } } @@ -560,11 +617,11 @@ pub fn annotate_line_numbers(start_line_no: usize, tree: &mut [Stmt]) -> usize { // Walk arm.statements ... line_no = annotate_line_numbers(line_no, &mut arm.statements); } - if !otherwise.is_empty() { + if let Some(otherwise) = otherwise { // ELSE line ... line_no += 1; // Walk otherwise ... 
- line_no = annotate_line_numbers(line_no, otherwise); + line_no = annotate_line_numbers(line_no, &mut otherwise.statements); } // ENDIF line_no += 1; @@ -590,6 +647,7 @@ pub fn annotate_line_numbers(start_line_no: usize, tree: &mut [Stmt]) -> usize { StmtNode::TryExcept { ref mut body, ref mut excepts, + environment_width: _, } => { // TRY line_no += 1; @@ -607,6 +665,7 @@ pub fn annotate_line_numbers(start_line_no: usize, tree: &mut [Stmt]) -> usize { StmtNode::TryFinally { ref mut body, ref mut handler, + environment_width: _, } => { // TRY line_no += 1; @@ -619,6 +678,17 @@ pub fn annotate_line_numbers(start_line_no: usize, tree: &mut [Stmt]) -> usize { // ENDTRY line_no += 1; } + StmtNode::Scope { + ref mut body, + num_bindings: _, + } => { + // BEGIN + line_no += 1; + // Walk body ... + line_no = annotate_line_numbers(line_no, body); + // END + line_no += 1; + } } } line_no @@ -775,6 +845,17 @@ mod tests { assert_eq!(stripped.trim(), result.trim()); } + #[test] + fn test_unparse_lexical_scope_block() { + let program = r#"b = 3; + begin + let a = 5; + end"#; + let stripped = unindent(program); + let result = parse_and_unparse(&stripped).unwrap(); + assert_eq!(stripped.trim(), result.trim()); + } + #[test] fn regress_test() { let program = r#"n + 10 in a;"#; diff --git a/crates/db-wiredtiger/src/bindings/cursor.rs b/crates/db-wiredtiger/src/bindings/cursor.rs index abf3d6db..68b424f7 100644 --- a/crates/db-wiredtiger/src/bindings/cursor.rs +++ b/crates/db-wiredtiger/src/bindings/cursor.rs @@ -67,6 +67,10 @@ impl Datum { pub fn len(&self) -> usize { self.data.len() } + + pub fn is_empty(&self) -> bool { + self.data.is_empty() + } } impl Clone for Datum { diff --git a/crates/kernel/src/builtins/bf_server.rs b/crates/kernel/src/builtins/bf_server.rs index 542adc44..6318e9e2 100644 --- a/crates/kernel/src/builtins/bf_server.rs +++ b/crates/kernel/src/builtins/bf_server.rs @@ -609,7 +609,7 @@ fn bf_call_function(bf_args: &mut BfCallState<'_>) -> Result { // Then
ask the scheduler to run the function as a continuation of what we're doing now. Ok(VmInstr(ExecutionResult::ContinueBuiltin { - bf_func_num: func_offset, + bf_func_num: func_offset as u16, arguments: args[..].to_vec(), })) } diff --git a/crates/kernel/src/tasks/task.rs b/crates/kernel/src/tasks/task.rs index 8b4a2d1e..cf716bfe 100644 --- a/crates/kernel/src/tasks/task.rs +++ b/crates/kernel/src/tasks/task.rs @@ -672,6 +672,7 @@ mod tests { argspec: VerbArgsSpec, } + #[allow(clippy::type_complexity)] fn setup_test_env( task_start: Arc, programs: &[TestVerb], @@ -746,6 +747,7 @@ mod tests { } /// Build a simple test environment with an Eval task (since that is simplest to setup) + #[allow(clippy::type_complexity)] fn setup_test_env_eval( program: &str, ) -> ( @@ -764,6 +766,7 @@ mod tests { setup_test_env(task_start, &[]) } + #[allow(clippy::type_complexity)] fn setup_test_env_command( command: &str, verbs: &[TestVerb], diff --git a/crates/kernel/src/textdump/load_db.rs b/crates/kernel/src/textdump/load_db.rs index 5827e0d3..11768f1a 100644 --- a/crates/kernel/src/textdump/load_db.rs +++ b/crates/kernel/src/textdump/load_db.rs @@ -17,7 +17,6 @@ use std::fs::File; use std::io; use std::io::BufReader; use std::path::PathBuf; -use std::sync::Arc; use tracing::{info, span, trace}; @@ -219,14 +218,7 @@ pub fn read_textdump( } // If the verb program is missing, then it's an empty program, and we'll put in // an empty binary. - _ => Program { - literals: vec![], - jump_labels: vec![], - var_names: Default::default(), - main_vector: Arc::new(vec![]), - fork_vectors: vec![], - line_number_spans: vec![], - }, + _ => Program::new(), }; let binary = diff --git a/crates/kernel/src/vm/mod.rs b/crates/kernel/src/vm/mod.rs index b865e764..031dffa3 100644 --- a/crates/kernel/src/vm/mod.rs +++ b/crates/kernel/src/vm/mod.rs @@ -102,7 +102,7 @@ pub enum ExecutionResult { DispatchFork(Fork), /// Request dispatch of a builtin function with the given arguments. 
ContinueBuiltin { - bf_func_num: usize, + bf_func_num: u16, arguments: Vec, }, /// Request that this task be suspended for a duration of time. diff --git a/crates/kernel/src/vm/moo_execute.rs b/crates/kernel/src/vm/moo_execute.rs index e743319d..821b76a7 100644 --- a/crates/kernel/src/vm/moo_execute.rs +++ b/crates/kernel/src/vm/moo_execute.rs @@ -121,7 +121,24 @@ pub fn moo_frame_execute( f.pc += 1; match op { - Op::If(label) | Op::Eif(label) | Op::IfQues(label) | Op::While(label) => { + Op::If(label, environment_width) + | Op::Eif(label, environment_width) + | Op::While { + jump_label: label, + environment_width, + } => { + let scope_type = match op { + Op::If(..) | Op::Eif(..) => ScopeType::If, + Op::While { .. } => ScopeType::While, + _ => unreachable!(), + }; + f.push_scope(scope_type, *environment_width); + let cond = f.pop(); + if !cond.is_true() { + f.jump(label); + } + } + Op::IfQues(label) => { let cond = f.pop(); if !cond.is_true() { f.jump(label); @@ -130,7 +147,12 @@ pub fn moo_frame_execute( Op::Jump { label } => { f.jump(label); } - Op::WhileId { id, end_label } => { + Op::WhileId { + id, + end_label, + environment_width, + } => { + f.push_scope(ScopeType::While, *environment_width); let v = f.pop(); let is_true = v.is_true(); f.set_env(id, v); @@ -138,7 +160,13 @@ pub fn moo_frame_execute( f.jump(end_label); } } - Op::ForList { end_label, id } => { + Op::ForList { + end_label, + id, + environment_width, + } => { + f.push_scope(ScopeType::For, *environment_width); + // Pop the count and list off the stack. We push back later when we re-enter. let (count, list) = f.peek2(); @@ -176,7 +204,13 @@ pub fn moo_frame_execute( f.set_env(id, l.get(count).unwrap().clone()); f.poke(0, v_int((count + 1) as i64)); } - Op::ForRange { end_label, id } => { + Op::ForRange { + end_label, + id, + environment_width, + } => { + f.push_scope(ScopeType::For, *environment_width); + // Pull the range ends off the stack. 
let (from, next_val) = { let (to, from) = f.peek2(); @@ -654,16 +688,23 @@ pub fn moo_frame_execute( } } } - Op::TryFinally { end_label: label } => { - f.enter_scope(ScopeType::TryFinally(*label)); + Op::TryFinally { + end_label: label, + environment_width, + } => { + // Next opcode must be BeginScope, to define the variable scoping. + f.push_scope(ScopeType::TryFinally(*label), *environment_width); } Op::TryCatch { handler_label: _ } => { let catches = std::mem::take(&mut f.catch_stack); - f.enter_scope(ScopeType::TryCatch(catches)); + f.push_scope(ScopeType::TryCatch(catches), 0); } - Op::TryExcept { num_excepts: _ } => { + Op::TryExcept { + num_excepts: _, + environment_width, + } => { let catches = std::mem::take(&mut f.catch_stack); - f.enter_scope(ScopeType::TryCatch(catches)); + f.push_scope(ScopeType::TryCatch(catches), *environment_width); } Op::EndCatch(label) | Op::EndExcept(label) => { let is_catch = matches!(op, Op::EndCatch(_)); @@ -699,6 +740,12 @@ pub fn moo_frame_execute( } } } + Op::BeginScope { num_bindings, .. } => { + f.push_scope(ScopeType::Block, *num_bindings); + } + Op::EndScope { num_bindings: _ } => { + f.pop_scope().expect("Missing scope"); + } Op::ExitId(label) => { f.jump(label); continue; diff --git a/crates/kernel/src/vm/moo_frame.rs b/crates/kernel/src/vm/moo_frame.rs index 2a7747d2..71c422d0 100644 --- a/crates/kernel/src/vm/moo_frame.rs +++ b/crates/kernel/src/vm/moo_frame.rs @@ -32,20 +32,10 @@ pub(crate) struct MooStackFrame { pub(crate) program: Program, /// The program counter. pub(crate) pc: usize, - // TODO: Language enhancement: Introduce lexical scopes to the MOO language: - // add a 'with' keyword to the language which introduces a new scope, similar to ML's "let": - // with x = 1 in - // ... - // endlet - // Multiple variables can be introduced at once: - // with x = 1, y = 2 in ... 
- // Variables not declared with 'with' are verb-scoped as they are now - // 'with' variables that shadow already-known verb-scoped variables override the verb-scope - // Add LetBegin and LetEnd opcodes to the language. - // Make the environment have a width, and expand and contract as scopes are entered and exited. - // Likewise, Names in Program should be scope delimited somehow /// The values of the variables currently in scope, by their offset. pub(crate) environment: BitArray>, + /// The current used scope size, used when entering and exiting local scopes. + pub(crate) environment_width: usize, /// The value stack. pub(crate) valstack: Vec, /// A stack of active scopes. Used for catch and finally blocks and in the future for lexical @@ -75,6 +65,10 @@ pub(crate) enum ScopeType { /// Note that `return` and `exit` are not considered failures. TryFinally(Label), TryCatch(Vec<(CatchType, Label)>), + If, + While, + For, + Block, } /// A scope is a record of the current size of the valstack when it was created, and are @@ -85,6 +79,7 @@ pub(crate) enum ScopeType { pub(crate) struct Scope { pub(crate) scope_type: ScopeType, pub(crate) valstack_pos: usize, + pub(crate) environment_width: usize, } impl Encode for MooStackFrame { @@ -100,6 +95,7 @@ impl Encode for MooStackFrame { env[i] = Some(v.clone()) } env.encode(encoder)?; + self.environment_width.encode(encoder)?; self.valstack.encode(encoder)?; self.scope_stack.encode(encoder)?; self.temp.encode(encoder)?; @@ -120,7 +116,7 @@ impl Decode for MooStackFrame { environment.set(i, v.clone()); } } - + let environment_width = usize::decode(decoder)?; let valstack = Vec::decode(decoder)?; let scope_stack = Vec::decode(decoder)?; let temp = Var::decode(decoder)?; @@ -130,6 +126,7 @@ impl Decode for MooStackFrame { program, pc, environment, + environment_width, valstack, scope_stack, temp, @@ -151,7 +148,7 @@ impl<'de> BorrowDecode<'de> for MooStackFrame { environment.set(i, v.clone()); } } - + let environment_width = 
usize::borrow_decode(decoder)?; let valstack = Vec::borrow_decode(decoder)?; let scope_stack = Vec::borrow_decode(decoder)?; let temp = Var::borrow_decode(decoder)?; @@ -161,6 +158,7 @@ impl<'de> BorrowDecode<'de> for MooStackFrame { program, pc, environment, + environment_width, valstack, scope_stack, temp, @@ -173,10 +171,11 @@ impl<'de> BorrowDecode<'de> for MooStackFrame { impl MooStackFrame { pub(crate) fn new(program: Program) -> Self { let environment = BitArray::new(); - + let environment_width = program.var_names.global_width(); Self { program, environment, + environment_width, valstack: vec![], scope_stack: vec![], pc: 0, @@ -185,6 +184,7 @@ impl MooStackFrame { finally_stack: vec![], } } + pub(crate) fn find_line_no(&self, pc: usize) -> Option { if self.program.line_number_spans.is_empty() { return None; @@ -293,16 +293,32 @@ impl MooStackFrame { self.pc = label.position.0 as usize; } - pub fn enter_scope(&mut self, scope: ScopeType) { + /// Enter a new lexical scope and/or try/catch handling block. + pub fn push_scope(&mut self, scope: ScopeType, environment_width: u16) { + // If this is a lexical scope, expand the environment to accommodate the new variables. + // (This is just updating environment_width) + let environment_width = environment_width as usize; + assert!(environment_width <= self.environment.len()); + self.environment_width += environment_width; + self.scope_stack.push(Scope { scope_type: scope, valstack_pos: self.valstack.len(), + environment_width, }); } pub fn pop_scope(&mut self) -> Option { let scope = self.scope_stack.pop()?; self.valstack.truncate(scope.valstack_pos); + + // Clear out the environment for the scope that is being exited. 
+ // Everything in environment after old width - new-width should be set to v_none + let old_width = self.environment_width; + for i in old_width - scope.environment_width..self.environment_width { + self.environment.set(i, v_none()); + } + self.environment_width -= scope.environment_width; Some(scope) } } diff --git a/crates/kernel/src/vm/vm_call.rs b/crates/kernel/src/vm/vm_call.rs index 1101d724..6a6e69bc 100644 --- a/crates/kernel/src/vm/vm_call.rs +++ b/crates/kernel/src/vm/vm_call.rs @@ -223,12 +223,13 @@ impl VMExecState { /// Call into a builtin function. pub(crate) fn call_builtin_function( &mut self, - bf_func_num: usize, + bf_func_num: u16, args: List, exec_args: &VmExecParams, world_state: &mut dyn WorldState, session: Arc, ) -> ExecutionResult { + let bf_func_num = bf_func_num as usize; if bf_func_num >= exec_args.builtin_registry.builtins.len() { return self.raise_error(E_VARNF); } diff --git a/crates/kernel/src/vm/vm_test.rs b/crates/kernel/src/vm/vm_test.rs index a8252e9b..73706462 100644 --- a/crates/kernel/src/vm/vm_test.rs +++ b/crates/kernel/src/vm/vm_test.rs @@ -103,7 +103,11 @@ mod tests { #[test] fn test_simple_vm_execute() { - let program = mk_program(vec![Imm(0.into()), Pop, Done], vec![1.into()], Names::new()); + let program = mk_program( + vec![Imm(0.into()), Pop, Done], + vec![1.into()], + Names::new(64), + ); let state_source = test_db_with_verb("test", &program); let mut state = state_source.new_world_state().unwrap(); let session = Arc::new(NoopClientSession::new()); @@ -124,7 +128,7 @@ mod tests { &mk_program( vec![Imm(0.into()), Imm(1.into()), Ref, Return, Done], vec![v_str("hello"), 2.into()], - Names::new(), + Names::new(64), ), ); let mut state = state_source.new_world_state().unwrap(); @@ -154,7 +158,7 @@ mod tests { Done, ], vec![v_str("hello"), 2.into(), 4.into()], - Names::new(), + Names::new(64), ), ) .new_world_state() @@ -178,7 +182,7 @@ mod tests { &mk_program( vec![Imm(0.into()), Imm(1.into()), Ref, Return, Done], 
vec![v_list(&[111.into(), 222.into(), 333.into()]), 2.into()], - Names::new(), + Names::new(64), ), ) .new_world_state() @@ -213,7 +217,7 @@ mod tests { 2.into(), 3.into(), ], - Names::new(), + Names::new(64), ), ) .new_world_state() @@ -365,7 +369,7 @@ mod tests { &mk_program( vec![Imm(0.into()), Imm(1.into()), GetProp, Return, Done], vec![v_obj(0), v_str("test_prop")], - Names::new(), + Names::new(64), ), ) .new_world_state() @@ -402,7 +406,7 @@ mod tests { let return_verb_binary = mk_program( vec![Imm(0.into()), Return, Done], vec![v_int(666)], - Names::new(), + Names::new(64), ); // The second actually calls the first verb, and returns the result. @@ -416,7 +420,7 @@ mod tests { Done, ], vec![v_obj(0), v_str("test_return_verb"), v_empty_list()], - Names::new(), + Names::new(64), ); let mut state = test_db_with_verbs(&[ ("test_return_verb", &return_verb_binary), @@ -681,14 +685,7 @@ mod tests { /// A VM body that is empty should return v_none() and not panic. #[test] fn test_regression_zero_body_function() { - let binary = Program { - literals: vec![], - jump_labels: vec![], - var_names: Names::default(), - main_vector: Arc::new(vec![]), - fork_vectors: vec![], - line_number_spans: vec![], - }; + let binary = Program::new(); let mut state = test_db_with_verb("test", &binary) .new_world_state() .unwrap(); @@ -777,6 +774,214 @@ mod tests { assert_eq!(result, Ok(v_int(333))); } + #[test] + fn test_lexical_scoping() { + // Assign a value to a global from a lexically scoped value. 
+ let program = r#" + x = 52; + begin + let y = 42; + x = y; + end + return x; + "#; + let compiled = compile(program).unwrap(); + let mut state = world_with_test_programs(&[("test", &compiled)]); + let session = Arc::new(NoopClientSession::new()); + let builtin_registry = Arc::new(BuiltinRegistry::new()); + let result = call_verb( + state.as_mut(), + session.clone(), + builtin_registry, + "test", + vec![], + ); + assert_eq!(result, Ok(v_int(42))); + } + + #[test] + fn test_lexical_scoping_shadowing1() { + // Global with inner scope shadowing it, return value should be the value assigned in the + // outer (global) scope, since the new lexical scoped value should not be visible. + let program = r#" + x = 52; + begin + let x = 42; + x = 1; + end + return x; + "#; + let compiled = compile(program).unwrap(); + let mut state = world_with_test_programs(&[("test", &compiled)]); + let session = Arc::new(NoopClientSession::new()); + let builtin_registry = Arc::new(BuiltinRegistry::new()); + let result = call_verb( + state.as_mut(), + session.clone(), + builtin_registry, + "test", + vec![], + ); + assert_eq!(result, Ok(v_int(52))); + } + + #[test] + fn test_lexical_scoping_shadowing2() { + // Global is set, then shadowed in lexical scope, and returned inside the inner scope, + // should return the inner scope value. 
+ let program = r#" + x = 52; + begin + let x = 42; + let y = 66; + return {x, y}; + end + "#; + let compiled = compile(program).unwrap(); + let mut state = world_with_test_programs(&[("test", &compiled)]); + let session = Arc::new(NoopClientSession::new()); + let builtin_registry = Arc::new(BuiltinRegistry::new()); + let result = call_verb( + state.as_mut(), + session.clone(), + builtin_registry, + "test", + vec![], + ); + assert_eq!(result, Ok(v_list(&[v_int(42), v_int(66)]))); + } + + #[test] + fn test_lexical_scoping_we_must_go_deeper() { + // Nested scopes: the innermost block declares `z`, assigns to the enclosing block's `y`, + // and reads the shadowing `x`; the return should see 42, 13 and 99 respectively. + let program = r#" + x = 52; + begin + let x = 42; + let y = 66; + begin + let z = 99; + y = 13; + return {x, y, z}; + end + end + "#; + let compiled = compile(program).unwrap(); + let mut state = world_with_test_programs(&[("test", &compiled)]); + let session = Arc::new(NoopClientSession::new()); + let builtin_registry = Arc::new(BuiltinRegistry::new()); + let result = call_verb( + state.as_mut(), + session.clone(), + builtin_registry, + "test", + vec![], + ); + assert_eq!(result, Ok(v_list(&[v_int(42), v_int(13), v_int(99)]))); + } + + /// Verify that if statements get their own lexical scope, in this case "y" shadowing the + /// global "y" value.
+ #[test] + fn test_lexical_scoping_in_if_blocks() { + let program = r#" + global y = 2; + let z = 3; + if (1) + let y = 5; + return {y, z}; + else + return 0; + endif"#; + let compiled = compile(program).unwrap(); + let mut state = world_with_test_programs(&[("test", &compiled)]); + let session = Arc::new(NoopClientSession::new()); + let builtin_registry = Arc::new(BuiltinRegistry::new()); + let result = call_verb( + state.as_mut(), + session.clone(), + builtin_registry, + "test", + vec![], + ); + assert_eq!(result, Ok(v_list(&[v_int(5), v_int(3)]))); + } + + /// Same as above but for `while` + #[test] + fn test_lexical_scoping_in_while_blocks() { + let program = r#" + global y = 2; + let z = 3; + while (1) + let y = 5; + return {y, z}; + endwhile"#; + let compiled = compile(program).unwrap(); + let mut state = world_with_test_programs(&[("test", &compiled)]); + let session = Arc::new(NoopClientSession::new()); + let builtin_registry = Arc::new(BuiltinRegistry::new()); + let result = call_verb( + state.as_mut(), + session.clone(), + builtin_registry, + "test", + vec![], + ); + assert_eq!(result, Ok(v_list(&[v_int(5), v_int(3)]))); + } + + /// And same as above for "for in" + #[test] + fn test_lexical_scoping_in_for_blocks() { + let program = r#" + global y = 2; + let z = 3; + for x in ({1,2,3}) + let y = 5; + return {y, z}; + endfor"#; + let compiled = compile(program).unwrap(); + let mut state = world_with_test_programs(&[("test", &compiled)]); + let session = Arc::new(NoopClientSession::new()); + let builtin_registry = Arc::new(BuiltinRegistry::new()); + let result = call_verb( + state.as_mut(), + session.clone(), + builtin_registry, + "test", + vec![], + ); + assert_eq!(result, Ok(v_list(&[v_int(5), v_int(3)]))); + } + + /// And for try/except + #[test] + fn test_lexical_scoping_in_try_blocks() { + let program = r#" + global y = 2; + let z = 3; + try + let y = 5; + return {y, z}; + except (E_INVARG) + return 0; + endtry"#; + let compiled = 
compile(program).unwrap(); + let mut state = world_with_test_programs(&[("test", &compiled)]); + let session = Arc::new(NoopClientSession::new()); + let builtin_registry = Arc::new(BuiltinRegistry::new()); + let result = call_verb( + state.as_mut(), + session.clone(), + builtin_registry, + "test", + vec![], + ); + assert_eq!(result, Ok(v_list(&[v_int(5), v_int(3)]))); + } + #[test_case("return 1;", v_int(1); "simple return")] #[test_case( r#"rest = "me:words"; rest[1..0] = ""; return rest;"#, diff --git a/crates/kernel/src/vm/vm_unwind.rs b/crates/kernel/src/vm/vm_unwind.rs index ef99bea4..ac056025 100644 --- a/crates/kernel/src/vm/vm_unwind.rs +++ b/crates/kernel/src/vm/vm_unwind.rs @@ -271,6 +271,10 @@ impl VMExecState { } } } + _ => { + // This is a lexical scope, so we just let it pop off the stack and + // continue on. + } } } }