diff --git a/resources/tests/strict/assert.clsp b/resources/tests/strict/assert.clsp new file mode 100644 index 000000000..cf9a3801e --- /dev/null +++ b/resources/tests/strict/assert.clsp @@ -0,0 +1,11 @@ +(mod (A) + (include *strict-cl-21*) + + (include defmac_assert.clib) + + (assert + 1 + A + 13 + ) + ) diff --git a/resources/tests/strict/defmac_assert.clib b/resources/tests/strict/defmac_assert.clib new file mode 100644 index 000000000..1dccb32c8 --- /dev/null +++ b/resources/tests/strict/defmac_assert.clib @@ -0,0 +1,10 @@ +( + (defun assert_ (items) + (if (r items) + (qq (if (unquote (f items)) (unquote (assert_ (r items))) (x))) + (f items) + ) + ) + + (defmac assert items (assert_ items)) +) \ No newline at end of file diff --git a/resources/tests/strict/defmac_if_smoke.clsp b/resources/tests/strict/defmac_if_smoke.clsp new file mode 100644 index 000000000..d3baf7cca --- /dev/null +++ b/resources/tests/strict/defmac_if_smoke.clsp @@ -0,0 +1,8 @@ +(mod () + (include *strict-cl-21*) + + (include defmac_simple_if.clib) + + (if_ t1 t2 t3) + ) + \ No newline at end of file diff --git a/resources/tests/strict/defmac_simple_if.clib b/resources/tests/strict/defmac_simple_if.clib new file mode 100644 index 000000000..32655f11f --- /dev/null +++ b/resources/tests/strict/defmac_simple_if.clib @@ -0,0 +1,3 @@ +( + (defmac if_ (C T E) (qq (if (unquote C) (unquote T) (unquote E)))) +) \ No newline at end of file diff --git a/resources/tests/strict/double-constant-fail.clsp b/resources/tests/strict/double-constant-fail.clsp new file mode 100644 index 000000000..7af9d1e23 --- /dev/null +++ b/resources/tests/strict/double-constant-fail.clsp @@ -0,0 +1,24 @@ +(mod (X) + (include *strict-cl-21*) + + ;; A macro-level function to pass through only real integers. + (defun pass-through-integers (X) + (if (not (number? X)) + (x "not a number given to only-integers" X) + X + ) + ) + + ;; A macro which at preprocessing time throws if the given argument + ;; wasn't a lexical integer. + (defmac only-integers (X) (pass-through-integers X)) + + ;; Note: when macro expanding, N is the N argument to the body of + ;; the double macro, not the integer literal, so we use the function + ;; version of pass-through-integers in the macro body. + (defmac double (N) (* 2 (pass-through-integers N))) + + ;; Here the macro form of only-integers can determine whether the + ;; double macro produced an integer or some other expression. + (only-integers (double "hithere")) + ) diff --git a/resources/tests/strict/double-constant-pass-in-function.clsp b/resources/tests/strict/double-constant-pass-in-function.clsp new file mode 100644 index 000000000..a3367039f --- /dev/null +++ b/resources/tests/strict/double-constant-pass-in-function.clsp @@ -0,0 +1,26 @@ +(mod (X) + (include *strict-cl-21*) + + ;; A macro-level function to pass through only real integers. + (defun pass-through-integers (X) + (if (not (number? X)) + (x "not a number given to only-integers" X) + X + ) + ) + + ;; A macro which at preprocessing time throws if the given argument + ;; wasn't a lexical integer. + (defmac only-integers (X) (pass-through-integers X)) + + ;; Note: when macro expanding, N is the N argument to the body of + ;; the double macro, not the integer literal, so we use the function + ;; version of pass-through-integers in the macro body. + (defmac double (N) (* 2 (pass-through-integers N))) + + ;; Here the macro form of only-integers can determine whether the + ;; double macro produced an integer or some other expression. + (defun F (N) (+ N (only-integers (double 99)))) + + (F X) + ) diff --git a/resources/tests/strict/double-constant-pass.clsp b/resources/tests/strict/double-constant-pass.clsp new file mode 100644 index 000000000..7b50bd1ee --- /dev/null +++ b/resources/tests/strict/double-constant-pass.clsp @@ -0,0 +1,24 @@ +(mod (X) + (include *strict-cl-21*) + + ;; A macro-level function to pass through only real integers. + (defun pass-through-integers (X) + (if (not (number? X)) + (x "not a number given to only-integers" X) + X + ) + ) + + ;; A macro which at preprocessing time throws if the given argument + ;; wasn't a lexical integer. + (defmac only-integers (X) (pass-through-integers X)) + + ;; Note: when macro expanding, N is the N argument to the body of + ;; the double macro, not the integer literal, so we use the function + ;; version of pass-through-integers in the macro body. + (defmac double (N) (* 2 (pass-through-integers N))) + + ;; Here the macro form of only-integers can determine whether the + ;; double macro produced an integer or some other expression. + (only-integers (double 99)) + ) diff --git a/resources/tests/strict/strict-classify-expr-if.clsp b/resources/tests/strict/strict-classify-expr-if.clsp new file mode 100644 index 000000000..03288183d --- /dev/null +++ b/resources/tests/strict/strict-classify-expr-if.clsp @@ -0,0 +1,24 @@ +(mod (X) + (include *strict-cl-21*) + ;; Ensure macros can expand inside other macros when using advanced primitives. + (defmac classify-expr (G) + (if (number? G) + 1 + (if (symbol? G) + 2 + (if (string? G) + 3 + (if (l G) + 4 + 0 + ) + ) + ) + ) + ) + + (if X + (classify-expr X) + (list (classify-expr ()) (classify-expr 33) (classify-expr test) (classify-expr "foo") (classify-expr (* 3 2)) (classify-expr (list 1 2 3))) + ) + ) diff --git a/resources/tests/strict/strict-in-place-factorial.clsp b/resources/tests/strict/strict-in-place-factorial.clsp new file mode 100644 index 000000000..6b036beac --- /dev/null +++ b/resources/tests/strict/strict-in-place-factorial.clsp @@ -0,0 +1,6 @@ +(mod (X) + (include *strict-cl-21*) + (defmac factorial (N) + (if (> 2 N) 1 (qq (* (unquote N) (factorial (- N 1)))))) + (factorial 5) + ) diff --git a/resources/tests/strict/strict-list-fail.clsp b/resources/tests/strict/strict-list-fail.clsp new file mode 100644 index 000000000..9e4e42502 --- /dev/null +++ b/resources/tests/strict/strict-list-fail.clsp @@ -0,0 +1,4 @@ +(mod (X) + (include *strict-cl-21*) + (list X (+ X 1) (+ X2)) + ) diff --git a/resources/tests/strict/strict-list-pass.clsp b/resources/tests/strict/strict-list-pass.clsp new file mode 100644 index 000000000..efbba4770 --- /dev/null +++ b/resources/tests/strict/strict-list-pass.clsp @@ -0,0 +1,4 @@ +(mod (X) + (include *strict-cl-21*) + (list X (+ X 1) (+ X 2)) + ) diff --git a/resources/tests/strict/strict-nested-list.clsp b/resources/tests/strict/strict-nested-list.clsp new file mode 100644 index 000000000..6bc4bddb4 --- /dev/null +++ b/resources/tests/strict/strict-nested-list.clsp @@ -0,0 +1,4 @@ +(mod (X) + (include *strict-cl-21*) + (list X (list X) (list (list X))) + ) diff --git a/resources/tests/strict/strict-test-fail.clsp b/resources/tests/strict/strict-test-fail.clsp new file mode 100644 index 000000000..4ca88390b --- /dev/null +++ b/resources/tests/strict/strict-test-fail.clsp @@ -0,0 +1,9 @@ +(mod (X) + (include *strict-cl-21*) + ;; This wouldn't be able to be rejected because X1 is coming from a macro + ;; expansion. This should fail in strict but succeed wrong non-strict. + (if X + (+ X1 2) + 5 + ) + ) diff --git a/resources/tests/strict/strict-test-pass.clsp b/resources/tests/strict/strict-test-pass.clsp new file mode 100644 index 000000000..6dfbc39de --- /dev/null +++ b/resources/tests/strict/strict-test-pass.clsp @@ -0,0 +1,7 @@ +(mod (X) + (include *strict-cl-21*) + (if X + (+ X 2) + 5 + ) + ) diff --git a/resources/tests/strict/test-inner-include.clinc b/resources/tests/strict/test-inner-include.clinc new file mode 100644 index 000000000..704dfcca7 --- /dev/null +++ b/resources/tests/strict/test-inner-include.clinc @@ -0,0 +1,3 @@ +( + (include defmac_simple_if.clib) +) diff --git a/resources/tests/strict/test-inner-include.clsp b/resources/tests/strict/test-inner-include.clsp new file mode 100644 index 000000000..6fc0dabf8 --- /dev/null +++ b/resources/tests/strict/test-inner-include.clsp @@ -0,0 +1,7 @@ +(mod (X) + (include *strict-cl-21*) + + (include test-inner-include.clinc) + + (if_ X (* X 2) (+ X 1)) + ) diff --git a/src/classic/clvm_tools/cmds.rs b/src/classic/clvm_tools/cmds.rs index 7e607c4be..301570d7d 100644 --- a/src/classic/clvm_tools/cmds.rs +++ b/src/classic/clvm_tools/cmds.rs @@ -1,5 +1,6 @@ use core::cell::RefCell; +use std::borrow::Borrow; use std::collections::{BTreeMap, HashMap}; use std::fs; use std::io; @@ -42,7 +43,6 @@ use crate::classic::clvm_tools::stages::stage_0::{ }; use crate::classic::clvm_tools::stages::stage_2::operators::run_program_for_search_paths; use crate::classic::platform::PathJoin; -use crate::compiler::dialect::detect_modern; use crate::classic::platform::argparse::{ Argument, ArgumentParser, ArgumentValue, ArgumentValueConv, IntConversion, NArgsSpec, @@ -55,6 +55,8 @@ use crate::compiler::clvm::start_step; use crate::compiler::compiler::{compile_file, DefaultCompilerOpts}; use crate::compiler::comptypes::{CompileErr, CompilerOpts}; use crate::compiler::debug::build_symbol_table_mut; +use crate::compiler::dialect::detect_modern; +use crate::compiler::frontend::frontend; use crate::compiler::optimize::maybe_finalize_program_via_classic_optimizer; use crate::compiler::preprocessor::gather_dependencies; use crate::compiler::prims; @@ -811,6 +813,70 @@ fn fix_log( } } +// A function which performs preprocessing on a whole program and renders the +// output to the user. +// +// This is used in the same way as cc -E in a C compiler; to see what +// preprocessing did to the source so you can debug and improve your macros. +// +// Without this, it's difficult for some to visualize how macro are functioning +// and what forms they output. +fn perform_preprocessing( + stdout: &mut Stream, + opts: Rc, + input_file: &str, + program_text: &str, +) -> Result<(), CompileErr> { + let srcloc = Srcloc::start(input_file); + // Parse the source file. + let parsed = parse_sexp(srcloc.clone(), program_text.bytes())?; + // Get the detected dialect and compose a sigil that matches. + // Classic preprocessing (also shared by standard sigil 21 and 21) does macro + // expansion during the compile process, making all macros available to all + // code regardless of its lexical order and therefore isn't rendered in a + // unified way (for example, 'com' and 'mod' forms invoke macros when + // encountered and expanded. By contrast strict mode reads the macros and + // evaluates them in that order (as in C). + // + // The result is fully rendered before the next stage of compilation so that + // it can be inspected and so that the execution environment for macros is + // fully and cleanly separated from compile time. + let stepping_form_text = match opts.dialect().stepping { + Some(21) => Some("(include *strict-cl-21*)".to_string()), + Some(n) => Some(format!("(include *standard-cl-{n}*)")), + _ => None, + }; + let frontend = frontend(opts, &parsed)?; + let fe_sexp = frontend.to_sexp(); + let with_stepping = if let Some(s) = stepping_form_text { + let parsed_stepping_form = parse_sexp(srcloc.clone(), s.bytes())?; + if let sexp::SExp::Cons(_, a, rest) = fe_sexp.borrow() { + Rc::new(sexp::SExp::Cons( + srcloc.clone(), + a.clone(), + Rc::new(sexp::SExp::Cons( + srcloc.clone(), + parsed_stepping_form[0].clone(), + rest.clone(), + )), + )) + } else { + fe_sexp + } + } else { + fe_sexp + }; + + let whole_mod = sexp::SExp::Cons( + srcloc.clone(), + Rc::new(sexp::SExp::Atom(srcloc, b"mod".to_vec())), + with_stepping, + ); + + stdout.write_str(&format!("{}", whole_mod)); + Ok(()) +} + fn get_disassembly_ver(p: &HashMap) -> Option { if let Some(ArgumentValue::ArgInt(x)) = p.get("operators_version") { return Some(*x as usize); @@ -957,6 +1023,18 @@ pub fn launch_tool(stdout: &mut Stream, args: &[String], tool_name: &str, defaul .set_type(Rc::new(PathJoin {})) .set_default(ArgumentValue::ArgString(None, "main.sym".to_string())), ); + parser.add_argument( + vec!["--strict".to_string()], + Argument::new() + .set_action(TArgOptionAction::StoreTrue) + .set_help("For modern dialects, don't treat unknown names as constants".to_string()), + ); + parser.add_argument( + vec!["-E".to_string(), "--preprocess".to_string()], + Argument::new() + .set_action(TArgOptionAction::StoreTrue) + .set_help("Perform strict mode preprocessing and show the result".to_string()), + ); parser.add_argument( vec!["--operators-version".to_string()], Argument::new() @@ -1240,8 +1318,7 @@ pub fn launch_tool(stdout: &mut Stream, args: &[String], tool_name: &str, defaul .unwrap_or_else(|| "main.sym".to_string()); // In testing: short circuit for modern compilation. - // Now stepping is the optional part. - if let Some(dialect) = dialect.and_then(|d| d.stepping) { + if let Some(stepping) = dialect.as_ref().and_then(|d| d.stepping) { let do_optimize = parsed_args .get("optimize") .map(|x| matches!(x, ArgumentValue::ArgBool(true))) @@ -1249,12 +1326,21 @@ pub fn launch_tool(stdout: &mut Stream, args: &[String], tool_name: &str, defaul let runner = Rc::new(DefaultProgramRunner::new()); let use_filename = input_file.unwrap_or_else(|| "*command*".to_string()); let opts = Rc::new(DefaultCompilerOpts::new(&use_filename)) + .set_dialect(dialect.unwrap_or_default()) .set_optimize(do_optimize) .set_search_paths(&search_paths) - .set_frontend_opt(dialect > 21) + .set_frontend_opt(stepping > 21) .set_disassembly_ver(get_disassembly_ver(&parsed_args)); let mut symbol_table = HashMap::new(); + // Short circuit preprocessing display. + if parsed_args.get("preprocess").is_some() { + if let Err(e) = perform_preprocessing(stdout, opts, &use_filename, &input_program) { + stdout.write_str(&format!("{}: {}", e.0, e.1)); + } + return; + } + let unopt_res = compile_file( &mut allocator, runner.clone(), @@ -1543,6 +1629,8 @@ pub fn launch_tool(stdout: &mut Stream, args: &[String], tool_name: &str, defaul only_exn, &log_content, symbol_table, + // Clippy: disassemble no longer requires mutability, + // but this callback interface delivers it. &|allocator, p| disassemble(allocator, p, disassembly_ver), ); } else { @@ -1553,6 +1641,7 @@ pub fn launch_tool(stdout: &mut Stream, args: &[String], tool_name: &str, defaul only_exn, &log_content, symbol_table, + // Same as above. &|allocator, p| disassemble(allocator, p, disassembly_ver), ); } diff --git a/src/compiler/cldb.rs b/src/compiler/cldb.rs index 95df3dcd2..11b4c7f59 100644 --- a/src/compiler/cldb.rs +++ b/src/compiler/cldb.rs @@ -150,6 +150,7 @@ impl CldbRun { self.runner.clone(), self.prim_map.clone(), &self.step, + None, ), }; diff --git a/src/compiler/clvm.rs b/src/compiler/clvm.rs index f0020efab..7246aae96 100644 --- a/src/compiler/clvm.rs +++ b/src/compiler/clvm.rs @@ -20,6 +20,16 @@ use crate::compiler::srcloc::Srcloc; use crate::util::{number_from_u8, u8_from_number, Number}; +/// Provide a way of intercepting and running new primitives. +pub trait PrimOverride { + fn try_handle( + &self, + head: Rc, + context: Rc, + tail: Rc, + ) -> Result>, RunFailure>; +} + /// An object which contains the state of a running CLVM program in a compact /// form. /// @@ -161,7 +171,15 @@ fn translate_head( Some(v) => Ok(Rc::new(v.with_loc(l.clone()))), }, SExp::Cons(_l, _a, nil) => match nil.borrow() { - SExp::Nil(_l1) => run(allocator, runner, prim_map, sexp.clone(), context, None), + SExp::Nil(_l1) => run( + allocator, + runner, + prim_map, + sexp.clone(), + context, + None, + None, + ), _ => Err(RunFailure::RunErr( sexp.loc(), format!("Unexpected head form in clvm {sexp}"), @@ -393,6 +411,7 @@ pub fn run_step( runner: Rc, prim_map: Rc, Rc>>, step_: &RunStep, + prim_override: Option<&dyn PrimOverride>, ) -> Result { let mut step = step_.clone(); @@ -496,7 +515,7 @@ pub fn run_step( } } } - RunStep::Op(head, _context, tail, None, parent) => { + RunStep::Op(head, context, tail, None, parent) => { let aval = atom_value(head.clone())?; let apply_atom = 2_i32.to_bigint().unwrap(); let if_atom = 3_i32.to_bigint().unwrap(); @@ -514,6 +533,12 @@ pub fn run_step( -1 }; + if let Some(ovr) = prim_override { + if let Some(res) = ovr.try_handle(head.clone(), context.clone(), tail.clone())? { + return Ok(RunStep::OpResult(res.loc(), res.clone(), parent.clone())); + } + } + let op = if aval == apply_atom { "apply".to_string() } else if aval == if_atom { @@ -629,6 +654,7 @@ pub fn run( prim_map: Rc, Rc>>, sexp_: Rc, context_: Rc, + prim_override: Option<&dyn PrimOverride>, iter_limit: Option, ) -> Result, RunFailure> { let mut step = start_step(sexp_, context_); @@ -641,7 +667,13 @@ pub fn run( } } iters += 1; - step = run_step(allocator, runner.clone(), prim_map.clone(), &step)?; + step = run_step( + allocator, + runner.clone(), + prim_map.clone(), + &step, + prim_override, + )?; if let RunStep::Done(_, x) = step { return Ok(x); } @@ -681,6 +713,7 @@ pub fn parse_and_run( prim_map, code[0].clone(), args[0].clone(), + None, step_limit, ) } diff --git a/src/compiler/codegen.rs b/src/compiler/codegen.rs index 84a48b693..8267a4964 100644 --- a/src/compiler/codegen.rs +++ b/src/compiler/codegen.rs @@ -361,6 +361,7 @@ pub fn process_macro_call( opts.prim_map(), code, Rc::new(args_to_macro), + None, Some(MACRO_TIME_LIMIT), ) .map_err(|e| match e { @@ -662,6 +663,26 @@ pub fn generate_expr_code( create_name_lookup(compiler, l.clone(), atom, true) .map(|f| Ok(CompiledCode(l.clone(), f))) .unwrap_or_else(|_| { + // Finally enable strictness for variable names. + // This is possible because the modern macro system + // takes great care to preserve as much information + // from the source code as possible. + // + // When we come here in strict mode, we have + // a string, integer or atom depending on the + // user's desire and the explicitly generated + // result from the macro, therefore we can return + // an error if this atom didn't have a binding. + if opts.dialect().strict { + return Err(CompileErr( + l.clone(), + format!( + "Unbound use of {} as a variable name", + decode_string(atom) + ), + )); + } + // Pass through atoms that don't look up on behalf of // macros, as it's possible that a macro returned // something that's canonically a name in number form. @@ -674,19 +695,26 @@ pub fn generate_expr_code( }) } } - // Since macros are in this language and the runtime has - // a very narrow data representation, we'll need to - // accomodate bare numbers coming back in place of identifiers. - // I'm considering ways to make this better. - SExp::Integer(l, i) => generate_expr_code( - context, - opts, - compiler, - Rc::new(BodyForm::Value(SExp::Atom( - l.clone(), - u8_from_number(i.clone()), - ))), - ), + SExp::Integer(l, i) => { + // This code can assume that an integer is an integer because + // strict mode closes the necessary loophole below. Values + // intended as variable names are never crushed into integer + // like values from modern macros. + let ambiguous_int_value = if opts.dialect().strict { + Rc::new(BodyForm::Quoted(SExp::Integer(l.clone(), i.clone()))) + } else { + // Since macros are in this language and the runtime has + // a very narrow data representation, we'll need to + // accomodate bare numbers coming back in place of identifiers, + // but only in legacy non-strict mode. + Rc::new(BodyForm::Value(SExp::Atom( + l.clone(), + u8_from_number(i.clone()), + ))) + }; + + generate_expr_code(context, opts, compiler, ambiguous_int_value) + } _ => Ok(CompiledCode( v.loc(), Rc::new(primquote(v.loc(), Rc::new(v.clone()))), @@ -1313,6 +1341,7 @@ fn start_codegen( opts.prim_map(), Rc::new(code), Rc::new(SExp::Nil(defc.loc.clone())), + None, Some(CONST_EVAL_LIMIT), ) .map_err(|r| { diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index f3c1c8273..bc5cde0cd 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -13,7 +13,7 @@ use crate::classic::clvm_tools::stages::stage_0::TRunProgram; use crate::compiler::clvm::sha256tree; use crate::compiler::codegen::{codegen, hoist_body_let_binding, process_helper_let_bindings}; use crate::compiler::comptypes::{CompileErr, CompileForm, CompilerOpts, PrimaryCodegen}; -use crate::compiler::dialect::AcceptedDialect; +use crate::compiler::dialect::{AcceptedDialect, KNOWN_DIALECTS}; use crate::compiler::frontend::frontend; use crate::compiler::optimize::get_optimizer; use crate::compiler::prims; @@ -23,24 +23,6 @@ use crate::compiler::{BasicCompileContext, CompileContextWrapper}; use crate::util::Number; lazy_static! { - pub static ref KNOWN_DIALECTS: HashMap = { - let mut known_dialects: HashMap = HashMap::new(); - known_dialects.insert( - "*standard-cl-21*".to_string(), - indoc! {"( - (defconstant *chialisp-version* 21) - )"} - .to_string(), - ); - known_dialects.insert( - "*standard-cl-22*".to_string(), - indoc! {"( - (defconstant *chialisp-version* 22) - )"} - .to_string(), - ); - known_dialects - }; pub static ref STANDARD_MACROS: String = { indoc! {"( (defmacro if (A B C) (qq (a (i (unquote A) (com (unquote B)) (com (unquote C))) @))) @@ -58,6 +40,35 @@ lazy_static! { "} .to_string() }; + pub static ref ADVANCED_MACROS: String = { + indoc! {"( + (defmac __chia__primitive__if (A B C) + (qq (a (i (unquote A) (com (unquote B)) (com (unquote C))) @)) + ) + + (defun __chia__if (ARGS) + (__chia__primitive__if (r (r (r ARGS))) + (qq (a (i (unquote (f ARGS)) (com (unquote (f (r ARGS)))) (com (unquote (__chia__if (r (r ARGS)))))) @)) + (qq (a (i (unquote (f ARGS)) (com (unquote (f (r ARGS)))) (com (unquote (f (r (r ARGS)))))) @)) + ) + ) + + (defmac if ARGS (__chia__if ARGS)) + + (defun __chia__compile-list (args) + (if args + (c 4 (c (f args) (c (__chia__compile-list (r args)) ()))) + () + ) + ) + + (defmac list ARGS (__chia__compile-list ARGS)) + + (defun-inline / (A B) (f (divmod A B))) + ) + "} + .to_string() + }; } #[derive(Clone, Debug)] @@ -74,8 +85,6 @@ pub struct DefaultCompilerOpts { pub disassembly_ver: Option, pub prim_map: Rc, Rc>>, pub dialect: AcceptedDialect, - - known_dialects: Rc>, } pub fn create_prim_map() -> Rc, Rc>> { @@ -242,6 +251,11 @@ impl CompilerOpts for DefaultCompilerOpts { copy.start_env = start_env; Rc::new(copy) } + fn set_prim_map(&self, prims: Rc, Rc>>) -> Rc { + let mut copy = self.clone(); + copy.prim_map = prims; + Rc::new(copy) + } fn read_new_file( &self, @@ -249,9 +263,13 @@ impl CompilerOpts for DefaultCompilerOpts { filename: String, ) -> Result<(String, Vec), CompileErr> { if filename == "*macros*" { - return Ok((filename, STANDARD_MACROS.clone().as_bytes().to_vec())); - } else if let Some(content) = self.known_dialects.get(&filename) { - return Ok((filename, content.as_bytes().to_vec())); + if self.dialect().strict { + return Ok((filename, ADVANCED_MACROS.bytes().collect())); + } else { + return Ok((filename, STANDARD_MACROS.bytes().collect())); + } + } else if let Some(dialect) = KNOWN_DIALECTS.get(&filename) { + return Ok((filename, dialect.content.bytes().collect())); } for dir in self.include_dirs.iter() { @@ -304,7 +322,6 @@ impl DefaultCompilerOpts { dialect: AcceptedDialect::default(), prim_map: create_prim_map(), disassembly_ver: None, - known_dialects: Rc::new(KNOWN_DIALECTS.clone()), } } } diff --git a/src/compiler/comptypes.rs b/src/compiler/comptypes.rs index bc1da1e19..8f5b2bb62 100644 --- a/src/compiler/comptypes.rs +++ b/src/compiler/comptypes.rs @@ -251,6 +251,8 @@ pub struct DefmacData { pub args: Rc, /// The program appearing in the macro definition. pub program: Rc, + /// Whether this is an an advanced macro. + pub advanced: bool, } /// Information from a constant definition. @@ -436,6 +438,8 @@ pub trait CompilerOpts { fn set_code_generator(&self, new_compiler: PrimaryCodegen) -> Rc; /// Set the environment shape to assume. fn set_start_env(&self, start_env: Option>) -> Rc; + /// Set the primitive map in use so we can add custom primitives. + fn set_prim_map(&self, new_map: Rc, Rc>>) -> Rc; /// Using the search paths list we have, try to read a file by name, /// Returning the expanded path to the file and its content. @@ -596,6 +600,38 @@ impl CompileForm { } } +pub fn generate_defmacro_sexp(mac: &DefmacData) -> Rc { + if mac.advanced { + Rc::new(SExp::Cons( + mac.loc.clone(), + Rc::new(SExp::atom_from_string(mac.loc.clone(), "defmac")), + Rc::new(SExp::Cons( + mac.loc.clone(), + Rc::new(SExp::atom_from_vec(mac.nl.clone(), &mac.name)), + Rc::new(SExp::Cons( + mac.loc.clone(), + mac.args.clone(), + Rc::new(SExp::Cons( + mac.loc.clone(), + mac.program.exp.to_sexp(), + Rc::new(SExp::Nil(mac.loc.clone())), + )), + )), + )), + )) + } else { + Rc::new(SExp::Cons( + mac.loc.clone(), + Rc::new(SExp::atom_from_string(mac.loc.clone(), "defmacro")), + Rc::new(SExp::Cons( + mac.loc.clone(), + Rc::new(SExp::atom_from_vec(mac.nl.clone(), &mac.name)), + mac.program.to_sexp(), + )), + )) + } +} + impl HelperForm { /// Get a reference to the HelperForm's name. pub fn name(&self) -> &Vec { @@ -646,15 +682,7 @@ impl HelperForm { ], )), }, - HelperForm::Defmacro(mac) => Rc::new(SExp::Cons( - mac.loc.clone(), - Rc::new(SExp::atom_from_string(mac.loc.clone(), "defmacro")), - Rc::new(SExp::Cons( - mac.loc.clone(), - Rc::new(SExp::atom_from_vec(mac.nl.clone(), &mac.name)), - mac.program.to_sexp(), - )), - )), + HelperForm::Defmacro(mac) => generate_defmacro_sexp(mac), HelperForm::Defun(inline, defun) => { let di_string = "defun-inline".to_string(); let d_string = "defun".to_string(); diff --git a/src/compiler/dialect.rs b/src/compiler/dialect.rs index 305a0048c..e3f876e93 100644 --- a/src/compiler/dialect.rs +++ b/src/compiler/dialect.rs @@ -10,6 +10,7 @@ use crate::compiler::sexp::decode_string; #[derive(Clone, Debug, Default)] pub struct AcceptedDialect { pub stepping: Option, + pub strict: bool, } /// A package containing the content we should insert when a dialect include is @@ -27,23 +28,55 @@ lazy_static! { ( "*standard-cl-21*", DialectDescription { - accepted: AcceptedDialect { stepping: Some(21) }, + accepted: AcceptedDialect { + stepping: Some(21), + ..AcceptedDialect::default() + }, content: indoc! {"( (defconstant *chialisp-version* 21) )"} .to_string(), }, ), + ( + "*strict-cl-21*", + DialectDescription { + accepted: AcceptedDialect { + stepping: Some(21), + strict: true, + }, + content: indoc! {"( + (defconstant *chialisp-version* 22) + )"} + .to_string(), + }, + ), ( "*standard-cl-22*", DialectDescription { - accepted: AcceptedDialect { stepping: Some(22) }, + accepted: AcceptedDialect { + stepping: Some(22), + strict: false, + }, content: indoc! {"( (defconstant *chialisp-version* 22) )"} .to_string(), }, ), + ( + "*standard-cl-23*", + DialectDescription { + accepted: AcceptedDialect { + stepping: Some(23), + strict: true, + }, + content: indoc! {"( + (defconstant *chialisp-version* 23) + )"} + .to_string(), + }, + ), ]; for (n, v) in dialect_list.iter() { dialects.insert(n.to_string(), v.clone()); diff --git a/src/compiler/evaluate.rs b/src/compiler/evaluate.rs index 664c2749e..940b5fe92 100644 --- a/src/compiler/evaluate.rs +++ b/src/compiler/evaluate.rs @@ -239,7 +239,7 @@ fn get_bodyform_from_arginput(l: &Srcloc, arginput: &ArgInputs) -> Rc // // It's possible this will result in irreducible (unknown at compile time) // argument expressions. -fn create_argument_captures( +pub fn create_argument_captures( argument_captures: &mut HashMap, Rc>, formed_arguments: &ArgInputs, function_arg_spec: Rc, @@ -1618,6 +1618,7 @@ impl<'info> Evaluator { self.prims.clone(), prim, args, + None, Some(PRIM_RUN_LIMIT), ) .map_err(|e| match e { diff --git a/src/compiler/frontend.rs b/src/compiler/frontend.rs index 34b5b4b0a..a2c1727bd 100644 --- a/src/compiler/frontend.rs +++ b/src/compiler/frontend.rs @@ -255,16 +255,18 @@ fn make_let_bindings( opts: Rc, body: Rc, ) -> Result>, CompileErr> { - let err = Err(CompileErr( - body.loc(), - "Bad binding tail ".to_string() + &body.to_string(), - )); + let err = Err(CompileErr(body.loc(), format!("Bad binding tail {body:?}"))); + let do_atomize = if !opts.dialect().strict { + |a: &SExp| -> SExp { a.atomize() } + } else { + |a: &SExp| -> SExp { a.clone() } + }; match body.borrow() { SExp::Nil(_) => Ok(vec![]), SExp::Cons(_, head, tl) => head .proper_list() .filter(|x| x.len() == 2) - .map(|x| match (x[0].atomize(), &x[1]) { + .map(|x| match (do_atomize(&x[0]), &x[1]) { (SExp::Atom(l, name), expr) => { let compiled_body = compile_bodyform(opts.clone(), Rc::new(expr.clone()))?; let mut result = Vec::new(); @@ -621,6 +623,7 @@ fn compile_defmacro( name, args: args.clone(), program: Rc::new(p), + advanced: false, }) }) } @@ -709,7 +712,7 @@ pub fn compile_helperform( matched.args, ) .map(Some) - } else if matched.op_name == b"defmacro" { + } else if matched.op_name == b"defmacro" || matched.op_name == b"defmac" { compile_defmacro( opts, l, diff --git a/src/compiler/optimize/mod.rs b/src/compiler/optimize/mod.rs index 0b7c666d7..7758eb911 100644 --- a/src/compiler/optimize/mod.rs +++ b/src/compiler/optimize/mod.rs @@ -357,6 +357,7 @@ pub fn optimize_expr( opts.prim_map(), code.to_sexp(), Rc::new(SExp::Nil(l)), + None, Some(CONST_FOLD_LIMIT), ) .map(|x| { diff --git a/src/compiler/preprocessor.rs b/src/compiler/preprocessor.rs deleted file mode 100644 index 640f90b4b..000000000 --- a/src/compiler/preprocessor.rs +++ /dev/null @@ -1,344 +0,0 @@ -use std::borrow::Borrow; -use std::collections::HashMap; -use std::rc::Rc; - -use clvmr::allocator::Allocator; - -use crate::classic::clvm::__type_compatibility__::{Bytes, BytesFromType}; - -use crate::compiler::cldb::hex_to_modern_sexp; -use crate::compiler::compiler::KNOWN_DIALECTS; -use crate::compiler::comptypes::{CompileErr, CompilerOpts, IncludeDesc, IncludeProcessType}; -use crate::compiler::runtypes::RunFailure; -use crate::compiler::sexp::{decode_string, enlist, parse_sexp, SExp}; -use crate::compiler::srcloc::Srcloc; -use crate::util::ErrInto; - -/// Determines how an included file is used. -/// -/// Basic means that the file contains helper forms to include in the program. -/// Processed means that some kind of processing is done and the result is a named -/// constant. -#[derive(Clone, Debug)] -enum IncludeType { - /// Normal include in chialisp. The code in the target file will join the - /// program being compiled. - Basic(IncludeDesc), - /// The data in the file will be processed in some way and the result will - /// live in a named constant. - Processed(IncludeDesc, IncludeProcessType, Vec), -} - -/// Given a specification of an include file, load up the forms inside it and -/// return them (or an error if the file couldn't be read or wasn't a list). -pub fn process_include( - opts: Rc, - include: IncludeDesc, -) -> Result>, CompileErr> { - let filename_and_content = opts.read_new_file(opts.filename(), decode_string(&include.name))?; - let content = filename_and_content.1; - let start_of_file = Srcloc::start(&decode_string(&include.name)); - - // Because we're also subsequently returning CompileErr later in the pipe, - // this needs an explicit err map. - parse_sexp(start_of_file.clone(), content.iter().copied()) - .err_into() - .and_then(|x| match x[0].proper_list() { - None => Err(CompileErr( - start_of_file, - "Includes should contain a list of forms".to_string(), - )), - Some(v) => Ok(v.iter().map(|x| Rc::new(x.clone())).collect()), - }) -} - -fn compose_defconst(loc: Srcloc, name: &[u8], sexp: Rc) -> Rc { - Rc::new(enlist( - loc.clone(), - &[ - Rc::new(SExp::Atom(loc.clone(), b"defconst".to_vec())), - Rc::new(SExp::Atom(loc.clone(), name.to_vec())), - Rc::new(SExp::Cons( - loc.clone(), - Rc::new(SExp::Atom(loc, vec![1])), - sexp, - )), - ], - )) -} - -fn process_embed( - loc: Srcloc, - opts: Rc, - fname: &str, - kind: IncludeProcessType, - constant_name: &[u8], -) -> Result>, CompileErr> { - let mut allocator = Allocator::new(); - let run_to_compile_err = |e| match e { - RunFailure::RunExn(l, x) => CompileErr( - l, - format!("failed to convert compiled clvm to expression: throw ({x})"), - ), - RunFailure::RunErr(l, e) => CompileErr( - l, - format!("failed to convert compiled clvm to expression: {e}"), - ), - }; - - let (full_name, content) = opts.read_new_file(opts.filename(), fname.to_string())?; - let content = match kind { - IncludeProcessType::Bin => Rc::new(SExp::Atom(loc.clone(), content)), - IncludeProcessType::Hex => hex_to_modern_sexp( - &mut allocator, - &HashMap::new(), - loc.clone(), - &decode_string(&content), - ) - .map_err(run_to_compile_err)?, - IncludeProcessType::SExpression => { - let parsed = parse_sexp(Srcloc::start(&full_name), content.iter().copied()) - .map_err(|e| CompileErr(e.0, e.1))?; - if parsed.len() != 1 { - return Err(CompileErr( - loc, - format!("More than one form (or empty data) in {fname}"), - )); - } - - parsed[0].clone() - } - }; - - Ok(vec![compose_defconst(loc, constant_name, content)]) -} - -/* Expand include inline in forms */ -fn process_pp_form( - opts: Rc, - includes: &mut Vec, - body: Rc, -) -> Result>, CompileErr> { - // Support using the preprocessor to collect dependencies recursively. - let recurse_dependencies = |opts: Rc, - includes: &mut Vec, - desc: IncludeDesc| - -> Result<(), CompileErr> { - let name_string = decode_string(&desc.name); - if KNOWN_DIALECTS.contains_key(&name_string) { - return Ok(()); - } - - let (full_name, content) = opts.read_new_file(opts.filename(), name_string)?; - includes.push(IncludeDesc { - name: full_name.as_bytes().to_vec(), - ..desc - }); - - let parsed = parse_sexp(Srcloc::start(&full_name), content.iter().copied()) - .map_err(|e| CompileErr(e.0, e.1))?; - if parsed.is_empty() { - return Ok(()); - } - - let program_form = parsed[0].clone(); - if let Some(l) = program_form.proper_list() { - for elt in l.iter() { - process_pp_form(opts.clone(), includes, Rc::new(elt.clone()))?; - } - } - - Ok(()) - }; - - let include_type: Option = body - .proper_list() - .map(|x| x.iter().map(|elt| elt.atomize()).collect()) - .map(|x: Vec| { - match &x[..] { - [SExp::Atom(kw, inc), SExp::Atom(nl, fname)] => { - if inc == b"include" { - return Ok(Some(IncludeType::Basic(IncludeDesc { - kw: kw.clone(), - nl: nl.clone(), - kind: None, - name: fname.clone(), - }))); - } - } - - // Accepted forms: - // (embed-file varname bin file.dat) - // (embed-file varname sexp file.clvm) - // (embed-file varname hex file.hex) - [SExp::Atom(kl, embed_file), SExp::Atom(_, name), SExp::Atom(_, kind), SExp::Atom(nl, fname)] => { - if embed_file == b"embed-file" { - if kind == b"hex" { - return Ok(Some(IncludeType::Processed( - IncludeDesc { - kw: kl.clone(), - nl: nl.clone(), - kind: Some(IncludeProcessType::Hex), - name: fname.clone(), - }, - IncludeProcessType::Hex, - name.clone() - ))); - } else if kind == b"bin" { - return Ok(Some(IncludeType::Processed( - IncludeDesc { - kw: kl.clone(), - nl: nl.clone(), - kind: Some(IncludeProcessType::Bin), - name: fname.clone(), - }, - IncludeProcessType::Bin, - name.clone(), - ))); - } else if kind == b"sexp" { - return Ok(Some(IncludeType::Processed( - IncludeDesc { - kw: kl.clone(), - nl: nl.clone(), - kind: Some(IncludeProcessType::SExpression), - name: fname.clone(), - }, - IncludeProcessType::SExpression, - name.clone(), - ))); - } else { - return Err(CompileErr( - body.loc(), - format!("bad include kind in embed-file {body}") - )); - } - } - } - - [] => {} - - // Ensure that legal empty or atom expressions don't try include - _ => { - // Include is only allowed as a proper form. It's a keyword in - // this language. - if let SExp::Atom(_, inc) = &x[0] { - if inc == b"include" { - return Err(CompileErr( - body.loc(), - format!("bad tail in include {body}"), - )); - } - } - } - } - - Ok(None) - }) - .unwrap_or_else(|| Ok(None))?; - - match include_type { - Some(IncludeType::Basic(f)) => { - recurse_dependencies(opts.clone(), includes, f.clone())?; - process_include(opts, f) - } - Some(IncludeType::Processed(f, kind, name)) => process_embed( - body.loc(), - opts, - &Bytes::new(Some(BytesFromType::Raw(f.name.to_vec()))).decode(), - kind, - &name, - ), - _ => Ok(vec![body]), - } -} - -fn preprocess_( - opts: Rc, - includes: &mut Vec, - body: Rc, -) -> Result>, CompileErr> { - match body.borrow() { - SExp::Cons(_, head, rest) => match rest.borrow() { - SExp::Nil(_nl) => process_pp_form(opts, includes, head.clone()), - _ => { - let lst = process_pp_form(opts.clone(), includes, head.clone())?; - let mut rs = preprocess_(opts, includes, rest.clone())?; - let mut result = lst; - result.append(&mut rs); - Ok(result) - } - }, - _ => Ok(vec![body]), - } -} - -fn inject_std_macros(body: Rc) -> SExp { - match body.proper_list() { - Some(v) => { - let include_form = Rc::new(SExp::Cons( - body.loc(), - Rc::new(SExp::atom_from_string(body.loc(), "include")), - Rc::new(SExp::Cons( - body.loc(), - Rc::new(SExp::quoted_from_string(body.loc(), "*macros*")), - Rc::new(SExp::Nil(body.loc())), - )), - )); - let mut v_clone: Vec> = v.iter().map(|x| Rc::new(x.clone())).collect(); - let include_copy: &SExp = include_form.borrow(); - v_clone.insert(0, Rc::new(include_copy.clone())); - enlist(body.loc(), &v_clone) - } - _ => { - let body_clone: &SExp = body.borrow(); - body_clone.clone() - } - } -} - -/// Run the preprocessor over this code, which at present just finds (include ...) -/// forms in the source and includes the content of in a combined list. If a file -/// can't be found via the directory list in CompilerOrs. -pub fn preprocess( - opts: Rc, - includes: &mut Vec, - cmod: Rc, -) -> Result>, CompileErr> { - let tocompile = if opts.stdenv() { - let injected = inject_std_macros(cmod); - Rc::new(injected) - } else { - cmod - }; - - preprocess_(opts, includes, tocompile) -} - -/// Visit all files used during compilation. -/// This reports a list of all files used while compiling the input file, via any -/// form that causes compilation to include another file. The file names are path -/// expanded based on the include path they were found in (from opts). -pub fn gather_dependencies( - opts: Rc, - real_input_path: &str, - file_content: &str, -) -> Result, CompileErr> { - let mut includes = Vec::new(); - let loc = Srcloc::start(real_input_path); - - let parsed = parse_sexp(loc.clone(), file_content.bytes())?; - - if parsed.is_empty() { - return Ok(vec![]); - } - - if let Some(l) = parsed[0].proper_list() { - for elt in l.iter() { - process_pp_form(opts.clone(), &mut includes, Rc::new(elt.clone()))?; - } - } else { - return Err(CompileErr(loc, "malformed list body".to_string())); - }; - - Ok(includes) -} diff --git a/src/compiler/preprocessor/macros.rs b/src/compiler/preprocessor/macros.rs new file mode 100644 index 000000000..68a0c0b0d --- /dev/null +++ b/src/compiler/preprocessor/macros.rs @@ -0,0 +1,441 @@ +use std::borrow::Borrow; +use std::collections::HashMap; +use std::rc::Rc; + +use num_bigint::ToBigInt; +use num_traits::ToPrimitive; + +use crate::classic::clvm::__type_compatibility__::{bi_one, bi_zero}; + +use crate::compiler::clvm::PrimOverride; +use crate::compiler::comptypes::{CompileErr, CompilerOpts}; +use crate::compiler::runtypes::RunFailure; +use crate::compiler::sexp::{decode_string, printable, SExp}; +use crate::compiler::srcloc::Srcloc; +use crate::util::{number_from_u8, Number}; + +// If the bodyform represents a constant, only match a quoted string. +fn match_quoted_string(body: Rc) -> Result<(Srcloc, Vec), CompileErr> { + match body.borrow() { + SExp::QuotedString(_, b'x', _) => {} + SExp::QuotedString(al, _, an) => return Ok((al.clone(), an.clone())), + _ => {} + } + + Err(CompileErr(body.loc(), "string required".to_string())) +} + +fn match_atom(body: Rc) -> Result<(Srcloc, Vec), CompileErr> { + if let SExp::Atom(al, an) = body.borrow() { + return Ok((al.clone(), an.clone())); + } + Err(CompileErr(body.loc(), "atom required".to_string())) +} + +enum MatchedNumber { + MatchedInt(Srcloc, Number), + MatchedHex(Srcloc, Vec), +} + +fn match_number(body: Rc) -> Result { + match body.borrow() { + SExp::Integer(il, n) => { + return Ok(MatchedNumber::MatchedInt(il.clone(), n.clone())); + } + SExp::QuotedString(ql, b'x', b) => { + return Ok(MatchedNumber::MatchedHex(ql.clone(), b.clone())); + } + SExp::Atom(al, b) => { + // An atom with unprintable characters is rendered as an integer. + if !printable(b) { + let to_integer = number_from_u8(b); + return Ok(MatchedNumber::MatchedInt(al.clone(), to_integer)); + } + } + SExp::Nil(il) => { + return Ok(MatchedNumber::MatchedInt(il.clone(), bi_zero())); + } + _ => {} + } + + Err(CompileErr(body.loc(), "Not a number".to_string())) +} + +fn numeric_value(body: Rc) -> Result { + match match_number(body.clone())? { + MatchedNumber::MatchedInt(_, n) => Ok(n), + MatchedNumber::MatchedHex(_, h) => Ok(number_from_u8(&h)), + } +} + +fn usize_value(body: Rc) -> Result { + let n = numeric_value(body.clone())?; + if let Some(res) = n.to_usize() { + Ok(res) + } else { + Err(CompileErr(body.loc(), "Value out of range".to_string())) + } +} + +/// A container for a function to evaluate in advanced preprocessor macros. +/// We use this trait (which is very similar to the extension trait in Evaluator) +/// as a definite handler for a specific named form, so optional returns aren't +/// needed. These are held in a collection and looked up. To be maximally +/// conservative with typing and lifetime, we hold these via Rc. +pub trait ExtensionFunction { + fn want_interp(&self) -> bool { + true + } + fn required_args(&self) -> Option; + fn try_eval(&self, loc: &Srcloc, args: &[Rc]) -> Result, CompileErr>; +} + +struct StringQ; + +impl StringQ { + fn create() -> Rc { + Rc::new(StringQ) + } +} + +impl ExtensionFunction for StringQ { + fn required_args(&self) -> Option { + Some(1) + } + + fn try_eval(&self, loc: &Srcloc, args: &[Rc]) -> Result, CompileErr> { + let res = match match_quoted_string(args[0].clone()) { + Ok(_) => SExp::Integer(loc.clone(), bi_one()), + _ => SExp::Nil(loc.clone()), + }; + + Ok(Rc::new(res)) + } +} + +struct NumberQ; + +impl NumberQ { + fn create() -> Rc { + Rc::new(NumberQ) + } +} + +impl ExtensionFunction for NumberQ { + fn required_args(&self) -> Option { + Some(1) + } + + fn try_eval(&self, loc: &Srcloc, args: &[Rc]) -> Result, CompileErr> { + let res = match match_number(args[0].clone()) { + Ok(_) => SExp::Integer(loc.clone(), bi_one()), + _ => SExp::Nil(loc.clone()), + }; + + Ok(Rc::new(res)) + } +} + +struct SymbolQ; + +impl SymbolQ { + fn create() -> Rc { + Rc::new(SymbolQ) + } +} + +impl ExtensionFunction for SymbolQ { + fn required_args(&self) -> Option { + Some(1) + } + + fn try_eval(&self, loc: &Srcloc, args: &[Rc]) -> Result, CompileErr> { + let res = match match_atom(args[0].clone()) { + Ok(_) => SExp::Integer(loc.clone(), bi_one()), + _ => SExp::Nil(loc.clone()), + }; + + Ok(Rc::new(res)) + } +} + +struct SymbolToString; + +impl SymbolToString { + fn create() -> Rc { + Rc::new(SymbolToString) + } +} + +impl ExtensionFunction for SymbolToString { + fn required_args(&self) -> Option { + Some(1) + } + + fn try_eval(&self, _loc: &Srcloc, args: &[Rc]) -> Result, CompileErr> { + let (loc, value) = match_atom(args[0].clone())?; + Ok(Rc::new(SExp::QuotedString(loc, b'\"', value))) + } +} + +struct StringToSymbol; + +impl StringToSymbol { + fn create() -> Rc { + Rc::new(StringToSymbol) + } +} + +impl ExtensionFunction for StringToSymbol { + fn required_args(&self) -> Option { + Some(1) + } + + fn try_eval(&self, _loc: &Srcloc, args: &[Rc]) -> Result, CompileErr> { + let (loc, value) = match_quoted_string(args[0].clone())?; + Ok(Rc::new(SExp::Atom(loc, value))) + } +} + +struct StringAppend; + +impl StringAppend { + fn create() -> Rc { + Rc::new(StringAppend) + } +} + +impl ExtensionFunction for StringAppend { + fn required_args(&self) -> Option { + None + } + + fn try_eval(&self, loc: &Srcloc, args: &[Rc]) -> Result, CompileErr> { + let mut out_vec = Vec::new(); + let mut out_loc = None; + for a in args.iter() { + let (loc, mut value) = match_quoted_string(a.clone())?; + if out_loc.is_none() { + out_loc = Some(loc); + } + out_vec.append(&mut value); + } + Ok(Rc::new(SExp::QuotedString( + out_loc.unwrap_or_else(|| loc.clone()), + b'\"', + out_vec, + ))) + } +} + +struct NumberToString; + +impl NumberToString { + fn create() -> Rc { + Rc::new(NumberToString) + } +} + +impl ExtensionFunction for NumberToString { + fn required_args(&self) -> Option { + Some(1) + } + + fn try_eval(&self, _loc: &Srcloc, args: &[Rc]) -> Result, CompileErr> { + let match_res = match_number(args[0].clone())?; + let (use_loc, int_val) = match &match_res { + MatchedNumber::MatchedInt(l, i) => (l.clone(), i.clone()), + MatchedNumber::MatchedHex(l, h) => (l.clone(), number_from_u8(h)), + }; + Ok(Rc::new(SExp::QuotedString( + use_loc, + b'\"', + int_val.to_string().as_bytes().to_vec(), + ))) + } +} + +struct StringToNumber; + +impl StringToNumber { + fn create() -> Rc { + Rc::new(StringToNumber) + } +} + +impl ExtensionFunction for StringToNumber { + fn required_args(&self) -> Option { + Some(1) + } + + fn try_eval(&self, _loc: &Srcloc, args: &[Rc]) -> Result, CompileErr> { + let (loc, value) = match_quoted_string(args[0].clone())?; + if let Ok(cvt_bi) = decode_string(&value).parse::() { + Ok(Rc::new(SExp::Integer(loc, cvt_bi))) + } else { + Err(CompileErr(loc, "bad number".to_string())) + } + } +} + +struct StringLength; + +impl StringLength { + fn create() -> Rc { + Rc::new(StringLength) + } +} + +impl ExtensionFunction for StringLength { + fn required_args(&self) -> Option { + Some(1) + } + + fn try_eval(&self, _loc: &Srcloc, args: &[Rc]) -> Result, CompileErr> { + let (loc, value) = match_quoted_string(args[0].clone())?; + if let Some(len_bi) = value.len().to_bigint() { + return Ok(Rc::new(SExp::Integer(loc, len_bi))); + } + Err(CompileErr( + args[0].loc(), + "Error getting string length".to_string(), + )) + } +} + +struct Substring; + +impl Substring { + fn create() -> Rc { + Rc::new(Substring) + } +} + +impl ExtensionFunction for Substring { + fn required_args(&self) -> Option { + Some(3) + } + + fn try_eval(&self, _loc: &Srcloc, args: &[Rc]) -> Result, CompileErr> { + let start_element = usize_value(args[1].clone())?; + let end_element = usize_value(args[2].clone())?; + + match args[0].borrow() { + SExp::QuotedString(l, ch, s) => { + if start_element > end_element || start_element > s.len() || end_element > s.len() { + return Err(CompileErr( + l.clone(), + "start greater than end in substring".to_string(), + )); + } + let result_value: Vec = s + .iter() + .take(end_element) + .skip(start_element) + .copied() + .collect(); + Ok(Rc::new(SExp::QuotedString(l.clone(), *ch, result_value))) + } + _ => Err(CompileErr(args[0].loc(), "Not a string".to_string())), + } + } +} + +/// An evaluator extension for the preprocessor. +/// +/// Implements scheme like conversion functions for handling chialisp programs and +/// bits of them. +/// +/// These functions are provided: +/// +/// Enhanced versions of builtin macros: +/// +/// if -- first class short circuiting, no round trip to CLVM space +/// list -- simple own implementation +/// c -- cons preserving exact input values +/// f -- first and rest exactly preserving part of cons. +/// r -- +/// +/// Queries +/// +/// string? +/// number? +/// symbol? +/// +/// Basic conversion +/// +/// string->symbol +/// symbol->string +/// string->number +/// number->string +/// +/// Working with values +/// +/// string-append s0 s1 ... +/// string-length +/// substring s start end +/// +pub struct PreprocessorExtension { + extfuns: HashMap, Rc>, +} + +impl PrimOverride for PreprocessorExtension { + fn try_handle( + &self, + head: Rc, + _context: Rc, + tail: Rc, + ) -> Result>, RunFailure> { + if let SExp::Atom(hl, head_atom) = head.borrow() { + let have_args: Vec> = if let Some(args_list) = tail.proper_list() { + args_list.into_iter().map(Rc::new).collect() + } else { + return Ok(None); + }; + + if let Some(extension) = self.extfuns.get(head_atom) { + let res = extension.try_eval(hl, &have_args)?; + return Ok(Some(res)); + } + } + + Ok(None) + } +} + +impl PreprocessorExtension { + pub fn new() -> Self { + let extfuns = [ + (b"string?".to_vec(), StringQ::create()), + (b"number?".to_vec(), NumberQ::create()), + (b"symbol?".to_vec(), SymbolQ::create()), + (b"string->symbol".to_vec(), StringToSymbol::create()), + (b"symbol->string".to_vec(), SymbolToString::create()), + (b"string->number".to_vec(), StringToNumber::create()), + (b"number->string".to_vec(), NumberToString::create()), + (b"string-append".to_vec(), StringAppend::create()), + (b"string-length".to_vec(), StringLength::create()), + (b"substring".to_vec(), Substring::create()), + ]; + PreprocessorExtension { + extfuns: HashMap::from(extfuns), + } + } + + /// Introduce new primitive names for the operators we use to bootstrap macros. + pub fn enrich_prims(&self, opts: Rc) -> Rc { + let old_prim_map = opts.prim_map(); + let old_prim_map_borrowed: &HashMap, Rc> = old_prim_map.borrow(); + let mut new_prim_map_cloned = old_prim_map_borrowed.clone(); + let srcloc = Srcloc::start("*defmac*"); + + for (f, _) in self.extfuns.iter() { + if !new_prim_map_cloned.contains_key(f) { + new_prim_map_cloned + .insert(f.clone(), Rc::new(SExp::Atom(srcloc.clone(), f.clone()))); + } + } + + opts.set_prim_map(Rc::new(new_prim_map_cloned)) + } +} diff --git a/src/compiler/preprocessor/mod.rs b/src/compiler/preprocessor/mod.rs new file mode 100644 index 000000000..69fadd9dc --- /dev/null +++ b/src/compiler/preprocessor/mod.rs @@ -0,0 +1,600 @@ +mod macros; + +use std::borrow::Borrow; +use std::collections::HashMap; +use std::rc::Rc; + +use clvmr::allocator::Allocator; + +use crate::classic::clvm_tools::binutils::assemble; +use crate::classic::clvm_tools::stages::stage_0::{DefaultProgramRunner, TRunProgram}; + +use crate::compiler::cldb::hex_to_modern_sexp; +use crate::compiler::clvm; +use crate::compiler::clvm::truthy; +use crate::compiler::comptypes::{ + BodyForm, CompileErr, CompileForm, CompilerOpts, HelperForm, IncludeDesc, IncludeProcessType, +}; +use crate::compiler::dialect::{detect_modern, KNOWN_DIALECTS}; +use crate::compiler::evaluate::{create_argument_captures, ArgInputs}; +use crate::compiler::frontend::{compile_helperform, frontend}; +use crate::compiler::preprocessor::macros::PreprocessorExtension; +use crate::compiler::rename::rename_args_helperform; +use crate::compiler::runtypes::RunFailure; +use crate::compiler::sexp::{ + decode_string, enlist, parse_sexp, Atom, NodeSel, SExp, SelectNode, ThisNode, +}; +use crate::compiler::srcloc::Srcloc; +use crate::util::ErrInto; + +/// Determines how an included file is used. +/// +/// Basic means that the file contains helper forms to include in the program. +/// Processed means that some kind of processing is done and the result is a named +/// constant. +#[derive(Clone, Debug)] +enum IncludeType { + /// Normal include in chialisp. The code in the target file will join the + /// program being compiled. + Basic(IncludeDesc), + /// The data in the file will be processed in some way and the result will + /// live in a named constant. + Processed(IncludeDesc, IncludeProcessType, Vec), +} + +struct Preprocessor { + opts: Rc, + ppext: Rc, + runner: Rc, + helpers: Vec, + strict: bool, + stored_macros: HashMap, Rc>, +} + +fn compose_defconst(loc: Srcloc, name: &[u8], sexp: Rc) -> Rc { + Rc::new(enlist( + loc.clone(), + &[ + Rc::new(SExp::Atom(loc.clone(), b"defconst".to_vec())), + Rc::new(SExp::Atom(loc.clone(), name.to_vec())), + Rc::new(SExp::Cons( + loc.clone(), + Rc::new(SExp::Atom(loc, vec![1])), + sexp, + )), + ], + )) +} + +fn make_defmac_name(name: &[u8]) -> Vec { + let mut res = b"__chia__defmac__".to_vec(); + res.append(&mut name.to_vec()); + res +} + +fn nilize(v: Rc) -> Rc { + if let SExp::Cons(l, a, b) = v.borrow() { + let a_conv = nilize(a.clone()); + let b_conv = nilize(b.clone()); + if Rc::as_ptr(&a_conv) == Rc::as_ptr(a) && Rc::as_ptr(&b_conv) == Rc::as_ptr(b) { + v.clone() + } else { + Rc::new(SExp::Cons(l.clone(), a_conv, b_conv)) + } + } else if !truthy(v.clone()) { + Rc::new(SExp::Nil(v.loc())) + } else { + v + } +} + +impl Preprocessor { + pub fn new(opts: Rc) -> Self { + let runner = Rc::new(DefaultProgramRunner::new()); + let ppext = Rc::new(PreprocessorExtension::new()); + let opts_prims = ppext.enrich_prims(opts.clone()); + Preprocessor { + opts: opts_prims, + ppext, + runner, + helpers: Vec::new(), + strict: opts.dialect().strict, + stored_macros: HashMap::default(), + } + } + + /// Given a specification of an include file, load up the forms inside it and + /// return them (or an error if the file couldn't be read or wasn't a list). + pub fn process_include( + &mut self, + includes: &mut Vec, + include: &IncludeDesc, + ) -> Result>, CompileErr> { + let filename_and_content = self + .opts + .read_new_file(self.opts.filename(), decode_string(&include.name))?; + let content = filename_and_content.1; + let start_of_file = Srcloc::start(&decode_string(&include.name)); + + // Because we're also subsequently returning CompileErr later in the pipe, + // this needs an explicit err map. + let parsed: Vec> = parse_sexp(start_of_file.clone(), content.iter().copied()) + .err_into() + .and_then(|x| match x[0].proper_list() { + None => Err(CompileErr( + start_of_file, + "Includes should contain a list of forms".to_string(), + )), + Some(v) => Ok(v.iter().map(|x| Rc::new(x.clone())).collect()), + })?; + + if self.strict { + let mut result = Vec::new(); + for p in parsed.into_iter() { + let mut new_forms = self.process_pp_form(includes, p.clone())?; + result.append(&mut new_forms); + } + + Ok(result) + } else { + Ok(parsed) + } + } + + fn process_embed( + &mut self, + loc: Srcloc, + fname: &str, + kind: &IncludeProcessType, + constant_name: &[u8], + ) -> Result>, CompileErr> { + let mut allocator = Allocator::new(); + let run_to_compile_err = |e| match e { + RunFailure::RunExn(l, x) => CompileErr( + l, + format!("failed to convert compiled clvm to expression: throw ({x})"), + ), + RunFailure::RunErr(l, e) => CompileErr( + l, + format!("failed to convert compiled clvm to expression: {e}"), + ), + }; + + let (full_name, content) = self + .opts + .read_new_file(self.opts.filename(), fname.to_string())?; + let content = match kind { + IncludeProcessType::Bin => Rc::new(SExp::Atom(loc.clone(), content)), + IncludeProcessType::Hex => hex_to_modern_sexp( + &mut allocator, + &HashMap::new(), + loc.clone(), + &decode_string(&content), + ) + .map_err(run_to_compile_err)?, + IncludeProcessType::SExpression => { + let parsed = parse_sexp(Srcloc::start(&full_name), content.iter().copied()) + .map_err(|e| CompileErr(e.0, e.1))?; + if parsed.len() != 1 { + return Err(CompileErr(loc, format!("More than one form in {fname}"))); + } + + parsed[0].clone() + } + }; + + Ok(vec![compose_defconst(loc, constant_name, content)]) + } + + // Support using the preprocessor to collect dependencies recursively. + fn recurse_dependencies( + &mut self, + includes: &mut Vec, + desc: IncludeDesc, + ) -> Result<(), CompileErr> { + let name_string = decode_string(&desc.name); + if KNOWN_DIALECTS.contains_key(&name_string) { + return Ok(()); + } + + let (full_name, content) = self.opts.read_new_file(self.opts.filename(), name_string)?; + includes.push(IncludeDesc { + name: full_name.as_bytes().to_vec(), + ..desc + }); + + let parsed = parse_sexp(Srcloc::start(&full_name), content.iter().copied()) + .map_err(|e| CompileErr(e.0, e.1))?; + if parsed.is_empty() { + return Ok(()); + } + + let program_form = parsed[0].clone(); + if let Some(l) = program_form.proper_list() { + for elt in l.iter() { + self.process_pp_form(includes, Rc::new(elt.clone()))?; + } + } + + Ok(()) + } + + fn add_helper(&mut self, h: HelperForm) { + for i in 0..=self.helpers.len() { + if i == self.helpers.len() { + self.helpers.push(h); + break; + } else if self.helpers[i].name() == h.name() { + self.helpers[i] = h; + break; + } + } + } + + // Check for and apply preprocessor level macros. + // This is maximally permissive. + fn expand_macros( + &mut self, + body: Rc, + start: bool, + ) -> Result>, CompileErr> { + if let SExp::Cons(l, f, r) = body.borrow() { + // First expand inner macros. + let first_expanded = self.expand_macros(f.clone(), true)?; + let rest_expanded = self.expand_macros(r.clone(), false)?; + let new_self = match (first_expanded, rest_expanded) { + (None, None) => Some(body.clone()), + (Some(f), None) => Some(Rc::new(SExp::Cons(l.clone(), f, r.clone()))), + (None, Some(r)) => Some(Rc::new(SExp::Cons(l.clone(), f.clone(), r))), + (Some(f), Some(r)) => Some(Rc::new(SExp::Cons(l.clone(), f, r))), + }; + + if !start { + return Ok(new_self); + } + + if let Ok(NodeSel::Cons((_, name), args)) = NodeSel::Cons(Atom::Here(()), ThisNode) + .select_nodes(new_self.clone().unwrap_or_else(|| body.clone())) + { + let defmac_name = make_defmac_name(&name); + + // See if it's a form that calls one of our macros. + for m in self.helpers.iter() { + if let HelperForm::Defun(_, mdata) = &m { + // We record upfront macros + if mdata.name != defmac_name { + continue; + } + + // The name matched, try calling it. + + // Form argument env. + let mut macro_arg_env = HashMap::new(); + let args_borrowed: &SExp = args.borrow(); + create_argument_captures( + &mut macro_arg_env, + &ArgInputs::Whole(Rc::new(BodyForm::Quoted(args_borrowed.clone()))), + mdata.args.clone(), + )?; + + let mut allocator = Allocator::new(); + let compiled_program = if let Some(compiled_program) = + self.stored_macros.get(&mdata.name) + { + compiled_program.clone() + } else { + // as inline defuns because they're closest to that + // semantically. + let mut symbol_table = HashMap::new(); + let new_program = CompileForm { + loc: body.loc(), + args: mdata.args.clone(), + include_forms: vec![], + helpers: self.helpers.clone(), + exp: mdata.body.clone(), + }; + + let program_sexp = Rc::new(SExp::Cons( + body.loc(), + Rc::new(SExp::Atom(body.loc(), b"mod".to_vec())), + new_program.to_sexp(), + )); + + let compiled_program = self.opts.set_stdenv(false).compile_program( + &mut allocator, + self.runner.clone(), + program_sexp, + &mut symbol_table, + )?; + self.stored_macros + .insert(mdata.name.clone(), Rc::new(compiled_program.clone())); + Rc::new(compiled_program) + }; + + let ppext: &PreprocessorExtension = self.ppext.borrow(); + let res = clvm::run( + &mut allocator, + self.runner.clone(), + self.opts.prim_map(), + compiled_program, + args.clone(), + Some(ppext), + None, + ) + .map(nilize) + .map_err(CompileErr::from)?; + + if let Some(final_result) = self.expand_macros(res.clone(), true)? { + return Ok(Some(final_result)); + } else { + return Ok(Some(res)); + } + } + } + } + + return Ok(new_self); + } + + Ok(None) + } + + // If it's a defmac (preprocessor level macro), add it to the evaulator. + fn decode_macro(&mut self, definition: Rc) -> Result, CompileErr> { + if let Ok(NodeSel::Cons( + (defmac_loc, kw), + NodeSel::Cons((nl, name), NodeSel::Cons(args, body)), + )) = NodeSel::Cons( + Atom::Here(()), + NodeSel::Cons(Atom::Here(()), NodeSel::Cons(ThisNode, ThisNode)), + ) + .select_nodes(definition.clone()) + { + let is_defmac = kw == b"defmac"; + if is_defmac + || kw == b"defmacro" + || kw == b"defun" + || kw == b"defun-inline" + || kw == b"defconst" + || kw == b"defconstant" + { + if is_defmac { + let target_defun = Rc::new(SExp::Cons( + defmac_loc.clone(), + Rc::new(SExp::atom_from_string(defmac_loc, "defun")), + Rc::new(SExp::Cons( + nl.clone(), + Rc::new(SExp::Atom(nl, make_defmac_name(&name))), + Rc::new(SExp::Cons(args.loc(), args.clone(), body)), + )), + )); + if let Some(helper) = compile_helperform(self.opts.clone(), target_defun)? { + self.add_helper(rename_args_helperform(&helper)?); + } else { + return Err(CompileErr( + definition.loc(), + "defmac found but couldn't be converted to function".to_string(), + )); + } + } else if let Some(helper) = compile_helperform(self.opts.clone(), definition)? { + self.add_helper(rename_args_helperform(&helper)?); + } + } + } + + Ok(None) + } + + /* Expand include inline in forms */ + fn process_pp_form( + &mut self, + includes: &mut Vec, + unexpanded_body: Rc, + ) -> Result>, CompileErr> { + let body = self + .expand_macros(unexpanded_body.clone(), true)? + .unwrap_or_else(|| unexpanded_body.clone()); + // Support using the preprocessor to collect dependencies recursively. + let included: Option = body + .proper_list() + .map(|x| x.iter().map(|elt| elt.atomize()).collect()) + .map(|x: Vec| { + match &x[..] { + [SExp::Atom(kw, inc), SExp::Atom(nl, fname)] => { + if "include".as_bytes().to_vec() == *inc { + return Ok(Some(IncludeType::Basic( + IncludeDesc { + kw: kw.clone(), + nl: nl.clone(), + name: fname.clone(), + kind: None, + } + ))); + } + } + [SExp::Atom(kw, inc), SExp::QuotedString(nl, _, fname)] => { + if "include".as_bytes().to_vec() == *inc { + return Ok(Some(IncludeType::Basic( + IncludeDesc { + kw: kw.clone(), + nl: nl.clone(), + name: fname.clone(), + kind: None, + } + ))); + } + } + + [SExp::Atom(kl, embed_file), SExp::Atom(_, name), SExp::Atom(_, kind), SExp::Atom(nl, fname)] => { + if embed_file == b"embed-file" { + if kind == b"hex" { + return Ok(Some(IncludeType::Processed( + IncludeDesc { + kw: kl.clone(), + nl: nl.clone(), + kind: Some(IncludeProcessType::Hex), + name: fname.clone(), + }, + IncludeProcessType::Hex, + name.clone() + ))); + } else if kind == b"bin" { + return Ok(Some(IncludeType::Processed( + IncludeDesc { + kw: kl.clone(), + nl: nl.clone(), + kind: Some(IncludeProcessType::Bin), + name: fname.clone(), + }, + IncludeProcessType::Bin, + name.clone(), + ))); + } else if kind == b"sexp" { + return Ok(Some(IncludeType::Processed( + IncludeDesc { + kw: kl.clone(), + nl: nl.clone(), + kind: Some(IncludeProcessType::SExpression), + name: fname.clone(), + }, + IncludeProcessType::SExpression, + name.clone(), + ))); + } else { + return Err(CompileErr( + body.loc(), + format!("bad include kind in embed-file {body}") + )); + } + } + } + + [] => {} + + // Ensure that legal empty or atom expressions don't try include + _ => { + // Include is only allowed as a proper form. It's a keyword in + // this language. + if let SExp::Atom(_, inc) = &x[0] { + if "include".as_bytes().to_vec() == *inc { + return Err(CompileErr( + body.loc(), + format!("bad tail in include {body}"), + )); + } else { + // Try to pick up helperforms. + if let Some(()) = self.decode_macro(body.clone())? { + return Ok(None); + } + } + } + } + } + + Ok(None) + }) + .unwrap_or_else(|| Ok(None))?; + + if let Some(()) = self.decode_macro(body.clone())? { + Ok(vec![]) + } else if let Some(IncludeType::Basic(i)) = &included { + self.recurse_dependencies(includes, i.clone())?; + self.process_include(includes, i) + } else if let Some(IncludeType::Processed(f, kind, name)) = &included { + self.recurse_dependencies(includes, f.clone())?; + self.process_embed(body.loc(), &decode_string(&f.name), kind, name) + } else { + Ok(vec![body]) + } + } + + fn inject_std_macros(&mut self, body: Rc) -> SExp { + match body.proper_list() { + Some(v) => { + let include_form = Rc::new(SExp::Cons( + body.loc(), + Rc::new(SExp::atom_from_string(body.loc(), "include")), + Rc::new(SExp::Cons( + body.loc(), + Rc::new(SExp::quoted_from_string(body.loc(), "*macros*")), + Rc::new(SExp::Nil(body.loc())), + )), + )); + let mut v_clone: Vec> = v.iter().map(|x| Rc::new(x.clone())).collect(); + let include_copy: &SExp = include_form.borrow(); + v_clone.insert(0, Rc::new(include_copy.clone())); + enlist(body.loc(), &v_clone) + } + _ => { + let body_clone: &SExp = body.borrow(); + body_clone.clone() + } + } + } + + pub fn run( + &mut self, + includes: &mut Vec, + cmod: Rc, + ) -> Result>, CompileErr> { + let mut result = Vec::new(); + let mut tocompile = if self.opts.stdenv() { + let injected = self.inject_std_macros(cmod); + Rc::new(injected) + } else { + cmod + }; + + while let SExp::Cons(_, f, r) = tocompile.borrow() { + let mut lst = self.process_pp_form(includes, f.clone())?; + result.append(&mut lst); + tocompile = r.clone(); + } + + Ok(result) + } +} + +/// Run the preprocessor over this code, which at present just finds (include ...) +/// forms in the source and includes the content of in a combined list. If a file +/// can't be found via the directory list in CompilerOrs. +pub fn preprocess( + opts: Rc, + includes: &mut Vec, + cmod: Rc, +) -> Result>, CompileErr> { + let mut p = Preprocessor::new(opts); + p.run(includes, cmod) +} + +/// Visit all files used during compilation. +/// This reports a list of all files used while compiling the input file, via any +/// form that causes compilation to include another file. The file names are path +/// expanded based on the include path they were found in (from opts). +pub fn gather_dependencies( + mut opts: Rc, + real_input_path: &str, + file_content: &str, +) -> Result, CompileErr> { + let mut allocator = Allocator::new(); + + let assembled_input = assemble(&mut allocator, file_content) + .map_err(|e| CompileErr(Srcloc::start(real_input_path), e.1))?; + let dialect = detect_modern(&mut allocator, assembled_input); + opts = opts.set_stdenv(dialect.strict).set_dialect(dialect.clone()); + if let Some(stepping) = dialect.stepping { + opts = opts + .set_optimize(stepping > 22) + .set_frontend_opt(stepping > 21); + } + + let parsed = parse_sexp(Srcloc::start(real_input_path), file_content.bytes())?; + let program = frontend(opts, &parsed)?; + + let filtered_results: Vec = program + .include_forms + .into_iter() + .filter(|f| !f.name.starts_with(b"*")) + .collect(); + Ok(filtered_results) +} diff --git a/src/compiler/rename.rs b/src/compiler/rename.rs index 30557c681..3dbb91800 100644 --- a/src/compiler/rename.rs +++ b/src/compiler/rename.rs @@ -276,6 +276,8 @@ fn rename_in_bodyform( } } +/// Given a set of sequential bindings, create a stack of let forms that have +/// the same meaning. This is used to propogate renaming. pub fn desugar_sequential_let_bindings( bindings: &[Rc], body: &BodyForm, @@ -417,12 +419,8 @@ fn rename_in_helperform( body: Rc::new(rename_in_bodyform(namemap, defc.body.clone())?), })), HelperForm::Defmacro(mac) => Ok(HelperForm::Defmacro(DefmacData { - loc: mac.loc.clone(), - kw: mac.kw.clone(), - nl: mac.nl.clone(), - name: mac.name.to_vec(), - args: mac.args.clone(), program: Rc::new(rename_in_compileform(namemap, mac.program.clone())?), + ..mac.clone() })), HelperForm::Defun(inline, defun) => Ok(HelperForm::Defun( *inline, @@ -434,7 +432,7 @@ fn rename_in_helperform( } } -fn rename_args_helperform(h: &HelperForm) -> Result { +pub fn rename_args_helperform(h: &HelperForm) -> Result { match h { HelperForm::Defconstant(defc) => Ok(HelperForm::Defconstant(DefconstData { loc: defc.loc.clone(), @@ -456,15 +454,12 @@ fn rename_args_helperform(h: &HelperForm) -> Result { let local_renamed_arg = rename_in_cons(&local_namemap, mac.args.clone(), true); let local_renamed_body = rename_args_compileform(mac.program.borrow())?; Ok(HelperForm::Defmacro(DefmacData { - loc: mac.loc.clone(), - kw: mac.kw.clone(), - nl: mac.nl.clone(), - name: mac.name.clone(), args: local_renamed_arg, program: Rc::new(rename_in_compileform( &local_namemap, Rc::new(local_renamed_body), )?), + ..mac.clone() })) } HelperForm::Defun(inline, defun) => { @@ -503,18 +498,25 @@ fn rename_in_compileform( }) } +/// For all the HelperForms in a CompileForm, do renaming in them so that all +/// unique variable bindings in the program have unique names. pub fn rename_children_compileform(c: &CompileForm) -> Result { + let c_ref: &CompileForm = c; let local_renamed_helpers = map_m(&rename_args_helperform, &c.helpers)?; let local_renamed_body = rename_args_bodyform(c.exp.borrow())?; Ok(CompileForm { - loc: c.loc.clone(), - args: c.args.clone(), - include_forms: c.include_forms.clone(), helpers: local_renamed_helpers, exp: Rc::new(local_renamed_body), + ..c_ref.clone() }) } +/// Given a compileform, perform renaming in descendants so that every variable +/// name that lives in a different scope has a unique name. This allows +/// compilation to treat identical forms as equivalent and ensures that forms +/// that look the same but refer to different variables are different. It also +/// ensures that future tricky variable name uses decide on one binding from their +/// lexical scope. pub fn rename_args_compileform(c: &CompileForm) -> Result { let new_names = invent_new_names_sexp(c.args.clone()); let mut local_namemap = HashMap::new(); diff --git a/src/compiler/runtypes.rs b/src/compiler/runtypes.rs index 720822337..526f9afef 100644 --- a/src/compiler/runtypes.rs +++ b/src/compiler/runtypes.rs @@ -1,6 +1,7 @@ use std::fmt::Display; use std::rc::Rc; +use crate::compiler::comptypes::CompileErr; use crate::compiler::sexp::SExp; use crate::compiler::srcloc::Srcloc; @@ -27,3 +28,20 @@ impl Display for RunFailure { Ok(()) } } + +impl From for CompileErr { + fn from(item: RunFailure) -> Self { + match item { + RunFailure::RunExn(l, s) => CompileErr(l.clone(), format!("Runtime exception: {s}")), + RunFailure::RunErr(l, s) => CompileErr(l.clone(), format!("Runtime error: {s}")), + } + } +} + +impl From for RunFailure { + fn from(e: CompileErr) -> Self { + match e { + CompileErr(l, e) => RunFailure::RunErr(l, e), + } + } +} diff --git a/src/compiler/sexp.rs b/src/compiler/sexp.rs index 58d01ad5b..30becac44 100644 --- a/src/compiler/sexp.rs +++ b/src/compiler/sexp.rs @@ -272,7 +272,7 @@ fn normalize_int(v: Vec, base: u32) -> Number { fn from_hex(l: Srcloc, v: &[u8]) -> SExp { let mut result = vec![0; (v.len() - 2) / 2]; hex2bin(&v[2..], &mut result).expect("should convert from hex"); - SExp::QuotedString(l, b'"', result) + SExp::QuotedString(l, b'x', result) } fn make_atom(l: Srcloc, v: Vec) -> SExp { @@ -356,7 +356,7 @@ pub fn decode_string(v: &[u8]) -> String { return String::from_utf8_lossy(v).as_ref().to_string(); } -fn printable(a: &[u8]) -> bool { +pub fn printable(a: &[u8]) -> bool { for ch in a.iter() { if (*ch as char).is_control() || !(*ch as char).is_ascii() { return false; @@ -980,3 +980,116 @@ fn test_tricky_parser_tail_03() { ))]) ); } + +// This is a trait that generates a haskell-like ad-hoc type from the user's +// construction of NodeSel and ThisNode. +// the result is transformed into a NodeSel tree of NodePtr if it can be. +// The type of the result is an ad-hoc shape derived from the shape of the +// original request. +// +// This mirrors code in src/classic/clvm/sexp.rs +// +// It's a nicer way of modelling matches that will overtake bespoke code for a lot +// of things. +#[derive(Debug, Clone)] +pub enum NodeSel { + Cons(T, U), +} + +#[derive(Debug, Clone)] +pub enum First { + Here(T), +} + +#[derive(Debug, Clone)] +pub enum Rest { + Here(T), +} + +#[derive(Debug, Clone)] +pub struct ThisNode; + +pub enum Atom { + Here(T), +} + +pub trait SelectNode { + fn select_nodes(&self, s: Rc) -> Result; +} + +impl SelectNode, E> for ThisNode { + fn select_nodes(&self, s: Rc) -> Result, E> { + Ok(s) + } +} + +impl SelectNode<(Srcloc, Vec), (Srcloc, String)> for Atom<()> { + fn select_nodes(&self, s: Rc) -> Result<(Srcloc, Vec), (Srcloc, String)> { + if let SExp::Atom(loc, name) = s.borrow() { + return Ok((loc.clone(), name.clone())); + } + + Err((s.loc(), "Not an atom".to_string())) + } +} + +impl SelectNode for Atom<&str> { + fn select_nodes(&self, s: Rc) -> Result { + let Atom::Here(name) = self; + if let Ok((l, n)) = Atom::Here(()).select_nodes(s.clone()) { + if n == name.as_bytes() { + return Ok(l); + } + } + + Err((s.loc(), format!("Not an atom named {name}"))) + } +} + +impl SelectNode<(), E> for () { + fn select_nodes(&self, _n: Rc) -> Result<(), E> { + Ok(()) + } +} + +impl SelectNode, E> for First +where + R: SelectNode + Clone, + E: From<(Srcloc, String)>, +{ + fn select_nodes(&self, s: Rc) -> Result, E> { + let First::Here(f) = &self; + let NodeSel::Cons(first, ()) = NodeSel::Cons(f.clone(), ()).select_nodes(s)?; + Ok(First::Here(first)) + } +} + +impl SelectNode, E> for Rest +where + R: SelectNode + Clone, + E: From<(Srcloc, String)>, +{ + fn select_nodes(&self, s: Rc) -> Result, E> { + let Rest::Here(f) = &self; + let NodeSel::Cons((), rest) = NodeSel::Cons((), f.clone()).select_nodes(s)?; + Ok(Rest::Here(rest)) + } +} + +impl SelectNode, E> for NodeSel +where + R: SelectNode, + S: SelectNode, + E: From<(Srcloc, String)>, +{ + fn select_nodes(&self, s: Rc) -> Result, E> { + let NodeSel::Cons(my_left, my_right) = &self; + if let SExp::Cons(_, l, r) = s.borrow() { + let first = my_left.select_nodes(l.clone())?; + let rest = my_right.select_nodes(r.clone())?; + Ok(NodeSel::Cons(first, rest)) + } else { + Err(E::from((s.loc(), "not a cons".to_string()))) + } + } +} diff --git a/src/tests/classic/embed.rs b/src/tests/classic/embed.rs index 45691e57d..d74796ad6 100644 --- a/src/tests/classic/embed.rs +++ b/src/tests/classic/embed.rs @@ -60,9 +60,11 @@ fn test_embed_exhaustive() { for order in 0..=1 { for exists in 0..=1 { for (include_kind, include_file, want_hash) in include_list.iter() { - for modern in 0..=1 { - let modern_sigil = if modern > 0 { + for modern in 0..=2 { + let modern_sigil = if modern == 1 { "(include *standard-cl-21*)" + } else if modern == 2 { + "(include *strict-cl-21*)" } else { "" }; diff --git a/src/tests/classic/run.rs b/src/tests/classic/run.rs index 69e8f297e..22313ecb4 100644 --- a/src/tests/classic/run.rs +++ b/src/tests/classic/run.rs @@ -1078,6 +1078,194 @@ fn test_cost_reporting_0() { ); } +#[test] +fn test_strict_smoke_0() { + let result = do_basic_run(&vec![ + "run".to_string(), + "-i".to_string(), + "resources/tests/strict".to_string(), + "resources/tests/strict/strict-test-fail.clsp".to_string(), + ]); + assert!(result.contains("Unbound")); + assert!(result.contains("X1")); +} + +#[test] +fn test_strict_smoke_1() { + let result_prog = do_basic_run(&vec![ + "run".to_string(), + "-i".to_string(), + "resources/tests/strict".to_string(), + "resources/tests/strict/strict-test-pass.clsp".to_string(), + ]); + let result = do_basic_brun(&vec!["brun".to_string(), result_prog, "(13)".to_string()]) + .trim() + .to_string(); + assert_eq!(result, "15"); +} + +#[test] +fn test_strict_list_fail() { + let result = do_basic_run(&vec![ + "run".to_string(), + "-i".to_string(), + "resources/tests/strict".to_string(), + "resources/tests/strict/strict-list-fail.clsp".to_string(), + ]); + assert!(result.contains("Unbound")); + assert!(result.contains("X2")); +} + +#[test] +fn test_strict_list_pass() { + let result_prog = do_basic_run(&vec![ + "run".to_string(), + "-i".to_string(), + "resources/tests/strict".to_string(), + "resources/tests/strict/strict-list-pass.clsp".to_string(), + ]); + let result = do_basic_brun(&vec!["brun".to_string(), result_prog, "(13)".to_string()]) + .trim() + .to_string(); + assert_eq!(result, "(strlen 14 15)"); +} + +#[test] +fn test_strict_nested_list_pass() { + let result_prog = do_basic_run(&vec![ + "run".to_string(), + "-i".to_string(), + "resources/tests/strict".to_string(), + "resources/tests/strict/strict-nested-list.clsp".to_string(), + ]); + let result = do_basic_brun(&vec!["brun".to_string(), result_prog, "(13)".to_string()]) + .trim() + .to_string(); + assert_eq!(result, "(strlen (strlen) ((strlen)))"); +} + +#[test] +fn test_double_constant_pass() { + let result_prog = do_basic_run(&vec![ + "run".to_string(), + "-i".to_string(), + "resources/tests/strict".to_string(), + "resources/tests/strict/double-constant-pass.clsp".to_string(), + ]); + let result = do_basic_brun(&vec!["brun".to_string(), result_prog, "()".to_string()]) + .trim() + .to_string(); + assert_eq!(result, "198"); +} + +#[test] +fn test_double_constant_fail() { + let result = do_basic_run(&vec![ + "run".to_string(), + "-i".to_string(), + "resources/tests/strict".to_string(), + "resources/tests/strict/double-constant-fail.clsp".to_string(), + ]); + assert!(result.contains("not a number given to only-integers")); + assert!(result.contains("\"hithere\"")); +} + +#[test] +fn test_double_constant_pass_in_function() { + let result_prog = do_basic_run(&vec![ + "run".to_string(), + "-i".to_string(), + "resources/tests/strict".to_string(), + "resources/tests/strict/double-constant-pass-in-function.clsp".to_string(), + ]); + let result = do_basic_brun(&vec!["brun".to_string(), result_prog, "(13)".to_string()]) + .trim() + .to_string(); + assert_eq!(result, "211"); +} + +#[test] +fn test_check_symbol_kinds_nested_if() { + let result_prog = do_basic_run(&vec![ + "run".to_string(), + "-i".to_string(), + "resources/tests/strict".to_string(), + "resources/tests/strict/strict-classify-expr-if.clsp".to_string(), + ]); + let result_1 = do_basic_brun(&vec![ + "brun".to_string(), + result_prog.clone(), + "(1)".to_string(), + ]) + .trim() + .to_string(); + assert_eq!(result_1, "2"); + let result_0 = do_basic_brun(&vec!["brun".to_string(), result_prog, "(0)".to_string()]) + .trim() + .to_string(); + assert_eq!(result_0, "(q 1 2 3 4 4)"); +} + +// Note: this program is intentionally made to properly preprocess but trigger +// an error in strict compilation as a demonstration and test that the preprocessor +// is a mechanically separate step from compilation. Separating them like this +// has the advantage that you can emit preprocessed compiler input on its own +// and also that it internally untangles the stages and makes compilation simpler. +#[test] +fn test_defmac_if_smoke_preprocess() { + let result_prog = do_basic_run(&vec![ + "run".to_string(), + "-i".to_string(), + "resources/tests/strict".to_string(), + "-E".to_string(), + "resources/tests/strict/defmac_if_smoke.clsp".to_string(), + ]); + assert_eq!( + result_prog, + "(mod () (include *strict-cl-21*) (a (i t1 (com t2) (com t3)) @))" + ); + let result2 = do_basic_run(&vec!["run".to_string(), result_prog]); + assert!(result2.contains("Unbound use")); + // Ensure that we're identifying one of the actually misused variables, but + // do not make a claim about which one is identified first. + assert!(result2.contains("of t1") || result2.contains("of t2") || result2.contains("of t3")); +} + +#[test] +fn test_defmac_assert_smoke_preprocess() { + let result_prog = do_basic_run(&vec![ + "run".to_string(), + "-i".to_string(), + "resources/tests/strict".to_string(), + "-E".to_string(), + "resources/tests/strict/assert.clsp".to_string(), + ]); + assert_eq!( + result_prog, + "(mod (A) (include *strict-cl-21*) (a (i 1 (com (a (i A (com 13) (com (x))) @)) (com (x))) @))" + ); + let result_after_preproc = do_basic_run(&vec!["run".to_string(), result_prog]); + let result_with_preproc = do_basic_run(&vec![ + "run".to_string(), + "-i".to_string(), + "resources/tests/strict".to_string(), + "resources/tests/strict/assert.clsp".to_string(), + ]); + assert_eq!(result_after_preproc, result_with_preproc); + let run_result_true = do_basic_brun(&vec![ + "brun".to_string(), + result_with_preproc.clone(), + "(15)".to_string(), + ]); + assert_eq!(run_result_true.trim(), "13"); + let run_result_false = do_basic_brun(&vec![ + "brun".to_string(), + result_with_preproc.clone(), + "(0)".to_string(), + ]); + assert_eq!(run_result_false.trim(), "FAIL: clvm raise ()"); +} + #[test] fn test_assign_fancy_final_dot_rest() { let result_prog = do_basic_run(&vec![ @@ -1344,6 +1532,53 @@ fn test_classic_obeys_operator_choice_at_compile_time_version_0() { assert_eq!(compiled, "FAIL: unimplemented operator 48"); } +#[test] +fn test_continued_if() { + let prog = indoc! {" +(mod X + (include *strict-cl-21*) + + (defun bigatom (Xs Ys) + (if + Xs (concat (f Xs) (bigatom (r Xs) Ys)) + Ys (concat (f Ys) (bigatom (r Ys) ())) + () + ) + ) + + (bigatom (q . (3 4 5)) X) + )"} + .to_string(); + let compiled = do_basic_run(&vec!["run".to_string(), prog]) + .trim() + .to_string(); + let res = do_basic_brun(&vec![ + "brun".to_string(), + compiled, + "(13 99 144)".to_string(), + ]) + .trim() + .to_string(); + assert_eq!(res.to_string(), "0x0304050d630090"); +} + +#[test] +fn test_preprocess_can_recurse() { + let prog = "resources/tests/strict/test-inner-include.clsp".to_string(); + let res = do_basic_run(&vec![ + "run".to_string(), + "-i".to_string(), + "resources/tests/strict".to_string(), + prog.clone(), + ]) + .trim() + .to_string(); + assert_eq!( + res, + "(2 (1 2 (3 5 (1 2 (1 18 5 (1 . 2)) 1) (1 2 (1 16 5 (1 . 1)) 1)) 1) (4 (1) 1))" + ); +} + #[test] fn test_assign_rename_tricky() { let filename = "resources/tests/cse-complex-21.clsp"; diff --git a/src/tests/classic/stage_2.rs b/src/tests/classic/stage_2.rs index 8bf43064f..9ac9aecf6 100644 --- a/src/tests/classic/stage_2.rs +++ b/src/tests/classic/stage_2.rs @@ -377,6 +377,9 @@ impl CompilerOpts for TestCompilerOptsPresentsOwnFiles { fn set_start_env(&self, _start_env: Option>) -> Rc { Rc::new(self.clone()) } + fn set_prim_map(&self, _prims: Rc, Rc>>) -> Rc { + Rc::new(self.clone()) + } fn set_disassembly_ver(&self, _ver: Option) -> Rc { Rc::new(self.clone()) } diff --git a/src/tests/compiler/compiler.rs b/src/tests/compiler/compiler.rs index 9a38cfb34..b43211a8d 100644 --- a/src/tests/compiler/compiler.rs +++ b/src/tests/compiler/compiler.rs @@ -7,6 +7,7 @@ use crate::classic::clvm_tools::stages::stage_0::DefaultProgramRunner; use crate::compiler::clvm::run; use crate::compiler::compiler::{compile_file, DefaultCompilerOpts}; use crate::compiler::comptypes::{CompileErr, CompilerOpts}; +use crate::compiler::dialect::AcceptedDialect; use crate::compiler::frontend::{collect_used_names_sexp, frontend}; use crate::compiler::rename::rename_in_cons; use crate::compiler::runtypes::RunFailure; @@ -27,14 +28,23 @@ fn run_string_maybe_opt( content: &String, args: &String, fe_opt: bool, + strict: bool, ) -> Result, CompileErr> { let mut allocator = Allocator::new(); let runner = Rc::new(DefaultProgramRunner::new()); let mut opts: Rc = Rc::new(DefaultCompilerOpts::new(&"*test*".to_string())); let srcloc = Srcloc::start(&"*test*".to_string()); opts = opts - .set_frontend_opt(fe_opt) + .set_frontend_opt(fe_opt && !strict) .set_search_paths(&vec!["resources/tests".to_string()]); + + if strict { + opts = opts.set_dialect(AcceptedDialect { + stepping: Some(21), + strict: true, + }); + } + let sexp_args = parse_sexp(srcloc.clone(), args.bytes())?[0].clone(); compile_file( @@ -51,6 +61,7 @@ fn run_string_maybe_opt( Rc::new(HashMap::new()), Rc::new(x), sexp_args, + None, Some(TEST_TIMEOUT), ) .map_err(|e| match e { @@ -61,7 +72,11 @@ fn run_string_maybe_opt( } pub fn run_string(content: &String, args: &String) -> Result, CompileErr> { - run_string_maybe_opt(content, args, false) + run_string_maybe_opt(content, args, false, false) +} + +pub fn run_string_strict(content: &String, args: &String) -> Result, CompileErr> { + run_string_maybe_opt(content, args, false, true) } // Given some renaming that leaves behind gensym style names with _$_ in them, @@ -162,6 +177,7 @@ fn run_test_1_maybe_opt(opt: bool) { &"(mod () (defun f (a b) (+ (* a a) b)) (f 3 1))".to_string(), &"()".to_string(), opt, + false, ) .unwrap(); assert_eq!(result.to_string(), "10".to_string()); @@ -189,6 +205,7 @@ fn run_test_2_maybe_opt(opt: bool) { &"(mod (c) (defun f (a b) (+ (* a a) b)) (f 3 c))".to_string(), &"(4)".to_string(), opt, + false, ) .unwrap(); assert_eq!(result.to_string(), "13".to_string()); @@ -209,7 +226,8 @@ fn run_test_3_maybe_opt(opt: bool) { run_string_maybe_opt( &"(mod (arg_one) (defun factorial (input) (if (= input 1) 1 (* (factorial (- input 1)) input))) (factorial arg_one))".to_string(), &"(5)".to_string(), - opt + opt, + false, ).unwrap(); assert_eq!(result.to_string(), "120".to_string()); } @@ -229,7 +247,8 @@ fn run_test_4_maybe_opt(opt: bool) { run_string_maybe_opt( &"(mod () (defun makelist (a) (if a (c (q . 4) (c (f a) (c (makelist (r a)) (q . ())))) (q . ()))) (makelist (q . (1 2 3))))".to_string(), &"()".to_string(), - opt + opt, + false, ).unwrap(); assert_eq!(result.to_string(), "(4 1 (4 2 (4 3 ())))".to_string()); } @@ -245,8 +264,13 @@ fn run_test_4_opt() { } fn run_test_5_maybe_opt(opt: bool) { - let result = - run_string_maybe_opt(&"(mod (a) (list 1 2))".to_string(), &"()".to_string(), opt).unwrap(); + let result = run_string_maybe_opt( + &"(mod (a) (list 1 2))".to_string(), + &"()".to_string(), + opt, + false, + ) + .unwrap(); assert_eq!(result.to_string(), "(1 2)".to_string()); } @@ -265,7 +289,8 @@ fn run_test_6_maybe_opt(opt: bool) { run_string_maybe_opt( &"(mod args (defmacro square (input) (qq (* (unquote input) (unquote input)))) (defun sqre_list (my_list) (if my_list (c (square (f my_list)) (sqre_list (r my_list))) my_list)) (sqre_list args))".to_string(), &"(10 9 8 7)".to_string(), - opt + opt, + false, ).unwrap(); assert_eq!(result.to_string(), "(100 81 64 49)".to_string()); } @@ -285,7 +310,8 @@ fn run_test_7_maybe_opt(opt: bool) { run_string_maybe_opt( &"(mod (PASSWORD_HASH password new_puzhash amount) (defconstant CREATE_COIN 51) (defun check_password (PASSWORD_HASH password new_puzhash amount) (if (= (sha256 password) PASSWORD_HASH) (list (list CREATE_COIN new_puzhash amount)) (x))) (check_password PASSWORD_HASH password new_puzhash amount))".to_string(), &"(0x2ac6aecf15ac3042db34af4863da46111da7e1bf238fc13da1094f7edc8972a1 \"sha256ftw\" 0x12345678 1000000000)".to_string(), - opt + opt, + false, ).unwrap(); assert_eq!( result.to_string(), @@ -308,6 +334,7 @@ fn run_test_8_maybe_opt(opt: bool) { &"(mod (a b) (let ((x (+ a 1)) (y (+ b 1))) (+ x y)))".to_string(), &"(5 8)".to_string(), opt, + false, ) .unwrap(); assert_eq!(result.to_string(), "15".to_string()); @@ -528,6 +555,7 @@ fn run_test_9_maybe_opt(opt: bool) { &"(mod (a) (defun f (i) (let ((x (not i)) (y (* i 2))) (+ x y))) (f a))".to_string(), &"(0)".to_string(), opt, + false, ) .unwrap(); assert_eq!(result.to_string(), "1".to_string()); @@ -548,6 +576,7 @@ fn run_test_10_maybe_opt(opt: bool) { &"(mod (a) (defun f (i) (let ((x (not i)) (y (* i 2))) (+ x y))) (f a))".to_string(), &"(3)".to_string(), opt, + false, ) .unwrap(); assert_eq!(result.to_string(), "6".to_string()); @@ -753,6 +782,7 @@ fn test_collatz_maybe_opt(opt: bool) { .to_string(), &"(4)".to_string(), opt, + false, ) .unwrap(); assert_eq!(result.to_string(), "2"); @@ -792,6 +822,7 @@ fn fancy_nested_let_bindings_should_work() { .to_string(), &"(1 2 3 100 99)".to_string(), false, + false, ) .unwrap(); assert_eq!(result.to_string(), "8"); @@ -810,6 +841,7 @@ fn let_as_argument() { .to_string(), &"(5)".to_string(), false, + false, ) .unwrap(); assert_eq!(result.to_string(), "13"); @@ -829,6 +861,7 @@ fn recursive_let_complicated_arguments() { .to_string(), &"(7 13)".to_string(), false, + false, ) .unwrap(); assert_eq!(result.to_string(), "32"); @@ -862,6 +895,7 @@ fn test_let_structure_access_1() { .to_string(), &"(7 13)".to_string(), false, + false, ) .unwrap(); // a = 1 @@ -906,6 +940,7 @@ fn test_let_structure_access_2() { .to_string(), &"(7 13)".to_string(), false, + false, ) .unwrap(); // a = 1 @@ -935,6 +970,7 @@ fn test_let_inline_1() { .to_string(), &"(5)".to_string(), false, + false, ) .unwrap(); assert_eq!(result.to_string(), "11"); diff --git a/src/tests/compiler/mod.rs b/src/tests/compiler/mod.rs index 74cb6aa23..f7e502ead 100644 --- a/src/tests/compiler/mod.rs +++ b/src/tests/compiler/mod.rs @@ -9,6 +9,7 @@ mod cldb; mod clvm; mod compiler; mod evaluate; +mod preprocessor; mod repl; mod restargs; mod runtypes; diff --git a/src/tests/compiler/preprocessor.rs b/src/tests/compiler/preprocessor.rs new file mode 100644 index 000000000..e2a40e515 --- /dev/null +++ b/src/tests/compiler/preprocessor.rs @@ -0,0 +1,610 @@ +use crate::compiler::compiler::DefaultCompilerOpts; +use crate::compiler::comptypes::CompilerOpts; +use crate::compiler::dialect::AcceptedDialect; +use crate::compiler::preprocessor::preprocess; +use crate::compiler::sexp::parse_sexp; +use crate::compiler::srcloc::Srcloc; +use std::rc::Rc; + +use crate::tests::compiler::compiler::run_string_strict; + +#[test] +fn test_defmac_basic_0() { + let prog = indoc! {" + (mod (X) + (defmac double-arg (A) (list (string->symbol (string-append (symbol->string A) \"1\")) (string->symbol (string-append (symbol->string A) \"2\")))) + (defun strange (double-arg X) (+ X1 X2)) + (strange X (* 2 X)) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(3)".to_string()).unwrap(); + assert_eq!(res.to_string(), "9"); +} + +#[test] +fn test_defmac_basic_shared_constant() { + let prog = indoc! {" + (mod (X) + (defconstant twostring \"2\") + (defmac double-arg (A) (list (string->symbol (string-append (symbol->string A) \"1\")) (string->symbol (string-append (symbol->string A) twostring)))) + (defun strange (double-arg X) (+ X1 X2)) + (c twostring (strange X (* 2 X))) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(3)".to_string()).unwrap(); + assert_eq!(res.to_string(), "(\"2\" . 9)"); +} + +#[test] +fn test_defmac_basic_shared_constant_not_string_with_string_operator() { + let prog = indoc! {" + (mod (X) + (defconstant twostring 2) + (defmac double-arg (A) (list (string->symbol (string-append (symbol->string A) \"1\")) (string->symbol (string-append (symbol->string A) twostring)))) + (defun strange (double-arg X) (+ X1 X2)) + (c twostring (strange X (* 2 X))) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(3)".to_string()); + assert!(res.is_err()); +} + +#[test] +fn test_defmac_basic_shared_constant_not_string_with_string_operator_fun() { + let prog = indoc! {" + (mod (X) + (defconstant twostring \"2\") + (defun make-arg-list (A) (list (string->symbol (string-append (symbol->string A) \"1\")) (string->symbol (string-append (symbol->string A) twostring)))) + (defmac double-arg (A) (make-arg-list A)) + (defun strange (double-arg X) (+ X1 X2)) + (c twostring (strange X (* 2 X))) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(3)".to_string()).unwrap(); + assert_eq!(res.to_string(), "(\"2\" . 9)"); +} + +#[test] +fn test_defmac_basic_test_is_string_pos() { + let prog = indoc! {" + (mod (X) + (defmac classify (S) + (if (string? S) + (qq (c 1 (unquote S))) + (qq (c 2 (unquote S))) + ) + ) + (c X (classify \"test\")) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(3)".to_string()).unwrap(); + assert_eq!(res.to_string(), "(3 1 . \"test\")"); +} + +#[test] +fn test_defmac_basic_test_is_string_neg() { + let prog = indoc! {" + (mod (X) + (defmac classify (S) + (if (string? S) + (qq (c 1 (unquote S))) + (qq (c 2 (unquote S))) + ) + ) + (c X (classify 99)) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(3)".to_string()).unwrap(); + assert_eq!(res.to_string(), "(3 2 . 99)"); +} + +#[test] +fn test_defmac_basic_test_is_symbol_pos() { + let prog = indoc! {" + (mod (X) + (defmac classify (S) + (if (symbol? S) + (qq (c 1 (unquote (symbol->string S)))) + (qq (c 2 (unquote S))) + ) + ) + (c X (classify test)) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(3)".to_string()).unwrap(); + assert_eq!(res.to_string(), "(3 1 . \"test\")"); +} + +#[test] +fn test_defmac_basic_test_is_symbol_neg() { + let prog = indoc! {" + (mod (X) + (defmac classify (S) + (if (symbol? S) + (qq (c 1 (unquote S))) + (qq (c 2 (unquote S))) + ) + ) + (c X (classify \"test\")) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(3)".to_string()).unwrap(); + assert_eq!(res.to_string(), "(3 2 . \"test\")"); +} + +#[test] +fn test_defmac_basic_test_is_number_pos() { + let prog = indoc! {" + (mod (X) + (defmac classify (S) + (if (number? S) + (qq (c 1 (unquote S))) + (qq (c 2 (unquote S))) + ) + ) + (c X (classify 7)) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(3)".to_string()).unwrap(); + assert_eq!(res.to_string(), "(3 1 . 7)"); +} + +#[test] +fn test_defmac_basic_test_is_number_neg() { + let prog = indoc! {" + (mod (X) + (defmac classify (S) + (if (number? S) + (qq (c 1 (unquote S))) + (qq (c 2 (unquote S))) + ) + ) + (c X (classify \"test\")) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(3)".to_string()).unwrap(); + assert_eq!(res.to_string(), "(3 2 . \"test\")"); +} + +#[test] +fn test_defmac_extension_from_function() { + let prog = indoc! {" + (mod (X) + (defun FX (X) (symbol->string X)) + (defmac F (X) (FX X)) + (c 3 (F X)) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(3)".to_string()).unwrap(); + assert_eq!(res.to_string(), "(3 . \"X\")"); +} + +#[test] +fn test_defmac_if_extension() { + let prog = indoc! {" + (mod (X) + (defun FX (X) (if X (number->string 1) 2)) + (defmac F (X) (c 1 (FX X))) + (F X) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(9)".to_string()).unwrap(); + assert_eq!(res.to_string(), "\"1\""); +} + +#[test] +fn test_defmac_create_match_form() { + let prog = indoc! {" + ;; Make a simple list match ala ocaml/elm/haskell. + ;; + ;; The real version will be more elaborate. This is a test case and a demo. + (mod X + (include *standard-cl-21*) + (defun list-nth (L N) + (if N + (list-nth (r L) (- N 1)) + (f L) + ) + ) + + (defun list-len (L N) + (if L + (list-len (r L) (+ N 1)) + N + ) + ) + + (defun funcall (name args) (c (string->symbol name) args)) + (defun quoted (X) (c 1 X)) + (defun equals (A B) (funcall \"=\" (list A B))) + (defun emit-list-nth (L N) (funcall \"list-nth\" (list L N))) + (defun emit-list-len (L N) (funcall \"list-len\" (list L N))) + (defun emit-if (C T E) (funcall \"if\" (list C T E))) + (defun emit-let (bindings body) (funcall \"let\" (list bindings body))) + + ;; Determine the size of each list as well as the constants and bindings + ;; Return either a list of (number-of-elts matches bindings) or symbol. + (defun build-matches-and-bindings (n pattern matches bindings) + (if (not pattern) + (list n matches bindings) + (if (l pattern) + (if (symbol? (f pattern)) + (build-matches-and-bindings (+ n 1) (r pattern) matches (c (c n (f pattern)) bindings)) + (build-matches-and-bindings (+ n 1) (r pattern) (c (c n (f pattern)) matches) bindings) + ) + pattern + ) + ) + ) + + ;; Emit code that matches each match list for length and constants. + (defun write-match-code (expr matches) + (if (not matches) + (quoted 1) + (if (l (f matches)) + (let* + ( + (offset (f (f matches))) + (desire (r (f matches))) + (this-match (equals (quoted desire) (emit-list-nth expr (quoted offset)))) + ) + (if (not (r matches)) + (list this-match) + (c this-match (write-match-code expr (r matches))) + ) + ) + (quoted 1) + ) + ) + ) + + ;; Generate let bindings for the bindings indicated in the match. + (defun let-bindings (expr bindings) + (if (not bindings) + () + (let + ((n (f (f bindings))) + (binding (r (f bindings))) + ) + (c (list binding (emit-list-nth expr n)) (let-bindings expr (r bindings))) + ) + ) + ) + + ;; Generate if expressions that match the indicates matches and return + ;; the indicated code with bindings installed. + (defun match-if (expr clauses) + (if (not clauses) + (list 8) + (let + ((extracted-clause-data (build-matches-and-bindings 0 (f (f clauses)) () ())) + (code (f (r (f clauses)))) + ) + (if (l extracted-clause-data) + (let + ( + (number-of-elts (f extracted-clause-data)) + (matches (list-nth extracted-clause-data 1)) + (bindings (list-nth extracted-clause-data 2)) + ) + (emit-if + (emit-if + (equals (emit-list-len expr 0) (quoted number-of-elts)) + ;; then + (c (string->symbol \"logand\") (write-match-code expr matches)) + ;; else + () + ) + ;; then + (emit-let (let-bindings expr bindings) code) + ;; else + (match-if expr (r clauses)) + ) + ) + (emit-let (list (list extracted-clause-data expr)) code) + ) + ) + ) + ) + + ;; match as below. + (defmac match (expr . matches) (match-if expr matches)) + + (match X + ((16 x y) (c 1 (+ x y))) + ((3 () b c) c) + ((3 1 b c) b) + (x x) + ) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(3 () 1001 1002)".to_string()).unwrap(); + assert_eq!(res.to_string(), "1002"); +} + +#[test] +fn test_defmac_stringq() { + let prog = indoc! {" + (mod () + (defmac is-string (X) (string? X)) + (list (is-string X) (is-string \"X\") (is-string 3)) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"()".to_string()).unwrap(); + assert_eq!(res.to_string(), "(() 1 ())"); +} + +#[test] +fn test_defmac_numberq() { + let prog = indoc! {" + (mod () + (defmac is-number (X) (number? X)) + (list (is-number X) (is-number \"X\") (is-number 3)) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"()".to_string()).unwrap(); + assert_eq!(res.to_string(), "(() () 1)"); +} + +#[test] +fn test_defmac_symbolq() { + let prog = indoc! {" + (mod () + (defmac is-symbol (X) (symbol? X)) + (list (is-symbol X) (is-symbol \"X\") (is-symbol 3)) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"()".to_string()).unwrap(); + assert_eq!(res.to_string(), "(1 () ())"); +} + +#[test] +fn test_defmac_string_to_symbol() { + let prog = indoc! {" + (mod () + (defmac is-symbol (X) (symbol? X)) + (list (is-symbol X) (is-symbol \"X\") (is-symbol 3)) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"()".to_string()).unwrap(); + assert_eq!(res.to_string(), "(1 () ())"); +} + +#[test] +fn test_defmac_string_to_symbol_converts() { + let prog = indoc! {" + (mod (X) + (defmac let_pi (code) (qq (let (((unquote (string->symbol \"pi\")) 31415)) (unquote code)))) + (let_pi (+ pi X)) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(5)".to_string()).unwrap(); + assert_eq!(res.to_string(), "31420"); +} + +#[test] +fn test_defmac_string_needs_conversion() { + let prog = indoc! {" + (mod (X) + (defmac let_pi (code) (qq (let ((\"pi\" 31415)) (unquote code)))) + (let_pi (+ pi X)) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(5)".to_string()); + eprintln!("res {res:?}"); + assert!(res.is_err()); +} + +#[test] +fn test_defmac_string_substr_0() { + let prog = indoc! {" + (mod (X) + (defmac first-letter-of (Q) + (let ((first-character (substring (symbol->string Q) 0 1))) + (qq (c (unquote first-character) (unquote (string->symbol first-character)))) + ) + ) + (first-letter-of Xanadu) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(5999)".to_string()).unwrap(); + assert_eq!(res.to_string(), "(\"X\" . 5999)"); +} + +#[test] +fn test_defmac_string_substr_bad() { + let prog = indoc! {" + (mod (test_variable_name) + (defmac bind-tail-of-symbol (N Q CODE) + (let* + ((stringified (symbol->string Q)) + (slen (string-length stringified)) + (suffix (string->symbol (substring stringified N slen)))) + (qq (let (((unquote suffix) (r (unquote Q)))) (unquote CODE))) + ) + ) + (bind-tail-of-symbol 100 test_variable_name (c 9999 variable_name)) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"((87 89 91))".to_string()); + assert!(res.is_err()); +} + +#[test] +fn test_defmac_string_to_number_0() { + let prog = indoc! {" + (mod (X_7) + (defmac add-n-to (X) + (let* + ((stringified (symbol->string X)) + (slen (string-length stringified)) + (number-part (substring stringified (- slen 1) slen)) + (numeric-value (string->number number-part))) + (qq (+ (unquote numeric-value) (unquote X))) + ) + ) + (add-n-to X_7) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(31)".to_string()).unwrap(); + assert_eq!(res.to_string(), "38"); +} + +#[test] +fn test_defmac_string_to_number_bad() { + let prog = indoc! {" + (mod (X_A) + (defmac add-n-to (X) + (let* + ((stringified (symbol->string X)) + (slen (string-length stringified)) + (number-part (substring stringified (- slen 1) slen)) + (numeric-value (string->number number-part))) + (qq (+ (unquote numeric-value) (unquote X))) + ) + ) + (add-n-to X_A) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(31)".to_string()); + assert!(res.is_err()); +} + +#[test] +fn test_defmac_number_to_string() { + let prog = indoc! {" + (mod (Q) + (defmac with-my-length (X) + (let* + ((stringified (symbol->string X)) + (slen (string-length stringified))) + (string->symbol (string-append stringified \"-\" (number->string slen))) + ) + ) + (defun F (Xanadu-6) (+ (with-my-length Xanadu) 99)) + (F Q) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"(37)".to_string()).unwrap(); + assert_eq!(res.to_string(), "136"); +} + +#[test] +fn test_preprocess_basic_list() { + let file = "*test*"; + let input_form_set = indoc! {"( + (include *strict-cl-21*) + (list 1 2 3) + )"}; + let parsed_forms = + parse_sexp(Srcloc::start(file), input_form_set.bytes()).expect("should parse"); + let opts: Rc = + Rc::new(DefaultCompilerOpts::new(file)).set_dialect(AcceptedDialect { + stepping: Some(21), + strict: true, + }); + let mut includes = Vec::new(); + let pp = preprocess(opts.clone(), &mut includes, parsed_forms[0].clone()) + .expect("should preprocess"); + assert_eq!(pp[pp.len() - 1].to_string(), "(4 1 (4 2 (4 3 ())))"); +} + +#[test] +fn test_preprocess_expansion_makes_numeric_operators() { + let prog = indoc! {" + (mod () + (include *strict-cl-21*) + (defmac G () (com (4 \"test\" ()))) + (G) + ) + "} + .to_string(); + let res = run_string_strict(&prog, &"()".to_string()).unwrap(); + assert_eq!(res.to_string(), "(\"test\")"); +} + +#[test] +fn test_preprocessor_tours_includes_properly() { + let prog = indoc! {" + ( ;; Note: preprocessing is run in the list of the body forms. + (include *strict-cl-21*) + (include condition_codes.clvm) + (include curry-and-treehash.clinc) + () + ) + "} + .to_string(); + let pname = "*test*"; + let opts: Rc = Rc::new(DefaultCompilerOpts::new(pname)) + .set_search_paths(&["resources/tests".to_string()]) + .set_dialect(AcceptedDialect { + stepping: Some(21), + strict: true, + }); + let parsed = parse_sexp(Srcloc::start(pname), prog.bytes()).expect("should parse"); + let mut includes = Vec::new(); + let res = preprocess(opts, &mut includes, parsed[0].clone()).expect("should preprocess"); + let expected_lines = &[ + "(defmac __chia__primitive__if (A B C) (qq (a (i (unquote A) (com (unquote B)) (com (unquote C))) @)))", + "(defun __chia__if (ARGS) (a (i (r (r (r ARGS))) (com (qq (a (i (unquote (f ARGS)) (com (unquote (f (r ARGS)))) (com (unquote (__chia__if (r (r ARGS)))))) @))) (com (qq (a (i (unquote (f ARGS)) (com (unquote (f (r ARGS)))) (com (unquote (f (r (r ARGS)))))) @)))) @))", + "(defmac if ARGS (__chia__if ARGS))", + "(defun __chia__compile-list (args) (a (i args (com (c 4 (c (f args) (c (__chia__compile-list (r args)) ())))) (com ())) @))", + "(defmac list ARGS (__chia__compile-list ARGS))", + "(defun-inline / (A B) (f (divmod A B)))", + "(defconstant *chialisp-version* 22)", + "(defconstant AGG_SIG_UNSAFE 49)", + "(defconstant AGG_SIG_ME 50)", + "(defconstant CREATE_COIN 51)", + "(defconstant RESERVE_FEE 52)", + "(defconstant CREATE_COIN_ANNOUNCEMENT 60)", + "(defconstant ASSERT_COIN_ANNOUNCEMENT 61)", + "(defconstant CREATE_PUZZLE_ANNOUNCEMENT 62)", + "(defconstant ASSERT_PUZZLE_ANNOUNCEMENT 63)", + "(defconstant ASSERT_MY_COIN_ID 70)", + "(defconstant ASSERT_MY_PARENT_ID 71)", + "(defconstant ASSERT_MY_PUZZLEHASH 72)", + "(defconstant ASSERT_MY_AMOUNT 73)", + "(defconstant ASSERT_SECONDS_RELATIVE 80)", + "(defconstant ASSERT_SECONDS_ABSOLUTE 81)", + "(defconstant ASSERT_HEIGHT_RELATIVE 82)", + "(defconstant ASSERT_HEIGHT_ABSOLUTE 83)", + "(defconstant ONE 1)", + "(defconstant TWO 2)", + "(defconstant A_KW 2)", + "(defconstant Q_KW 1)", + "(defconstant C_KW 4)", + "(defun-inline update-hash-for-parameter-hash (parameter-hash environment-hash) (sha256 TWO (sha256 ONE C_KW) (sha256 TWO (sha256 TWO (sha256 ONE Q_KW) parameter-hash) (sha256 TWO environment-hash (sha256 ONE ())))))", + "(defun build-curry-list (reversed-curry-parameter-hashes environment-hash) (a (i reversed-curry-parameter-hashes (com (build-curry-list (r reversed-curry-parameter-hashes) (update-hash-for-parameter-hash (f reversed-curry-parameter-hashes) environment-hash))) (com environment-hash)) @))", + "(defun-inline tree-hash-of-apply (function-hash environment-hash) (sha256 TWO (sha256 ONE A_KW) (sha256 TWO (sha256 TWO (sha256 ONE Q_KW) function-hash) (sha256 TWO environment-hash (sha256 ONE ())))))", + "(defun puzzle-hash-of-curried-function (function-hash . reversed-curry-parameter-hashes) (tree-hash-of-apply function-hash (build-curry-list reversed-curry-parameter-hashes (sha256 ONE ONE))))", + "()", + ]; + for (i, r) in res.iter().enumerate() { + assert_eq!(r.to_string(), expected_lines[i]); + } + assert_eq!(res.len(), expected_lines.len()); +}