From 5a5f2e728e605b4fa5e4f4d48514a47e906ded09 Mon Sep 17 00:00:00 2001 From: Oliver Linnarsson Date: Tue, 9 Jul 2024 21:47:10 +0200 Subject: [PATCH] feat: Optimize tokenizer in javascript The tokenizer is no longer compiled to a recursive function, so we no longer depend on the JS engine performing tail call optimization correctly, which it usually didn't. --- gleam.toml | 1 + manifest.toml | 6 +- src/handles/internal/tokenizer.gleam | 216 +++++++++++++++------------ 3 files changed, 127 insertions(+), 96 deletions(-) diff --git a/gleam.toml b/gleam.toml index 1515336..b22624b 100644 --- a/gleam.toml +++ b/gleam.toml @@ -4,6 +4,7 @@ version = "3.0.3" description = "Handles is a templating language written in pure Gleam. Heavily inspired by Mustache and Handlebars.js" licences = ["MIT"] repository = { type = "github", user = "olian04", repo = "gleam_handles" } +gleam = ">= 1.0.0" [dependencies] gleam_stdlib = ">= 0.38.0 and < 1.0.0" diff --git a/manifest.toml b/manifest.toml index 38d2477..5c4e10a 100644 --- a/manifest.toml +++ b/manifest.toml @@ -2,10 +2,10 @@ # You typically do not need to edit this file packages = [ - { name = "gleam_stdlib", version = "0.38.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "663CF11861179AF415A625307447775C09404E752FF99A24E2057C835319F1BE" }, - { name = "gleeunit", version = "1.1.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "72CDC3D3F719478F26C4E2C5FED3E657AC81EC14A47D2D2DEBB8693CA3220C3B" }, + { name = "gleam_stdlib", version = "0.39.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "2D7DE885A6EA7F1D5015D1698920C9BAF7241102836CE0C3837A4F160128A9C4" }, + { name = "gleeunit", version = "1.2.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = 
"F7A7228925D3EE7D0813C922E062BFD6D7E9310F0BEE585D3A42F3307E3CFD13" }, ] [requirements] gleam_stdlib = { version = ">= 0.38.0 and < 1.0.0" } -gleeunit = { version = ">= 1.1.2 and < 2.0.0"} +gleeunit = { version = ">= 1.1.2 and < 2.0.0" } diff --git a/src/handles/internal/tokenizer.gleam b/src/handles/internal/tokenizer.gleam index 4d4bb2a..94bb08f 100644 --- a/src/handles/internal/tokenizer.gleam +++ b/src/handles/internal/tokenizer.gleam @@ -1,4 +1,5 @@ import gleam/list +import gleam/pair import gleam/result import gleam/string import handles/error @@ -15,6 +16,12 @@ pub type Token { EachBlockEnd(index: Int) } +type Action { + AddToken(String, Int, Token) + Stop(error.TokenizerError) + Done +} + /// {{ const length_of_open_tag_syntax = 2 @@ -54,123 +61,146 @@ fn capture_tag_body( }) } -pub fn run( - input: String, - index: Int, - tokens: List(Token), -) -> Result(List(Token), error.TokenizerError) { +fn tokenize(input: String, index: Int) -> Action { case input { - "{{>" <> rest -> { - use #(body, rest) <- result.try(capture_tag_body(rest, index)) - case split_body(body) { - [] -> Error(error.MissingPartialId(index + length_of_open_tag_syntax)) - [_] -> Error(error.MissingArgument(index + length_of_open_tag_syntax)) - [id, arg] -> - run(rest, index + string.length("{{>}}") + string.length(body), [ - Partial(index + length_of_open_tag_syntax, id, split_arg(arg)), - ..tokens - ]) - _ -> - Error(error.UnexpectedMultipleArguments( - index + length_of_open_tag_syntax, - )) + "" -> Done + "{{>" <> rest -> + case capture_tag_body(rest, index) { + Error(err) -> Stop(err) + Ok(#(body, rest)) -> + case split_body(body) { + [] -> + Stop(error.MissingPartialId(index + length_of_open_tag_syntax)) + [_] -> + Stop(error.MissingArgument(index + length_of_open_tag_syntax)) + [id, arg] -> + AddToken( + rest, + index + length_of_block_syntax + string.length(body), + Partial(index + length_of_open_tag_syntax, id, split_arg(arg)), + ) + _ -> + Stop(error.UnexpectedMultipleArguments( + 
index + length_of_open_tag_syntax, + )) + } } - } - "{{#" <> rest -> { - use #(body, rest) <- result.try(capture_tag_body(rest, index)) - case split_body(body) { - [] -> Error(error.MissingBlockKind(index + length_of_open_tag_syntax)) - [_] -> Error(error.MissingArgument(index + length_of_open_tag_syntax)) - [kind, arg] -> - case kind { - "if" -> - run(rest, index + length_of_block_syntax + string.length(body), [ + "{{#" <> rest -> + case capture_tag_body(rest, index) { + Error(err) -> Stop(err) + Ok(#(body, rest)) -> + case split_body(body) { + [] -> + Stop(error.MissingBlockKind(index + length_of_open_tag_syntax)) + [_] -> + Stop(error.MissingArgument(index + length_of_open_tag_syntax)) + ["if", arg] -> + AddToken( + rest, + index + length_of_block_syntax + string.length(body), IfBlockStart(index + length_of_open_tag_syntax, split_arg(arg)), - ..tokens - ]) - "unless" -> - run(rest, index + length_of_block_syntax + string.length(body), [ + ) + ["unless", arg] -> + AddToken( + rest, + index + length_of_block_syntax + string.length(body), UnlessBlockStart( index + length_of_open_tag_syntax, split_arg(arg), ), - ..tokens - ]) - "each" -> - run(rest, index + length_of_block_syntax + string.length(body), [ + ) + ["each", arg] -> + AddToken( + rest, + index + length_of_block_syntax + string.length(body), EachBlockStart( index + length_of_open_tag_syntax, split_arg(arg), ), - ..tokens - ]) - _ -> Error(error.UnexpectedBlockKind(index)) + ) + [_, _] -> + Stop(error.UnexpectedBlockKind(index + length_of_open_tag_syntax)) + _ -> + Stop(error.UnexpectedMultipleArguments( + index + length_of_open_tag_syntax, + )) } - _ -> - Error(error.UnexpectedMultipleArguments( - index + length_of_open_tag_syntax, - )) } - } - "{{/" <> rest -> { - use #(body, rest) <- result.try(capture_tag_body(rest, index)) - case split_body(body) { - [] -> Error(error.MissingBlockKind(index + length_of_open_tag_syntax)) - [_, _] -> - Error(error.UnexpectedArgument(index + length_of_open_tag_syntax)) - 
[kind] -> - case kind { - "if" -> - run(rest, index + length_of_block_syntax + string.length(body), [ + "{{/" <> rest -> + case capture_tag_body(rest, index) { + Error(err) -> Stop(err) + Ok(#(body, rest)) -> + case split_body(body) { + [] -> + Stop(error.MissingBlockKind(index + length_of_open_tag_syntax)) + [_, _] -> + Stop(error.UnexpectedArgument(index + length_of_open_tag_syntax)) + ["if"] -> + AddToken( + rest, + index + length_of_block_syntax + string.length(body), IfBlockEnd(index + length_of_open_tag_syntax), - ..tokens - ]) - "unless" -> - run(rest, index + length_of_block_syntax + string.length(body), [ + ) + ["unless"] -> + AddToken( + rest, + index + length_of_block_syntax + string.length(body), UnlessBlockEnd(index + length_of_open_tag_syntax), - ..tokens - ]) - "each" -> - run(rest, index + length_of_block_syntax + string.length(body), [ + ) + ["each"] -> + AddToken( + rest, + index + length_of_block_syntax + string.length(body), EachBlockEnd(index + length_of_open_tag_syntax), - ..tokens - ]) - _ -> Error(error.UnexpectedBlockKind(index)) + ) + [_] -> + Stop(error.UnexpectedBlockKind(index + length_of_open_tag_syntax)) + _ -> + Stop(error.UnexpectedArgument(index + length_of_open_tag_syntax)) } - _ -> Error(error.UnexpectedArgument(index + length_of_open_tag_syntax)) } - } - "{{" <> rest -> { - use #(body, rest) <- result.try(capture_tag_body(rest, index)) - case split_body(body) { - [] -> Error(error.MissingArgument(index + length_of_open_tag_syntax)) - [arg] -> - run(rest, index + length_of_property_syntax + string.length(body), [ - Property(index + length_of_open_tag_syntax, split_arg(arg)), - ..tokens - ]) - _ -> - Error(error.UnexpectedMultipleArguments( - index + length_of_open_tag_syntax, - )) + "{{" <> rest -> + case capture_tag_body(rest, index) { + Error(err) -> Stop(err) + Ok(#(body, rest)) -> + case split_body(body) { + [] -> Stop(error.MissingArgument(index + length_of_open_tag_syntax)) + [arg] -> + AddToken( + rest, + index + 
length_of_property_syntax + string.length(body), + Property(index + length_of_open_tag_syntax, split_arg(arg)), + ) + _ -> + Stop(error.UnexpectedMultipleArguments( + index + length_of_open_tag_syntax, + )) + } } - } - _ -> - case input |> string.split_once("{{") { + case + input + |> string.split_once("{{") + |> result.map(pair.map_second(_, fn(it) { "{{" <> it })) + { Ok(#(str, rest)) -> - run("{{" <> rest, index + string.length(str), [ - Constant(index, str), - ..tokens - ]) - _ -> - case input { - "" -> Ok(list.reverse(tokens)) - str -> Ok(list.reverse([Constant(index, str), ..tokens])) - } + AddToken(rest, index + string.length(str), Constant(index, str)) + _ -> AddToken("", index + string.length(input), Constant(index, input)) } } } + +pub fn run( + input: String, + index: Int, + tokens: List(Token), +) -> Result(List(Token), error.TokenizerError) { + case tokenize(input, index) { + Done -> Ok(list.reverse(tokens)) + Stop(err) -> Error(err) + AddToken(rest, index, token) -> run(rest, index, [token, ..tokens]) + } +}