
feat: Optimize tokenizer in javascript
The tokenizer is no longer compiled to a
recursive function, so we no longer depend on the
JS engine performing tail call optimization
correctly, which it usually didn't.
Olian04 committed Jul 9, 2024
1 parent e73a175 commit 5a5f2e7
Showing 3 changed files with 127 additions and 96 deletions.
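
For context on the fix: Gleam's compiler rewrites only direct self tail calls into loops in its JavaScript output, and the old run made its recursive call inside the callback that use passes to result.try, so the generated JS was genuinely recursive and leaned on engine-level tail call optimization that most engines don't provide. Below is a minimal sketch of the before/after shape; next, step, Emit, and Halt are hypothetical names for illustration, not code from this repo.

import gleam/list
import gleam/result
import gleam/string

// Toy "next token" helper: consumes one grapheme and returns the
// remaining input length as a stand-in token.
fn next(input: String) -> Result(#(Int, String), Nil) {
  case string.pop_grapheme(input) {
    Ok(#(_, rest)) -> Ok(#(string.length(input), rest))
    Error(Nil) -> Error(Nil)
  }
}

// Before: the recursive call lives inside the closure that `use`
// passes to result.try, so it is not a direct self tail call and
// the JS backend emits real recursion that grows the stack.
fn run_before(input: String, acc: List(Int)) -> Result(List(Int), Nil) {
  case input {
    "" -> Ok(list.reverse(acc))
    _ -> {
      use #(token, rest) <- result.try(next(input))
      run_before(rest, [token, ..acc])
    }
  }
}

// After: one step of work is reified as a plain value...
type Step {
  Emit(rest: String, token: Int)
  Halt
}

fn step(input: String) -> Step {
  case string.pop_grapheme(input) {
    Ok(#(_, rest)) -> Emit(rest, string.length(input))
    Error(Nil) -> Halt
  }
}

// ...and the driver loops on it with a direct self tail call,
// which the compiler can turn into a plain loop in the JS output.
fn run_after(input: String, acc: List(Int)) -> Result(List(Int), Nil) {
  case step(input) {
    Halt -> Ok(list.reverse(acc))
    Emit(rest, token) -> run_after(rest, [token, ..acc])
  }
}

This is the same shape the commit gives the real tokenizer: tokenize returns an Action value and run loops on it.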
1 change: 1 addition & 0 deletions gleam.toml
@@ -4,6 +4,7 @@ version = "3.0.3"
description = "Handles is a templating language written in pure Gleam. Heavily inspired by Mustache and Handlebars.js"
licences = ["MIT"]
repository = { type = "github", user = "olian04", repo = "gleam_handles" }
gleam = ">= 1.0.0"

[dependencies]
gleam_stdlib = ">= 0.38.0 and < 1.0.0"
6 changes: 3 additions & 3 deletions manifest.toml
@@ -2,10 +2,10 @@
# You typically do not need to edit this file

packages = [
{ name = "gleam_stdlib", version = "0.38.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "663CF11861179AF415A625307447775C09404E752FF99A24E2057C835319F1BE" },
{ name = "gleeunit", version = "1.1.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "72CDC3D3F719478F26C4E2C5FED3E657AC81EC14A47D2D2DEBB8693CA3220C3B" },
{ name = "gleam_stdlib", version = "0.39.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "2D7DE885A6EA7F1D5015D1698920C9BAF7241102836CE0C3837A4F160128A9C4" },
{ name = "gleeunit", version = "1.2.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "F7A7228925D3EE7D0813C922E062BFD6D7E9310F0BEE585D3A42F3307E3CFD13" },
]

[requirements]
gleam_stdlib = { version = ">= 0.38.0 and < 1.0.0" }
gleeunit = { version = ">= 1.1.2 and < 2.0.0"}
gleeunit = { version = ">= 1.1.2 and < 2.0.0" }
216 changes: 123 additions & 93 deletions src/handles/internal/tokenizer.gleam
@@ -1,4 +1,5 @@
import gleam/list
import gleam/pair
import gleam/result
import gleam/string
import handles/error
@@ -15,6 +16,12 @@ pub type Token {
EachBlockEnd(index: Int)
}

type Action {
AddToken(String, Int, Token)
Stop(error.TokenizerError)
Done
}

/// {{
const length_of_open_tag_syntax = 2

@@ -54,123 +61,146 @@ fn capture_tag_body(
})
}

pub fn run(
input: String,
index: Int,
tokens: List(Token),
) -> Result(List(Token), error.TokenizerError) {
fn tokenize(input: String, index: Int) -> Action {
case input {
"{{>" <> rest -> {
use #(body, rest) <- result.try(capture_tag_body(rest, index))
case split_body(body) {
[] -> Error(error.MissingPartialId(index + length_of_open_tag_syntax))
[_] -> Error(error.MissingArgument(index + length_of_open_tag_syntax))
[id, arg] ->
run(rest, index + string.length("{{>}}") + string.length(body), [
Partial(index + length_of_open_tag_syntax, id, split_arg(arg)),
..tokens
])
_ ->
Error(error.UnexpectedMultipleArguments(
index + length_of_open_tag_syntax,
))
"" -> Done
"{{>" <> rest ->
case capture_tag_body(rest, index) {
Error(err) -> Stop(err)
Ok(#(body, rest)) ->
case split_body(body) {
[] ->
Stop(error.MissingPartialId(index + length_of_open_tag_syntax))
[_] ->
Stop(error.MissingArgument(index + length_of_open_tag_syntax))
[id, arg] ->
AddToken(
rest,
index + length_of_block_syntax + string.length(body),
Partial(index + length_of_open_tag_syntax, id, split_arg(arg)),
)
_ ->
Stop(error.UnexpectedMultipleArguments(
index + length_of_open_tag_syntax,
))
}
}
}

"{{#" <> rest -> {
use #(body, rest) <- result.try(capture_tag_body(rest, index))
case split_body(body) {
[] -> Error(error.MissingBlockKind(index + length_of_open_tag_syntax))
[_] -> Error(error.MissingArgument(index + length_of_open_tag_syntax))
[kind, arg] ->
case kind {
"if" ->
run(rest, index + length_of_block_syntax + string.length(body), [
"{{#" <> rest ->
case capture_tag_body(rest, index) {
Error(err) -> Stop(err)
Ok(#(body, rest)) ->
case split_body(body) {
[] ->
Stop(error.MissingBlockKind(index + length_of_open_tag_syntax))
[_] ->
Stop(error.MissingArgument(index + length_of_open_tag_syntax))
["if", arg] ->
AddToken(
rest,
index + length_of_block_syntax + string.length(body),
IfBlockStart(index + length_of_open_tag_syntax, split_arg(arg)),
..tokens
])
"unless" ->
run(rest, index + length_of_block_syntax + string.length(body), [
)
["unless", arg] ->
AddToken(
rest,
index + length_of_block_syntax + string.length(body),
UnlessBlockStart(
index + length_of_open_tag_syntax,
split_arg(arg),
),
..tokens
])
"each" ->
run(rest, index + length_of_block_syntax + string.length(body), [
)
["each", arg] ->
AddToken(
rest,
index + length_of_block_syntax + string.length(body),
EachBlockStart(
index + length_of_open_tag_syntax,
split_arg(arg),
),
..tokens
])
_ -> Error(error.UnexpectedBlockKind(index))
)
[_, _] ->
Stop(error.UnexpectedBlockKind(index + length_of_open_tag_syntax))
_ ->
Stop(error.UnexpectedMultipleArguments(
index + length_of_open_tag_syntax,
))
}
_ ->
Error(error.UnexpectedMultipleArguments(
index + length_of_open_tag_syntax,
))
}
}

"{{/" <> rest -> {
use #(body, rest) <- result.try(capture_tag_body(rest, index))
case split_body(body) {
[] -> Error(error.MissingBlockKind(index + length_of_open_tag_syntax))
[_, _] ->
Error(error.UnexpectedArgument(index + length_of_open_tag_syntax))
[kind] ->
case kind {
"if" ->
run(rest, index + length_of_block_syntax + string.length(body), [
"{{/" <> rest ->
case capture_tag_body(rest, index) {
Error(err) -> Stop(err)
Ok(#(body, rest)) ->
case split_body(body) {
[] ->
Stop(error.MissingBlockKind(index + length_of_open_tag_syntax))
[_, _] ->
Stop(error.UnexpectedArgument(index + length_of_open_tag_syntax))
["if"] ->
AddToken(
rest,
index + length_of_block_syntax + string.length(body),
IfBlockEnd(index + length_of_open_tag_syntax),
..tokens
])
"unless" ->
run(rest, index + length_of_block_syntax + string.length(body), [
)
["unless"] ->
AddToken(
rest,
index + length_of_block_syntax + string.length(body),
UnlessBlockEnd(index + length_of_open_tag_syntax),
..tokens
])
"each" ->
run(rest, index + length_of_block_syntax + string.length(body), [
)
["each"] ->
AddToken(
rest,
index + length_of_block_syntax + string.length(body),
EachBlockEnd(index + length_of_open_tag_syntax),
..tokens
])
_ -> Error(error.UnexpectedBlockKind(index))
)
[_] ->
Stop(error.UnexpectedBlockKind(index + length_of_open_tag_syntax))
_ ->
Stop(error.UnexpectedArgument(index + length_of_open_tag_syntax))
}
_ -> Error(error.UnexpectedArgument(index + length_of_open_tag_syntax))
}
}

"{{" <> rest -> {
use #(body, rest) <- result.try(capture_tag_body(rest, index))
case split_body(body) {
[] -> Error(error.MissingArgument(index + length_of_open_tag_syntax))
[arg] ->
run(rest, index + length_of_property_syntax + string.length(body), [
Property(index + length_of_open_tag_syntax, split_arg(arg)),
..tokens
])
_ ->
Error(error.UnexpectedMultipleArguments(
index + length_of_open_tag_syntax,
))
"{{" <> rest ->
case capture_tag_body(rest, index) {
Error(err) -> Stop(err)
Ok(#(body, rest)) ->
case split_body(body) {
[] -> Stop(error.MissingArgument(index + length_of_open_tag_syntax))
[arg] ->
AddToken(
rest,
index + length_of_property_syntax + string.length(body),
Property(index + length_of_open_tag_syntax, split_arg(arg)),
)
_ ->
Stop(error.UnexpectedMultipleArguments(
index + length_of_open_tag_syntax,
))
}
}
}

_ ->
case input |> string.split_once("{{") {
case
input
|> string.split_once("{{")
|> result.map(pair.map_second(_, fn(it) { "{{" <> it }))
{
Ok(#(str, rest)) ->
run("{{" <> rest, index + string.length(str), [
Constant(index, str),
..tokens
])
_ ->
case input {
"" -> Ok(list.reverse(tokens))
str -> Ok(list.reverse([Constant(index, str), ..tokens]))
}
AddToken(rest, index + string.length(str), Constant(index, str))
_ -> AddToken("", index + string.length(input), Constant(index, input))
}
}
}

pub fn run(
input: String,
index: Int,
tokens: List(Token),
) -> Result(List(Token), error.TokenizerError) {
case tokenize(input, index) {
Done -> Ok(list.reverse(tokens))
Stop(err) -> Error(err)
AddToken(rest, index, token) -> run(rest, index, [token, ..tokens])
}
}
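
A quick usage sketch of the unchanged public entry point. The expected value assumes split_body splits the tag body on whitespace and split_arg splits a dot path, per the helpers not shown in this hunk:

import gleam/io
import handles/internal/tokenizer

pub fn main() {
  // Tokenize a template with one constant and one property tag.
  tokenizer.run("Hello {{name}}", 0, [])
  |> io.debug
  // Expected: Ok([Constant(0, "Hello "), Property(8, ["name"])])
}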
