diff --git a/src/cli/strings.ml b/src/cli/strings.ml index ae672c9..a721535 100644 --- a/src/cli/strings.ml +++ b/src/cli/strings.ml @@ -2,7 +2,7 @@ open! Core open Lwt.Infix open Lwt.Syntax -let version = "2.2.0" +let version = "2.2.1" let header = sprintf "/* Generated by okTurtles/strings v%s */\n\n" version @@ -104,8 +104,8 @@ let rec process_dir traversal ~path = function Lwt.return_unit in let on_error ~msg:_ = slow_parse () in - Parsing.Basic.exec_parser ~on_ok ~on_error Parsing.Pug.parser ~path ~language_name:"Pug" - source + Parsing.(Basic.exec_parser ~on_ok ~on_error (Pug.parser (Basic.make_string_parsers ()))) + ~path ~language_name:"Pug" source in collector) | { st_kind = S_REG; _ }, _, _ when String.is_suffix filename ~suffix:".html" -> @@ -229,6 +229,7 @@ let write_other ~outdir ~language english other = let main options = function | Debug lang -> + let string_parsers = Parsing.Basic.make_string_parsers () in Lwt_list.iter_s (fun path -> let* () = Lwt_io.printlf "\n>>> Debugging [%s]" path in @@ -254,8 +255,8 @@ let main options = function Vue.debug_template ~path [ Pug_native { parsed; length = None } ] template_script lang in let on_error ~msg:_ = slow_parse () in - Parsing.Basic.exec_parser ~on_ok ~on_error Parsing.Pug.parser ~path ~language_name:"Pug" - source) + Parsing.Basic.exec_parser ~on_ok ~on_error (Parsing.Pug.parser string_parsers) ~path + ~language_name:"Pug" source) | Html, _ when String.is_suffix path ~suffix:".html" -> let on_ok parsed = Vue.debug_template ~path [ Html { parsed; length = None } ] template_script lang diff --git a/src/cli/vue.ml b/src/cli/vue.ml index a067d5f..4271828 100644 --- a/src/cli/vue.ml +++ b/src/cli/vue.ml @@ -33,8 +33,7 @@ module Language = struct | Template (Template.HTML source) -> let on_ok parsed = Html { parsed; length = Some (String.length source) } in let on_error ~msg = Failed msg in - Parsing.Basic.exec_parser ~on_ok ~on_error Parsing.Html.parser ~path ~language_name:"HTML" source - |> Lwt.return + Basic.exec_parser ~on_ok ~on_error Html.parser ~path ~language_name:"HTML" source |> Lwt.return | Template (Template.PUG source) -> ( let slow_parse () = let collector = Utils.Collector.create ~path in @@ -46,7 +45,9 @@ module Language = struct | false -> let on_ok parsed = Pug_native { parsed; length = Some (String.length source) } |> Lwt.return in let on_error ~msg:_ = slow_parse () in - Basic.exec_parser ~on_ok ~on_error Pug.parser ~path ~language_name:"Pug" source) + Basic.exec_parser ~on_ok ~on_error + (Pug.parser (Basic.make_string_parsers ())) + ~path ~language_name:"Pug" source) | Script (Script.JS s) -> Js s |> Lwt.return | Script (Script.TS s) -> Ts s |> Lwt.return | Style (Style.CSS s) -> Css (String.length s) |> Lwt.return diff --git a/src/parsing/basic.ml b/src/parsing/basic.ml index 08bf077..c7b1bdf 100644 --- a/src/parsing/basic.ml +++ b/src/parsing/basic.ml @@ -54,7 +54,7 @@ let escapable_string_parser ~escape ~separator = let is_separator = Char.( = ) separator in let is_escape = Char.( = ) escape in let buf = Buffer.create 50 in - (char separator + char separator *> let rec loop escaping = any_char >>= fun x -> @@ -74,12 +74,25 @@ let escapable_string_parser ~escape ~separator = Buffer.add_char buf c; loop escaping in - loop false) - "Escapable string" + loop false + <|> ( return () >>= fun () -> + Buffer.clear buf; + fail "Invalid escapable string" ) + +let make_sq_string () = escapable_string_parser ~escape:'\\' ~separator:'\'' + +let make_dq_string () = escapable_string_parser ~escape:'\\' ~separator:'"' + +type string_parsers = { + sq_string: string Angstrom.t; + dq_string: string Angstrom.t; +} + +let make_string_parsers () = { sq_string = make_sq_string (); dq_string = make_dq_string () } let boundary_parsers tag = - let sq_string = escapable_string_parser ~escape:'\\' ~separator:'\'' in - let dq_string = escapable_string_parser ~escape:'\\' ~separator:'"' in + let sq_string = make_sq_string () in + let dq_string = make_dq_string () in let quoted_string = peek_char >>= function | Some '\'' -> sq_string @@ -96,7 +109,8 @@ let boundary_parsers tag = let ends = string " mlws *> string tag <* mlws <* char '>' in starts, ends -let block_parser (starts, ends) buf ~f = +let block_parser boundaries buf ~f = + let starts, ends = boundaries () in let line = take_remaining <* advance 1 >>| fun src_line -> Buffer.add_string buf src_line; diff --git a/src/parsing/basic.mli b/src/parsing/basic.mli new file mode 100644 index 0000000..42374a9 --- /dev/null +++ b/src/parsing/basic.mli @@ -0,0 +1,63 @@ +open! Core + +val lowercase : char -> bool + +val alphanum : char -> bool + +val is_identifier : char -> bool + +val is_ws : char -> bool + +val is_mlws : char -> bool + +val ws : unit Angstrom.t + +val ws1 : unit Angstrom.t + +val mlws : unit Angstrom.t + +val mlws1 : unit Angstrom.t + +val take_remaining : string Angstrom.t + +val skip_remaining : unit Angstrom.t + +val maybe : 'a Angstrom.t -> 'a option Angstrom.t + +val make_sq_string : unit -> string Angstrom.t + +val make_dq_string : unit -> string Angstrom.t + +type string_parsers = { + sq_string: string Angstrom.t; + dq_string: string Angstrom.t; +} + +val make_string_parsers : unit -> string_parsers + +val boundary_parsers : string -> (string, string option) Core.Tuple2.t list Angstrom.t * string Angstrom.t + +val block_parser : + (unit -> 'a Angstrom.t * 'b Angstrom.t) -> Buffer.t -> f:(string -> 'a -> 'c) -> 'c Angstrom.t + +val default_error_handler : path:string -> language_name:string -> unparsed:string -> 'a + +val default_syntax_error_handler : path:string -> language_name:string -> msg:string -> 'a + +val exec_parser : + on_ok:('a -> 'b) -> + ?on_error:(msg:string -> 'b) -> + 'a Angstrom.t -> + path:string -> + language_name:string -> + string -> + 'b + +val exec_parser_lwt : + on_ok:('a -> 'b Lwt.t) -> + ?on_error:(unparsed:string -> 'a option -> 'b Lwt.t) -> + 'a Angstrom.t -> + path:string -> + language_name:string -> + Lwt_io.input_channel -> + 'b Lwt.t diff --git a/src/parsing/html.mli b/src/parsing/html.mli index 432e495..6cd32f0 100644 --- a/src/parsing/html.mli +++ b/src/parsing/html.mli @@ -1,3 +1,7 @@ open! Core -include S.Parser +type t [@@deriving sexp_of] + +val collect : Utils.Collector.t -> t -> unit + +val parser : t Angstrom.t diff --git a/src/parsing/pug.ml b/src/parsing/pug.ml index 2371731..5258b74 100644 --- a/src/parsing/pug.ml +++ b/src/parsing/pug.ml @@ -84,7 +84,7 @@ let rollup (lines : lines) = let lvl = List.hd lines |> Option.value_map ~default:0 ~f:fst in loop lvl [] None lines |> fst3 |> Array.of_list_rev -let parser = +let parser Basic.{ sq_string; dq_string } = let open Angstrom in let open Basic in let comments = string "//" *> skip_remaining in @@ -93,10 +93,8 @@ let parser = let mlblank = sep_by comments mlws in let mlblank1 = sep_by1 comments mlws1 in let pug_string = - let single_quoted_string = escapable_string_parser ~escape:'\\' ~separator:'\'' in - let double_quoted_string = escapable_string_parser ~escape:'\\' ~separator:'"' in let unquoted_string = take_while1 is_identifier in - choice [ single_quoted_string; double_quoted_string; unquoted_string ] + choice [ sq_string; dq_string; unquoted_string ] in let symbols ll = ll |> List.map ~f:string |> choice in diff --git a/src/parsing/pug.mli b/src/parsing/pug.mli index 432e495..a186e35 100644 --- a/src/parsing/pug.mli +++ b/src/parsing/pug.mli @@ -1,3 +1,7 @@ open! Core -include S.Parser +type t [@@deriving sexp_of] + +val collect : Utils.Collector.t -> t -> unit + +val parser : Basic.string_parsers -> t Angstrom.t diff --git a/src/parsing/s.ml b/src/parsing/s.ml deleted file mode 100644 index c79aa37..0000000 --- a/src/parsing/s.ml +++ /dev/null @@ -1,9 +0,0 @@ -open! Core - -module type Parser = sig - type t [@@deriving sexp_of] - - val collect : Utils.Collector.t -> t -> unit - - val parser : t Angstrom.t -end diff --git a/src/parsing/script.ml b/src/parsing/script.ml index 7685d42..ca9009b 100644 --- a/src/parsing/script.ml +++ b/src/parsing/script.ml @@ -5,7 +5,7 @@ type raw = | TS of string [@@deriving sexp, yojson] -let boundaries = +let boundaries () = let open Angstrom in let starts, ends = Basic.boundary_parsers "script" in let starts = diff --git a/src/parsing/strings.ml b/src/parsing/strings.ml index 27db865..1c2e7e8 100644 --- a/src/parsing/strings.ml +++ b/src/parsing/strings.ml @@ -4,15 +4,14 @@ type line = | Translation of (string * string) | Comment -let parser = +let parser ~dq_string = let open Angstrom in let open Basic in - let double_quoted_string = escapable_string_parser ~escape:'\\' ~separator:'"' in let line = lift2 (fun x y -> Translation (x, y)) - (mlws *> double_quoted_string <* mlws <* char '=') - (mlws *> double_quoted_string <* mlws <* char ';' <* mlws) + (mlws *> dq_string <* mlws <* char '=') + (mlws *> dq_string <* mlws <* char ';' <* mlws) in let comment = (mlws @@ -44,9 +43,10 @@ let parse ~path ic = (String.take_while ~f:(Char.( <> ) '\n') unparsed) () in + let dq_string = Basic.make_dq_string () in let+ lines = - Basic.exec_parser_lwt ~on_ok:Lwt.return ~on_error:error_handler parser ~path ~language_name:".strings" - ic + Basic.exec_parser_lwt ~on_ok:Lwt.return ~on_error:error_handler (parser ~dq_string) ~path + ~language_name:".strings" ic in List.iter lines ~f:(function | Translation (x, y) -> String.Table.set table ~key:x ~data:y diff --git a/src/parsing/style.ml b/src/parsing/style.ml index 60edd95..0a4aff5 100644 --- a/src/parsing/style.ml +++ b/src/parsing/style.ml @@ -2,6 +2,6 @@ open! Core type raw = CSS of string [@@deriving sexp, yojson] -let boundaries = Basic.boundary_parsers "style" +let boundaries () = Basic.boundary_parsers "style" let parser buf = Basic.block_parser boundaries buf ~f:(fun raw _attrs -> CSS raw) diff --git a/src/parsing/template.ml b/src/parsing/template.ml index 6708256..fadca87 100644 --- a/src/parsing/template.ml +++ b/src/parsing/template.ml @@ -5,7 +5,7 @@ type raw = | PUG of string [@@deriving sexp, yojson] -let boundaries = +let boundaries () = let open Angstrom in let starts, ends = Basic.boundary_parsers "template" in let starts =