Skip to content

Commit

Permalink
Run the pre-precessor once before building the first parser checkpoint
Browse files Browse the repository at this point in the history
This completely removes the need to handle cases of dummy left-most
positions in the source overlay module.
  • Loading branch information
nberth committed Sep 22, 2023
1 parent e187bb8 commit 10628f2
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 34 deletions.
19 changes: 12 additions & 7 deletions src/lsp/cobol_parser/parser_engine.ml
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ module Make (Config: Cobol_config.T) = struct

(* --- *)

let do_parse: type m. m state -> _ -> _ * m state =
let do_parse: type m. m state -> _ -> _ -> _ * m state =

let rec next_tokens ({ preproc = { tokzr; _ }; _ } as ps) tokens =
match Tokzr.next_token tokzr tokens with
Expand Down Expand Up @@ -424,16 +424,22 @@ module Make (Config: Cobol_config.T) = struct
Some v, ps

in
fun ps c -> normal ps [] c
fun ps tokens c -> normal ps tokens c

let parse ?verbose ?show ~recovery
(type m) ~(memory: m memory) pp checkpoint
(type m) ~(memory: m memory) pp make_checkpoint
: ('a option, m) output * _ =
let ps = init_parser ?verbose ?show ~recovery
~tokenizer_memory:memory pp in
let res, ps =
(* TODO: catch in a deeper context to grab parsed tokens *)
try do_parse ps checkpoint with e -> None, add_diag (DIAGS.of_exn e) ps
let ps, tokens = produce_tokens ps in
let first_pos = match tokens with
| [] -> Cobol_preproc.position ps.preproc.pp
| t :: _ -> Cobol_common.Srcloc.start_pos ~@t
in
try do_parse ps tokens (make_checkpoint first_pos)
with e -> None, add_diag (DIAGS.of_exn e) ps
in
match memory with
| Amnesic ->
Expand Down Expand Up @@ -480,9 +486,8 @@ let parse
init_source_format = source_format}
in
let module P = Make (val config) in
P.parse ?verbose ?show ~memory ~recovery pp @@
Grammar.Incremental.compilation_group @@
Cobol_preproc.position pp
P.parse ?verbose ?show ~memory ~recovery pp
Grammar.Incremental.compilation_group
end
in
{
Expand Down
38 changes: 11 additions & 27 deletions src/lsp/cobol_preproc/src_overlay.ml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ type manager =
over_right_gap: limit Links.t; (** associates the right limit of a token to
the left limit of the next *)
cache: (srcloc * limit) Links.t;
leftmost_in_file: (string, limit) Hashtbl.t;
id: string; (** manager identifier (for logging/debugging) *)
}

Expand All @@ -80,37 +79,25 @@ let new_manager: string -> manager =
right_of = Links.create 42;
over_right_gap = Links.create 42;
cache = Links.create 42;
leftmost_in_file = Hashtbl.create 3;
id = Pretty.to_string "%s-%u" manager_name !id;
}

(** Returns left and right (potentially fresh) limits for the given source
location; for any given file, must be called with the leftmost location
first. *)
(* TODO: try to see whether registering the leftmost location in each file could
be done more efficiently wihtout a membership test on each new location (but
the pre-processor does not provide change-of-file info to the parser). *)
let limits: manager -> srcloc -> limit * limit = fun ctx loc ->
let s, e = match Cobol_common.Srcloc.as_unique_lexloc loc with
| Some lexloc -> lexloc
| _ -> Limit.make_virtual (), Limit.make_virtual ()
in
Links.replace ctx.right_of s (loc, e); (* replace to deal with duplicates *)
if not (Hashtbl.mem ctx.leftmost_in_file s.pos_fname)
then Hashtbl.add ctx.leftmost_in_file s.pos_fname s;
s, e

(** Links token limits *)
let link_limits ctx left right =
(* Replace to deal with overriding of limits during recovery. *)
Links.replace ctx.over_right_gap left right

(** [leftmost_limit_in ~filename ctx] finds the leftmost limit from a location
in [filename] that is registered in [ctx] (internal). Use with moderation
as this is quite inefficient. *)
let leftmost_limit_in ~filename ctx =
Hashtbl.find_opt ctx.leftmost_in_file filename

(** Returns a source location that spans between two given limits; returns a
valid pointwise location if the two given limits are physically equal. *)
let join_limits: manager -> limit * limit -> srcloc = fun ctx (s, e) ->
Expand All @@ -124,7 +111,7 @@ let join_limits: manager -> limit * limit -> srcloc = fun ctx (s, e) ->
in
Cobol_common.Srcloc.raw (pos, pos)
in
let try_limits (s, e) =
let try_limits (s, e: limit * limit) =

let rec proceed_from ?loc s = (* start search from left limit [s] *)
check ?loc @@ Links.find ctx.right_of s
Expand All @@ -137,7 +124,8 @@ let join_limits: manager -> limit * limit -> srcloc = fun ctx (s, e) ->
| None -> loc'
| Some loc -> Cobol_common.Srcloc.concat loc loc'
in
if e == e' (* physical comparison *)
if e.pos_cnum = e'.pos_cnum && (* compare only the fields that matter *)
e.pos_fname = e'.pos_fname
then (Links.replace ctx.cache s (loc, e); loc)
else try_cache_from ~loc @@ Links.find ctx.over_right_gap e'

Expand All @@ -163,18 +151,14 @@ let join_limits: manager -> limit * limit -> srcloc = fun ctx (s, e) ->
(* Printexc.(print_raw_backtrace Stdlib.stderr @@ get_callstack 10); *)
loc
in
(* first attempt assumes proper token limits: `s` is a left and `e` is a right
of tokens *)
try try_limits (s, e) with Not_found ->
(* otherwise try assuming `s` is an end of token *)
try try_limits (Links.find ctx.over_right_gap s, e) with Not_found ->
if s.pos_cnum = 0 (* potential special case with left-position forged by the
parser: retry with leftmost limit if it differs from
s *)
then match leftmost_limit_in ~filename:s.pos_fname ctx with
| Some l when l != s -> try_limits (l, e) (* physical equality is enough *)
| Some _ | None -> join_failure (s, e)
else join_failure (s, e)
try (* first attempt assumes proper token limits: `s` is a left and `e` is a
right of tokens *)
try_limits (s, e)
with Not_found ->
try (* otherwise try assuming `s` is an end of token *)
try_limits (Links.find ctx.over_right_gap s, e)
with Not_found ->
join_failure (s, e)

module New_manager (Id: sig val name: string end) : MANAGER = struct
let ctx = new_manager Id.name
Expand Down

0 comments on commit 10628f2

Please sign in to comment.