Skip to content

Commit fa0abb5

Browse files
committed
Finishing touches to the rewindable parser, with some tests
1 parent b5ec9a5 commit fa0abb5

18 files changed

+1634
-110
lines changed

src/lsp/cobol_lsp/lsp_document.ml

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -169,18 +169,9 @@ let reparse_and_analyze ?position ({ copybook; rewinder; textdoc; _ } as doc) =
169169
{ doc with artifacts = no_artifacts; rewinder = None; parsed = None }
170170
| Some position, Some rewinder ->
171171
extract_parsed_infos doc @@
172-
Cobol_parser.rewind_and_parse rewinder ~position
173-
begin fun ?new_position pp ->
174-
let contents = Lsp.Text_document.text textdoc in
175-
let contents = match new_position with
176-
| None -> contents
177-
| Some (Lexing.{ pos_cnum; _ } as _pos) ->
178-
EzString.after contents (pos_cnum - 1)
179-
in
180-
(* Pretty.error "contents = %S@." contents; *)
181-
Cobol_preproc.reset_preprocessor ?new_position pp
182-
(String { contents; filename = Lsp.Uri.to_path (uri doc) })
183-
end
172+
Cobol_parser.rewind_and_parse rewinder ~position @@
173+
Cobol_preproc.reset_preprocessor_for_string @@
174+
Lsp.Text_document.text textdoc
184175

185176
(** Creates a record for a document that is not yet parsed or analyzed. *)
186177
let blank ~project ?copybook textdoc =

src/lsp/cobol_parser/parser_engine.ml

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -519,9 +519,12 @@ module Make (Config: Cobol_config.T) = struct
519519
and ('a, 'm) rewindable_history_event =
520520
{
521521
preproc_position: Lexing.position;
522-
event_stage: ('a, 'm) interim_stage;
522+
event_stage: ('a, 'm) interim_stage_without_tokens;
523523
}
524524

525+
and ('a, 'm) interim_stage_without_tokens =
526+
'm state * 'a Grammar_interpr.env (* Always valid input_needed env. *)
527+
525528
let init_rewindable_parse ps ~make_checkpoint =
526529
{
527530
init = ps;
@@ -530,17 +533,23 @@ module Make (Config: Cobol_config.T) = struct
530533
}
531534

532535
(** Stores a stage as part of the memorized rewindable history events. *)
533-
let save_history_event ((ps, _, _) as stage) (store: _ rewindable_history) =
536+
let save_interim_stage (ps, _, env) (store: _ rewindable_history) =
534537
let preproc_position = Cobol_preproc.position ps.preproc.pp in
535538
match store with
539+
| store'
540+
when preproc_position.pos_cnum <> preproc_position.pos_bol ->
541+
(* We must only save positions that correspond to beginning of lines;
542+
this should only make us skip recording events at the end of
543+
inputs. *)
544+
store'
536545
| { preproc_position = prev_pos; _ } :: store'
537546
when prev_pos.pos_cnum = preproc_position.pos_cnum &&
538547
prev_pos.pos_fname = preproc_position.pos_fname ->
539548
(* Preprocessor did not advance further since last save: replace event
540549
with new parser state: *)
541-
{ preproc_position; event_stage = stage } :: store'
550+
{ preproc_position; event_stage = (ps, env) } :: store'
542551
| store' ->
543-
{ preproc_position; event_stage = stage } :: store'
552+
{ preproc_position; event_stage = (ps, env) } :: store'
544553

545554
let rewindable_parser_state = function
546555
| { stage = Final (_, ps) | Trans (ps, _, _); _ } -> ps
@@ -560,7 +569,7 @@ module Make (Config: Cobol_config.T) = struct
560569
| Trans ((ps, _, _) as state) ->
561570
let store, count =
562571
if count = save_stage then store, succ count
563-
else save_history_event state store, 0
572+
else save_interim_stage state store, 0
564573
and stage =
565574
try on_interim_stage state with e -> on_exn ps e
566575
in
@@ -574,15 +583,23 @@ module Make (Config: Cobol_config.T) = struct
574583
pos
575584
| Indexed { line; char } ->
576585
let ps = rewindable_parser_state rwps in
577-
let lexpos = Cobol_preproc.position ps.preproc.pp in
578586
let newline_cnums = Cobol_preproc.newline_cnums ps.preproc.pp in
579-
let pos_bol =
580-
try List.nth newline_cnums (line - 1)
581-
with Not_found | Invalid_argument _ -> 0
582-
in
583-
Lexing.{ lexpos with pos_bol;
584-
pos_cnum = pos_bol + char;
585-
pos_lnum = line + 1 }
587+
if newline_cnums = []
588+
then raise Not_found (* no complete line was processed yet; just skip *)
589+
else
590+
let lexpos = Cobol_preproc.position ps.preproc.pp in
591+
try
592+
let pos_bol =
593+
try List.nth newline_cnums (line - 1)
594+
with Not_found | Invalid_argument _ -> 0
595+
in
596+
Lexing.{ lexpos with pos_bol;
597+
pos_cnum = pos_bol + char;
598+
pos_lnum = line + 1 }
599+
with Failure _ ->
600+
(* The given line exceeds what was already processed, so we restart
601+
from the current preprocessor position. *)
602+
lexpos
586603

587604
let find_history_event_preceding ~position ({ store; _ } as rwps) =
588605
let lexpos = lexing_postion_of ~position rwps in
@@ -608,10 +625,11 @@ module Make (Config: Cobol_config.T) = struct
608625
let rwps =
609626
try
610627
let event, store = find_history_event_preceding ~position rwps in
611-
let ps, tokens, env = event.event_stage in
628+
let ps, env = event.event_stage in
612629
let pp = ps.preproc.pp in
613-
let pp = pp_rewind ?new_position:(Some event.preproc_position) pp in
630+
let pp = pp_rewind ~new_position:event.preproc_position pp in
614631
let ps = { ps with preproc = { ps.preproc with pp } } in
632+
let ps, tokens = produce_tokens ps in
615633
{ rwps with stage = Trans (ps, tokens, env); store }
616634
with Not_found -> (* rewinding before first checkpoint *)
617635
let pp = pp_rewind rwps.init.preproc.pp in

src/lsp/cobol_parser/parser_engine.mli

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ open Parser_outputs
2929
i.e,} {!Cobol_common.Diagnostics.Set.has_errors} holds). This is in
3030
particular the case when the resulting parse-tree is provided and recovery
3131
is enabled ([options.recovery <> DisableRecovery]), as in such a case, the
32-
parse-tree returned may contain dummy nodes and source locations produced
33-
using the recovery mechanism. *)
32+
parse-tree returned may contain dummy nodes and source locations produced by
33+
the recovery mechanism. *)
3434

3535
(** {1 Basic (one-shot) parsing} *)
3636

@@ -92,8 +92,8 @@ type position =
9292
Lexing.position (** raw lexing position *)
9393
| Indexed of
9494
{
95-
line: int; (** line number (starting at 0) *)
96-
char: int; (** character number in line (starting at 0) *)
95+
line: int; (** line number (starting from 0) *)
96+
char: int; (** character number in line (starting from 0) *)
9797
}
9898

9999
(** [rewind_and_parse rewinder preprocessor_rewind ~position] uses [rewinder] to

src/lsp/cobol_parser/text_lexer.ml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -109,15 +109,16 @@ module Make (Words: module type of Text_keywords) = struct
109109
let silenced_keywords =
110110
StringSet.of_list Words.silenced_keywords
111111

112-
let reserve_words: Cobol_config.words_spec -> unit =
112+
let reserve_words: Cobol_config.words_spec -> DIAGS.Set.t =
113113
let on_token_handle_of kwd descr ~f =
114-
try f @@ handle_of_keyword kwd with
114+
try f @@ handle_of_keyword kwd; DIAGS.Set.none with
115115
| Not_found when StringSet.mem kwd silenced_keywords ->
116-
() (* Ignore silently? Warn? *)
116+
DIAGS.Set.none (* Ignore silently? Warn? *)
117117
| Not_found ->
118-
Pretty.error "@[Unable@ to@ %s@ keyword:@ %s@]@." descr kwd
118+
DIAGS.Set.error "@[Unable@ to@ %s@ keyword:@ %s@]@." descr kwd
119119
in
120-
List.iter begin fun (w, word_spec) -> match word_spec with
120+
List.fold_left begin fun diags (w, word_spec) ->
121+
DIAGS.Set.union diags @@ match word_spec with
121122
| Cobol_config.ReserveWord { preserve_context_sensitivity } ->
122123
on_token_handle_of w "reserve" ~f:begin fun h ->
123124
if preserve_context_sensitivity
@@ -132,7 +133,7 @@ module Make (Words: module type of Text_keywords) = struct
132133
end
133134
| NotReserved ->
134135
on_token_handle_of w "unreserve" ~f:unreserve_token
135-
end
136+
end DIAGS.Set.none
136137

137138
let enable_tokens tokens =
138139
TokenHandles.iter enable_token tokens

src/lsp/cobol_parser/text_lexer.mli

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ val show_token_of_handle: token_handle -> string
3131
(* --- *)
3232

3333
val handle_of_token: Grammar_tokens.token -> token_handle
34-
val reserve_words: Cobol_config.words_spec -> unit
34+
val reserve_words: Cobol_config.words_spec -> Cobol_common.Diagnostics.Set.t
3535
val enable_tokens: TokenHandles.t -> unit
3636
val disable_tokens: TokenHandles.t -> unit
3737

src/lsp/cobol_parser/text_tokenizer.ml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,12 +303,11 @@ module Make (Config: Cobol_config.T) = struct
303303
let amnesic = Amnesic
304304
let eidetic = Eidetic []
305305
let init memory ~context_sensitive_tokens =
306-
init_text_lexer ~context_sensitive_tokens;
307306
{
308307
expect_picture_string = false;
309308
leftover_tokens = [];
310309
memory;
311-
diags = DIAGS.Set.none;
310+
diags = init_text_lexer ~context_sensitive_tokens;
312311
lexing_options = Text_lexer.default_lexing_options;
313312
}
314313

src/lsp/cobol_preproc/preproc.ml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,9 @@ let srclex_from_channel = make_srclex Lexing.from_channel
101101
let srclex_from_file ~source_format filename : any_srclexer =
102102
srclex_from_string ~source_format ~filename (EzFile.read_file filename)
103103

104-
(** Note: If given, assumes [position] corresponds to the begining of the
105-
input. If absent, restarts from first position. File name is kept from the
106-
previous input. *)
104+
(** Note: If given, assumes [position] corresponds to the beginning of the
105+
input, which {e must} also be at the beginning of a line. If absent,
106+
restarts from first position. File name is kept from the previous input. *)
107107
let srclex_restart make_lexing ?position input (Plx (s, prev_lexbuf)) =
108108
let lexbuf = make_lexing ?with_positions:(Some true) input in
109109
let pos_fname = match position with

src/lsp/cobol_preproc/preproc_engine.ml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,13 @@ let pp_pptokens: pptokens Pretty.printer =
390390
let reset_preprocessor ?new_position pp input =
391391
preprocessor input (`ResetPosition (pp, new_position))
392392

393+
let reset_preprocessor_for_string string ?new_position pp =
394+
let contents = match new_position with
395+
| Some Lexing.{ pos_cnum; _ } -> EzString.after string (pos_cnum - 1)
396+
| None -> string
397+
in (* filename is ignored *)
398+
reset_preprocessor ?new_position pp @@ String { contents; filename = "" }
399+
393400
(* --- *)
394401

395402
let preprocessor ?(options = Preproc_options.default) input =

src/lsp/cobol_preproc/preproc_engine.mli

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@ val reset_preprocessor
2727
-> preprocessor
2828
-> input
2929
-> preprocessor
30+
val reset_preprocessor_for_string
31+
: string
32+
-> ?new_position:Lexing.position
33+
-> preprocessor
34+
-> preprocessor
35+
3036

3137
(* --- *)
3238

test/cobol_parsing/dune

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,28 @@
33
(modules test_picture_parsing test_combined_relations_parsing)
44
(libraries alcotest cobol_parser cobol_data))
55

6+
(library
7+
(name parser_testing)
8+
(modules Parser_testing)
9+
(libraries cobol_parser)
10+
)
11+
612
(library
713
(name test_cobol_parser)
8-
(modules cS_tokens decimal_point tokens parser_testing)
14+
(modules cS_tokens decimal_point tokens)
915
(preprocess
1016
(pps ppx_expect))
1117
(inline_tests
1218
(modes best)) ; add js for testing with nodejs
13-
(libraries cobol_parser)
19+
(libraries parser_testing)
20+
)
21+
22+
(library
23+
(name test_cobol_parser_rewind)
24+
(modules test_appending test_appending_large test_cutnpaste_large)
25+
(preprocess
26+
(pps ppx_expect))
27+
(inline_tests
28+
(modes best)) ; add js for testing with nodejs
29+
(libraries parser_testing testsuite_utils)
1430
)

0 commit comments

Comments
 (0)