Skip to content

Commit 639921f

Browse files
committed
add the ability to match EOF
1 parent 6360390 commit 639921f

File tree

7 files changed

+50
-15
lines changed

7 files changed

+50
-15
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "PikaParser"
22
uuid = "3bbf5609-3e7b-44cd-8549-7c69f321e792"
33
authors = ["The developers of PikaParser.jl"]
4-
version = "0.4.0"
4+
version = "0.5.0"
55

66
[deps]
77
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"

README.md

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,13 @@ P.find_match_at!(p, :expr, 1)
7474
```
7575
...which returns an index in the match table (if found), such as `45`.
7676

77-
You can have a look at the match: `p.matches[45]` should return:
78-
```julia
79-
PikaParser.Match(10, 1, 13, 2, [44])
80-
```
81-
where `10` is the renumbered rule ID for `:expr`, `1` is the starting position
82-
in the input, `13` is the length of the match (here, that is the whole input);
83-
`2` is the option index (in this case, it points to `:expr` option 2, which is
84-
`:minusexpr`), and 44 is the submatch of `:minusexpr`.
77+
You can have a look at the match: `p.matches[45]` should return:
78+
`PikaParser.Match(10, 1, 13, 2, 52, 0, 41, 0)`, where `10` is the renumbered
79+
rule ID for `:expr`, `1` is the starting position of the match in the input,
80+
`13` is the last position of the match (here, that means the whole input); `2`
81+
is the option index (in this case, it points to `:expr` option 2, which is
82+
`:minusexpr`). The rest of the `Match` structure is used for internal values
83+
that organize the match tree and submatches.
8584

8685
### Recovering parsed ASTs
8786

src/clauses.jl

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ rechildren(x::Scan, t::DataType, v::Vector) = Scan{valtype(v),t}(x.match)
4646
rechildren(x::Token, t::DataType, v::Vector) = Token{valtype(v),t}(x.token)
4747
rechildren(x::Tokens, t::DataType, v::Vector) =
4848
Tokens{valtype(v),t,typeof(x.tokens)}(x.tokens)
49+
rechildren(x::EndOfInput, t::DataType, v::Vector) = EndOfInput{valtype(v),t}()
4950
rechildren(x::Epsilon, t::DataType, v::Vector) = Epsilon{valtype(v),t}()
5051
rechildren(x::Fail, t::DataType, v::Vector) = Fail{valtype(v),t}()
5152
rechildren(x::Seq, t::DataType, v::Vector) = Seq{valtype(v),t}(v)
@@ -97,7 +98,7 @@ better_match_than(::Clause, new::Match, old::Match) = new.last > old.last
9798

9899

99100
can_match_epsilon(x::Union{Satisfy,Scan,Token,Tokens,Fail}, ::Vector{Bool}) = false
100-
can_match_epsilon(x::Epsilon, ::Vector{Bool}) = true
101+
can_match_epsilon(x::Union{Epsilon,EndOfInput}, ::Vector{Bool}) = true
101102
can_match_epsilon(x::Seq, ch::Vector{Bool}) = all(ch)
102103
can_match_epsilon(x::First, ch::Vector{Bool}) =
103104
isempty(ch) ? false :
@@ -236,16 +237,21 @@ end
236237
# Epsilon matches
237238
#
238239

239-
match_epsilon!(x::Clause, id::Int, pos::Int, st::ParserState) =
240+
match_epsilon!(x::Clause, id::Int, pos::Int, st::ParserState)::MatchResult =
240241
new_match!(Match(id, pos, prevind(st.input, pos), 0, submatch_empty(st)), st)
241242

242-
function match_epsilon!(x::FollowedBy, id::Int, pos::Int, st::ParserState)
243+
function match_epsilon!(x::EndOfInput, id::Int, pos::Int, st::ParserState)::MatchResult
244+
pos <= lastindex(st.input) ? 0 :
245+
new_match!(Match(id, pos, prevind(st.input, pos), 0, submatch_empty(st)), st)
246+
end
247+
248+
function match_epsilon!(x::FollowedBy, id::Int, pos::Int, st::ParserState)::MatchResult
243249
mid = lookup_best_match_id!(pos, x.follow, st)
244250
mid == 0 ? 0 :
245251
new_match!(Match(id, pos, prevind(st.input, pos), 1, submatch_record!(st, mid)), st)
246252
end
247253

248-
function match_epsilon!(x::NotFollowedBy, id::Int, pos::Int, st::ParserState)
254+
function match_epsilon!(x::NotFollowedBy, id::Int, pos::Int, st::ParserState)::MatchResult
249255
# This might technically cause infinite recursion, but a cycle of
250256
# NotFollowedBy clauses is disallowed by the error thrown by
251257
# can_match_epsilon(::NotFollowedBy, ...)

src/frontend.jl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,17 @@ function tokens(xs::I) where {I}
4848
Tokens{Any,eltype(I),I}(xs)
4949
end
5050

51+
"""
52+
end_of_input :: Clause
53+
54+
An [`EndOfInput`](@ref) clause. Translate to strongly typed grammar with [`flatten`](@ref).
55+
56+
# Example
57+
58+
whole_file = seq(:file_contents, end_of_input)
59+
"""
60+
const end_of_input = EndOfInput{Any,Any}()
61+
5162
"""
5263
epsilon :: Clause
5364

src/parse.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ function lookup_best_match_id!(
1515
return match_epsilon!(cls, clause, pos, st)
1616
elseif cls isa NotFollowedBy{Int,T}
1717
return match_epsilon!(cls, clause, pos, st)
18+
elseif cls isa EndOfInput{Int,T}
19+
return match_epsilon!(cls, clause, pos, st)
1820
elseif cls isa Epsilon{Int,T}
1921
return match_epsilon!(cls, clause, pos, st)
2022
elseif cls isa Many{Int,T}

src/structs.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Currently implemented clauses:
1616
- [`Tokens`](@ref)
1717
- [`Epsilon`](@ref)
1818
- [`Fail`](@ref)
19+
- [`EndOfInput`](@ref)
1920
- [`Seq`](@ref)
2021
- [`First`](@ref)
2122
- [`NotFollowedBy`](@ref)
@@ -87,6 +88,13 @@ end
8788
"""
8889
$(TYPEDEF)
8990
91+
Matches at the end of the input.
92+
"""
93+
struct EndOfInput{G,T} <: Clause{G,T} end
94+
95+
"""
96+
$(TYPEDEF)
97+
9098
An always-succeeding epsilon match.
9199
"""
92100
struct Epsilon{G,T} <: Clause{G,T} end

test/clauses.jl

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,11 @@ end
9191
end
9292

9393
@testset "Corner-case epsilon matches" begin
94+
str = "whateveρ"
95+
9496
rules = Dict(:x => P.followed_by(P.epsilon))
9597

96-
p = P.parse(P.make_grammar([:x], P.flatten(rules, Char)), "whatever")
98+
p = P.parse(P.make_grammar([:x], P.flatten(rules, Char)), str)
9799

98100
@test P.find_match_at!(p, :x, 1) != 0
99101
@test P.find_match_at!(p, :x, 8) != 0
@@ -102,7 +104,14 @@ end
102104

103105
# tie epsilon match
104106
rules = Dict(:x => P.tie(P.epsilon))
105-
p = P.parse(P.make_grammar([:x], P.flatten(rules, Char)), "whatever")
107+
p = P.parse(P.make_grammar([:x], P.flatten(rules, Char)), str)
106108

107109
@test P.traverse_match(p, P.find_match_at!(p, :x, 1)) == :(x())
110+
111+
rules = Dict(:x => P.end_of_input)
112+
p = P.parse(P.make_grammar([:x], P.flatten(rules, Char)), str)
113+
114+
@test P.find_match_at!(p, :x, firstindex(str)) == 0
115+
@test P.find_match_at!(p, :x, lastindex(str)) == 0
116+
@test P.find_match_at!(p, :x, nextind(str, lastindex(str))) != 0
108117
end

0 commit comments

Comments
 (0)