Skip to content

Commit 8a1940f

Browse files
authored
Merge pull request #18 from LCSB-BioCore/mk-stricter-checking
enforce the grammar checking for ε-matches in invalid positions
2 parents c0e75c4 + 10671af commit 8a1940f

File tree

5 files changed

+47
-13
lines changed

5 files changed

+47
-13
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "PikaParser"
22
uuid = "3bbf5609-3e7b-44cd-8549-7c69f321e792"
33
authors = ["The developers of PikaParser.jl"]
4-
version = "0.5.2"
4+
version = "0.6.0"
55

66
[deps]
77
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"

src/clauses.jl

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,18 @@ can_match_epsilon(x::Union{Epsilon,EndOfInput}, ::Vector{Bool}) = true
102102
can_match_epsilon(x::Seq, ch::Vector{Bool}) = all(ch)
103103
can_match_epsilon(x::First, ch::Vector{Bool}) =
104104
isempty(ch) ? false :
105-
any(ch[begin:end-1]) ? error("First with non-terminal epsilon match") : last(ch)
105+
any(ch[begin:end-1]) ?
106+
error(
107+
"Subclauses of `First` must never allow an epsilon match other than the last one.",
108+
) : last(ch)
106109
can_match_epsilon(x::NotFollowedBy, ch::Vector{Bool}) =
107-
ch[1] ? error("NotFollowedBy epsilon match") : true
110+
ch[1] ? error("Contents of `NotFollowedBy` clause must never allow an epsilon match.") :
111+
true
108112
can_match_epsilon(x::FollowedBy, ch::Vector{Bool}) = ch[1]
109-
can_match_epsilon(x::Some, ch::Vector{Bool}) = ch[1]
110-
can_match_epsilon(x::Many, ch::Vector{Bool}) = true
113+
can_match_epsilon(x::Some, ch::Vector{Bool}) =
114+
ch[1] ? error("Contents of `Some` clause must never allow an epsilon match.") : false
115+
can_match_epsilon(x::Many, ch::Vector{Bool}) =
116+
ch[1] ? error("Contents of `Many` clause must never allow an epsilon match.") : true
111117
can_match_epsilon(x::Tie, ch::Vector{Bool}) = ch[1]
112118

113119

src/grammar.jl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,14 @@ function make_grammar(
7272

7373
while !isempty(q)
7474
cur = pop!(q)
75-
emptiable[cur] && continue
76-
emptiable[cur] =
75+
# Here we could skip the check if emptiable[cur] is already True, but
76+
# redoing it actually allows the `can_match_epsilon` implementations to
77+
# fail in case the grammar is somehow invalid.
78+
cur_is_emptiable =
7779
can_match_epsilon(clauses[cur], emptiable[child_clauses(clauses[cur])])
78-
if emptiable[cur]
79-
# there was a flip!
80+
if !emptiable[cur] && cur_is_emptiable
81+
# There was a flip, force rechecking.
82+
emptiable[cur] = cur_is_emptiable
8083
for pid in parent_clauses[cur]
8184
push!(q, pid)
8285
end

src/parse.jl

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,15 @@ function lookup_best_match_id!(
2121
return match_epsilon!(cls, clause, pos, st)
2222
elseif cls isa Many{Int,T}
2323
return match_epsilon!(cls, clause, pos, st)
24-
else
25-
# This is reached rarely in corner cases
24+
elseif cls isa Seq{Int,T}
25+
return match_epsilon!(cls, clause, pos, st)
26+
elseif cls isa First{Int,T}
27+
return match_epsilon!(cls, clause, pos, st)
28+
elseif cls isa Tie{Int,T}
2629
return match_epsilon!(cls, clause, pos, st)
30+
else
31+
# This should not be reached.
32+
return match_epsilon!(cls, clause, pos, st) # COV_EXCL_LINE
2733
end
2834
end
2935

@@ -48,6 +54,7 @@ function new_match!(match::Match, st::ParserState)
4854
end
4955

5056
for seed in st.grammar.seed_clauses[match.clause]
57+
# TODO: can we remove the extra epsilon condition in the next line?
5158
if updated || st.grammar.can_match_epsilon[seed]
5259
push!(st.q, seed)
5360
end
@@ -189,8 +196,10 @@ function parse(
189196
elseif cls isa Tie{Int,T}
190197
match_clause!(cls, clause, i, st)
191198
else
192-
# Fallback (execution shouldn't reach here)
193-
match_clause!(cls, clause, i, st)
199+
# Fallback (execution shouldn't reach here). You might want to
200+
# add a warning statement here if you added new clauses and
201+
# hunt for performance issues.
202+
match_clause!(cls, clause, i, st) # COV_EXCL_LINE
194203
end
195204
# Shame ends here.
196205
end

test/clauses.jl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,4 +114,20 @@ end
114114
@test P.find_match_at!(p, :x, firstindex(str)) == 0
115115
@test P.find_match_at!(p, :x, lastindex(str)) == 0
116116
@test P.find_match_at!(p, :x, nextind(str, lastindex(str))) != 0
117+
118+
rules = Dict(:x => P.seq(P.epsilon, :y => P.first(P.token('a'), P.epsilon)))
119+
p = P.parse(P.make_grammar([:x, :y], P.flatten(rules, Char)), "b")
120+
121+
@test P.find_match_at!(p, :x, firstindex(p.input)) != 0
122+
@test P.find_match_at!(p, :y, firstindex(p.input)) != 0
123+
end
124+
125+
@testset "Invalid epsilon matches are avoided" begin
126+
rules = Dict(:x => P.not_followed_by(P.epsilon))
127+
@test_throws ErrorException P.make_grammar([:x], P.flatten(rules, Char))
128+
129+
# thanks go to @CptWesley for bringing this up in
130+
# https://github.com/lukehutch/pikaparser/issues/35
131+
rules = Dict(:x => P.first(P.token('a'), P.epsilon), :y => P.first(P.seq(:y, :x), :x))
132+
@test_throws ErrorException P.make_grammar([:y], P.flatten(rules, Char))
117133
end

0 commit comments

Comments
 (0)