Skip to content

Commit e166ada

Browse files
authored
Merge pull request #14 from LCSB-BioCore/perf2
performance improvements
2 parents 4eafa2a + eee109e commit e166ada

File tree

18 files changed

+862
-346
lines changed

18 files changed

+862
-346
lines changed

.github/workflows/pr-format.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ name: Formatting
77

88
jobs:
99
formatting:
10-
if: github.event_name == 'pull_request' || (github.event_name == 'issue_comment' && github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER' || github.event.issue.user.id == github.event.comment.user.id) && startsWith(github.event.comment.body, '/format') )
10+
if: github.event_name == 'pull_request' || (github.event_name == 'issue_comment' && github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'OWNER' || github.event.issue.user.id == github.event.comment.user.id) && startsWith(github.event.comment.body, '/format') )
1111
runs-on: ubuntu-latest
1212
steps:
1313
- name: Clone the repository
@@ -57,6 +57,6 @@ jobs:
5757
":x: Auto-formatting triggered by [this comment](${{ github.event.comment.html_url }}) failed, perhaps someone pushed to the PR in the meantime?"
5858
fi
5959
else
60-
then gh pr comment ${{ github.event.issue.number }} --body \
60+
gh pr comment ${{ github.event.issue.number }} --body \
6161
":sunny: Auto-formatting triggered by [this comment](${{ github.event.comment.html_url }}) succeeded, but the code was already formatted correctly."
6262
fi

Project.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
name = "PikaParser"
22
uuid = "3bbf5609-3e7b-44cd-8549-7c69f321e792"
33
authors = ["The developers of PikaParser.jl"]
4-
version = "0.3.0"
4+
version = "0.4.0"
55

66
[deps]
77
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
88
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
99

1010
[compat]
11-
DataStructures = "0.18"
1211
DocStringExtensions = "0.8, 0.9"
1312
julia = "1.6"
1413

README.md

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11

22
# PikaParser.jl
33

4+
| Build status | Documentation |
5+
|:---:|:---:|
6+
| ![CI status](https://github.com/LCSB-BioCore/PikaParser.jl/workflows/CI/badge.svg?branch=master) [![codecov](https://codecov.io/gh/LCSB-BioCore/PikaParser.jl/branch/master/graph/badge.svg?token=A2ui7exGIH)](https://codecov.io/gh/LCSB-BioCore/PikaParser.jl) | [![stable documentation](https://img.shields.io/badge/docs-stable-blue)](https://lcsb-biocore.github.io/PikaParser.jl/stable) [![dev documentation](https://img.shields.io/badge/docs-dev-cyan)](https://lcsb-biocore.github.io/PikaParser.jl/dev) |
7+
48
A simple, straightforward implementation of PikaParser in pure Julia, following
59
the specification by Luke A. D. Hutchison (see
610
https://github.com/lukehutch/pikaparser).
@@ -43,21 +47,24 @@ rules = Dict(
4347

4448
g = P.make_grammar(
4549
[:expr], # the top-level rule
46-
P.flatten(rules),
50+
P.flatten(rules, Char), # process the rules into a single level and specialize them for crunching Chars
4751
)
4852
```
4953

5054
The grammar is now prepared for parsing.
5155

5256
### Parsing text
5357

54-
Pika parsers require frequent indexing of the input, Strings thus need to be
55-
converted to character vectors to be usable as parser input. (To improve
56-
performance, it is advisable to lex your input into a vector of more complex
57-
tokens.)
58+
Parsing is executed simply by running your grammar on any indexable input using
59+
`parse`.
60+
61+
(Notably, pika parsers require frequent indexing of the input, which makes incremental
62+
parsing of streams complicated. To improve performance, it is also
63+
advisable to lex your input into a vector of more complex tokens, using e.g.
64+
`parse_lex`.)
5865

5966
```julia
60-
input = collect("12-(34+567-8)")
67+
input = "12-(34+567-8)"
6168
p = P.parse(g, input)
6269
```
6370

@@ -67,7 +74,7 @@ P.find_match_at!(p, :expr, 1)
6774
```
6875
...which returns an index in the match table (if found), such as `45`.
6976

70-
You can have a look at the match. `p.matches[45]` should return:
77+
You can have a look at the match: `p.matches[45]` should return:
7178
```julia
7279
PikaParser.Match(10, 1, 13, 2, [44])
7380
```
@@ -89,25 +96,25 @@ JuliaFormatter, you will get something like:
8996
```julia
9097
expr(
9198
minusexpr(
92-
expr(digits(digit('1'), digit('2'))),
93-
var"minusexpr-2"('-'),
99+
expr(digits(digit("1"), digit("2"))),
100+
var"minusexpr-2"("-"),
94101
expr(
95102
parens(
96-
var"parens-1"('('),
103+
var"parens-1"("("),
97104
expr(
98105
plusexpr(
99-
expr(digits(digit('3'), digit('4'))),
100-
var"plusexpr-2"('+'),
106+
expr(digits(digit("3"), digit("4"))),
107+
var"plusexpr-2"("+"),
101108
expr(
102109
minusexpr(
103-
expr(digits(digit('5'), digit('6'), digit('7'))),
104-
var"minusexpr-2"('-'),
105-
expr(digits(digit('8'))),
110+
expr(digits(digit("5"), digit("6"), digit("7"))),
111+
var"minusexpr-2"("-"),
112+
expr(digits(digit("8"))),
106113
),
107114
),
108115
),
109116
),
110-
var"parens-3"(')'),
117+
var"parens-3"(")"),
111118
),
112119
),
113120
),
@@ -120,7 +127,7 @@ evaluate the expression as follows:
120127
```julia
121128
P.traverse_match(p, P.find_match_at!(p, :expr, 1),
122129
fold = (m, p, subvals) ->
123-
m.rule == :digits ? parse(Int, String(m.view)) :
130+
m.rule == :digits ? parse(Int, m.view) :
124131
m.rule == :expr ? subvals[1] :
125132
m.rule == :parens ? subvals[2] :
126133
m.rule == :plusexpr ? subvals[1] + subvals[3] :

docs/src/json.jl

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
import PikaParser as P
1919

2020
rules = Dict(
21-
:t => P.tokens(collect("true")),
22-
:f => P.tokens(collect("false")),
23-
:null => P.tokens(collect("null")),
21+
:t => P.tokens("true"),
22+
:f => P.tokens("false"),
23+
:null => P.tokens("null"),
2424
:digit => P.satisfy(isdigit),
2525
:number => P.seq(
2626
P.first(P.token('-'), P.epsilon),
@@ -49,12 +49,12 @@ folds = Dict(
4949
:t => (v, s) -> true,
5050
:f => (v, s) -> false,
5151
:null => (v, s) -> nothing,
52-
:number => (v, s) -> parse(Float64, String(v)),
52+
:number => (v, s) -> parse(Float64, v),
5353
:quote => (v, s) -> v[1],
5454
:esc => (v, s) -> v[1],
5555
:escaped => (v, s) -> s[2],
5656
:notescaped => (v, s) -> v[1],
57-
:string => (v, s) -> String(Vector{Char}(s[2])),
57+
:string => (v, s) -> String(Char.(s[2])),
5858
:instrings => (v, s) -> s,
5959
:array => (v, s) -> s[2],
6060
:inarray => (v, s) -> s,
@@ -63,16 +63,14 @@ folds = Dict(
6363
:pair => (v, s) -> (s[1] => s[3]),
6464
:sepobj => (v, s) -> s[2],
6565
:inobj => (v, s) -> s,
66-
)
66+
);
6767

6868
default_fold(v, subvals) = isempty(subvals) ? nothing : subvals[1]
6969

70-
g = P.make_grammar([:json], P.flatten(rules));
70+
g = P.make_grammar([:json], P.flatten(rules, Char));
7171

7272
# Let's parse a simple JSONish string that demonstrates most of the rules:
73-
input = collect(
74-
"""{"something":123,"other":false,"refs":[1,-2.345,[],{},true,false,null,[1,2,3,"haha"],{"is\\"Finished\\"":true}]}""",
75-
);
73+
input = """{"something":123,"other":false,"refs":[1,-2.345,[],{},true,false,null,[1,2,3,"haha"],{"is\\"Finished\\"":true}]}""";
7674

7775
p = P.parse(g, input);
7876

docs/src/scheme.jl

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ rules = Dict(
4242
# Let's test the grammar on a piece of source code that contains lots of
4343
# whitespace and some errors.
4444

45-
p = P.parse(P.make_grammar([:top], P.flatten(rules)), collect("""
45+
p = P.parse(
46+
P.make_grammar([:top], P.flatten(rules, Char)),
47+
"""
4648
(plus 1 2 3)
4749
(minus 1 2(plus 3 2) ) woohoo extra parenthesis here )
4850
(complex
@@ -54,40 +56,37 @@ p = P.parse(P.make_grammar([:top], P.flatten(rules)), collect("""
5456
valid)
5557
(straight (out (missing(parenthesis error))
5658
(apply (make-function) (make-data))
57-
"""));
58-
59-
# To traverse the input, we'll try to find the `top` matches. If the `top`
60-
# match cannot be found, we will try to match at least something and report it.
61-
# The memo table is conveniently ordered by match position.
62-
63-
top_matches =
64-
[(key.pos, mid) for (key, mid) = p.memo if p.grammar.names[key.clause] == :top]
59+
""",
60+
);
6561

6662
# Prepare a folding function:
6763

6864
fold_scheme(m, p, s) =
69-
m.rule == :number ? parse(Int, String(m.view)) :
70-
m.rule == :ident ? Symbol(String(m.view)) :
65+
m.rule == :number ? parse(Int, m.view) :
66+
m.rule == :ident ? Symbol(m.view) :
7167
m.rule == :insexpr ? Expr(:call, :S, s...) :
7268
m.rule == :sexpr ? s[2] : m.rule == :top ? s[2] : length(s) > 0 ? s[1] : nothing;
7369

7470
# We can run through all `top` matches, tracking the position where we would
7571
# expect the next match:
7672

7773
next_pos = 1
78-
for (pos, mid) in top_matches
74+
while next_pos <= lastindex(p.input)
7975
global next_pos
80-
m = p.matches[mid]
81-
if pos < next_pos # this match is a part of another that was already processed
82-
continue
83-
end
84-
if pos > next_pos # something was not parsed!
85-
@warn "Could not parse input, skipping!" unrecognized =
86-
String(p.input[next_pos:m.pos-1])
76+
pos = next_pos
77+
mid = 0
78+
while pos <= lastindex(p.input) # try to find a match
79+
mid = P.find_match_at!(p, :top, pos)
80+
mid != 0 && break
81+
pos += 1
8782
end
83+
pos > next_pos && # if we skipped something, report it
84+
@error "Got parsing problems" p.input[next_pos:prevind(p.input, pos)]
85+
mid == 0 && break # in case we have found a match, print its AST
8886
value = P.traverse_match(p, mid, fold = fold_scheme)
8987
@info "Got a command" value
90-
next_pos = m.pos + m.len # skip behind the match
88+
m = p.matches[mid] # skip the whole match and continue
89+
next_pos = m.pos + m.len
9190
end
9291

9392
# We can see that the unparseable parts of input were correctly skipped, while

src/PikaParser.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@ $(README)
33
"""
44
module PikaParser
55

6-
using DataStructures
76
using DocStringExtensions
87

98
include("structs.jl")
109
include("clauses.jl")
1110
include("frontend.jl")
1211
include("grammar.jl")
12+
include("memo.jl")
1313
include("parse.jl")
14+
include("q.jl")
1415
include("traverse.jl")
1516

1617
end # module

0 commit comments

Comments
 (0)