LCSB-BioCore
diff --git a/.github/workflows/pr-format.yml b/.github/workflows/pr-format.yml
Lines changed: 2 additions & 2 deletions
diff --git a/Project.toml b/Project.toml
Lines changed: 1 addition & 2 deletions
diff --git a/README.md b/README.md
Lines changed: 24 additions & 17 deletions
diff --git a/docs/src/json.jl b/docs/src/json.jl
Lines changed: 8 additions & 10 deletions
diff --git a/docs/src/scheme.jl b/docs/src/scheme.jl
Lines changed: 19 additions & 20 deletions
diff --git a/src/PikaParser.jl b/src/PikaParser.jl
Lines changed: 2 additions & 1 deletion
@@ -7,7 +7,7 @@ name: Formatting
 
 jobs:
   formatting:
-    if: github.event_name == 'pull_request' || (github.event_name == 'issue_comment' && github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER' || github.event.issue.user.id == github.event.comment.user.id) && startsWith(github.event.comment.body, '/format') )
+    if: github.event_name == 'pull_request' || (github.event_name == 'issue_comment' && github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'OWNER' || github.event.issue.user.id == github.event.comment.user.id) && startsWith(github.event.comment.body, '/format') )
     runs-on: ubuntu-latest
     steps:
       - name: Clone the repository
@@ -57,6 +57,6 @@ jobs:
               ":x: Auto-formatting triggered by [this comment](${{ github.event.comment.html_url }}) failed, perhaps someone pushed to the PR in the meantime?"
             fi
           else
-            then gh pr comment ${{ github.event.issue.number }} --body \
+            gh pr comment ${{ github.event.issue.number }} --body \
               ":sunny: Auto-formatting triggered by [this comment](${{ github.event.comment.html_url }}) succeeded, but the code was already formatted correctly."
           fi
@@ -1,14 +1,13 @@
 name = "PikaParser"
 uuid = "3bbf5609-3e7b-44cd-8549-7c69f321e792"
 authors = ["The developers of PikaParser.jl"]
-version = "0.3.0"
+version = "0.4.0"
 
 [deps]
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 
 [compat]
-DataStructures = "0.18"
 DocStringExtensions = "0.8, 0.9"
 julia = "1.6"
 
 
@@ -1,6 +1,10 @@
 
 # PikaParser.jl
 
+| Build status | Documentation |
+|:---:|:---:|
+| ![CI status](https://github.com/LCSB-BioCore/PikaParser.jl/workflows/CI/badge.svg?branch=master) [![codecov](https://codecov.io/gh/LCSB-BioCore/PikaParser.jl/branch/master/graph/badge.svg?token=A2ui7exGIH)](https://codecov.io/gh/LCSB-BioCore/PikaParser.jl) | [![stable documentation](https://img.shields.io/badge/docs-stable-blue)](https://lcsb-biocore.github.io/PikaParser.jl/stable) [![dev documentation](https://img.shields.io/badge/docs-dev-cyan)](https://lcsb-biocore.github.io/PikaParser.jl/dev) |
+
 A simple straightforward implementation of PikaParser in pure Julia, following
 the specification by Luke A. D. Hutchison (see
 https://github.com/lukehutch/pikaparser).
@@ -43,21 +47,24 @@ rules = Dict(
 
 g = P.make_grammar(
     [:expr], # the top-level rule
-    P.flatten(rules),
+    P.flatten(rules, Char), # process the rules into a single level and specialize them for crunching Chars
 )
 ```
 
 The grammar is now prepared for parsing.
 
 ### Parsing text
 
-Pika parsers require frequent indexing of the input, Strings thus need to be
-converted to character vectors to be usable as parser input. (To improve
-performance, it is advisable to lex your input into a vector of more complex
-tokens.)
+Parsing is executed simply by running your grammar on any indexable input using
+`parse`.
+
+(Notably, pika parsers require frequent indexing of the input, which makes
+incremental parsing of streams complicated. To improve performance, it is also
+advisable to lex your input into a vector of more complex tokens, e.g. using
+`parse_lex`.)
 
 ```julia
-input = collect("12-(34+567-8)")
+input = "12-(34+567-8)"
 p = P.parse(g, input)
 ```
 
@@ -67,7 +74,7 @@ P.find_match_at!(p, :expr, 1)
 ```
 ...which returns an index in the match table (if found), such as `45`.
 
-You can have a look at the match. `p.matches[45]` should return:
+You can have a look at the match: `p.matches[45]` should return:
 ```julia
 PikaParser.Match(10, 1, 13, 2, [44])
 ```
@@ -89,25 +96,25 @@ JuliaFormatter, you will get something like:
 ```julia
 expr(
     minusexpr(
-        expr(digits(digit('1'), digit('2'))),
-        var"minusexpr-2"('-'),
+        expr(digits(digit("1"), digit("2"))),
+        var"minusexpr-2"("-"),
         expr(
             parens(
-                var"parens-1"('('),
+                var"parens-1"("("),
                 expr(
                     plusexpr(
-                        expr(digits(digit('3'), digit('4'))),
-                        var"plusexpr-2"('+'),
+                        expr(digits(digit("3"), digit("4"))),
+                        var"plusexpr-2"("+"),
                         expr(
                             minusexpr(
-                                expr(digits(digit('5'), digit('6'), digit('7'))),
-                                var"minusexpr-2"('-'),
-                                expr(digits(digit('8'))),
+                                expr(digits(digit("5"), digit("6"), digit("7"))),
+                                var"minusexpr-2"("-"),
+                                expr(digits(digit("8"))),
                             ),
                         ),
                     ),
                 ),
-                var"parens-3"(')'),
+                var"parens-3"(")"),
             ),
         ),
     ),
@@ -120,7 +127,7 @@ evaluate the expression as follows:
 ```julia
 P.traverse_match(p, P.find_match_at!(p, :expr, 1),
     fold = (m, p, subvals) ->
-        m.rule == :digits ? parse(Int, String(m.view)) :
+        m.rule == :digits ? parse(Int, m.view) :
         m.rule == :expr ? subvals[1] :
         m.rule == :parens ? subvals[2] :
         m.rule == :plusexpr ? subvals[1] + subvals[3] :
 
@@ -18,9 +18,9 @@
 import PikaParser as P
 
 rules = Dict(
-    :t => P.tokens(collect("true")),
-    :f => P.tokens(collect("false")),
-    :null => P.tokens(collect("null")),
+    :t => P.tokens("true"),
+    :f => P.tokens("false"),
+    :null => P.tokens("null"),
     :digit => P.satisfy(isdigit),
     :number => P.seq(
         P.first(P.token('-'), P.epsilon),
@@ -49,12 +49,12 @@ folds = Dict(
     :t => (v, s) -> true,
     :f => (v, s) -> false,
     :null => (v, s) -> nothing,
-    :number => (v, s) -> parse(Float64, String(v)),
+    :number => (v, s) -> parse(Float64, v),
     :quote => (v, s) -> v[1],
     :esc => (v, s) -> v[1],
     :escaped => (v, s) -> s[2],
     :notescaped => (v, s) -> v[1],
-    :string => (v, s) -> String(Vector{Char}(s[2])),
+    :string => (v, s) -> String(Char.(s[2])),
     :instrings => (v, s) -> s,
     :array => (v, s) -> s[2],
     :inarray => (v, s) -> s,
@@ -63,16 +63,14 @@ folds = Dict(
     :pair => (v, s) -> (s[1] => s[3]),
     :sepobj => (v, s) -> s[2],
     :inobj => (v, s) -> s,
-)
+);
 
 default_fold(v, subvals) = isempty(subvals) ? nothing : subvals[1]
 
-g = P.make_grammar([:json], P.flatten(rules));
+g = P.make_grammar([:json], P.flatten(rules, Char));
 
 # Let's parse a simple JSONish string that demonstrates most of the rules:
-input = collect(
-    """{"something":123,"other":false,"refs":[1,-2.345,[],{},true,false,null,[1,2,3,"haha"],{"is\\"Finished\\"":true}]}""",
-);
+input = """{"something":123,"other":false,"refs":[1,-2.345,[],{},true,false,null,[1,2,3,"haha"],{"is\\"Finished\\"":true}]}""";
 
 p = P.parse(g, input);
 
 
@@ -42,7 +42,9 @@ rules = Dict(
 # Let's test the grammar on a piece of source code that contains lots of
 # whitespace and some errors.
 
-p = P.parse(P.make_grammar([:top], P.flatten(rules)), collect("""
+p = P.parse(
+    P.make_grammar([:top], P.flatten(rules, Char)),
+    """
 (plus 1 2 3)
 (minus 1 2(plus 3 2)  ) woohoo extra parenthesis here )
 (complex
@@ -54,40 +56,37 @@ p = P.parse(P.make_grammar([:top], P.flatten(rules)), collect("""
   valid)
 (straight (out (missing(parenthesis error))
 (apply (make-function) (make-data))
-"""));
-
-# To traverse the input, we'll try to find the `top` matches. If the `top`
-# match cannot be found, we will try to match at least something and report it.
-# The memo table is conveniently ordered by match position.
-
-top_matches =
-    [(key.pos, mid) for (key, mid) = p.memo if p.grammar.names[key.clause] == :top]
+""",
+);
 
 # Prepare a folding function:
 
 fold_scheme(m, p, s) =
-    m.rule == :number ? parse(Int, String(m.view)) :
-    m.rule == :ident ? Symbol(String(m.view)) :
+    m.rule == :number ? parse(Int, m.view) :
+    m.rule == :ident ? Symbol(m.view) :
     m.rule == :insexpr ? Expr(:call, :S, s...) :
     m.rule == :sexpr ? s[2] : m.rule == :top ? s[2] : length(s) > 0 ? s[1] : nothing;
 
 # We can run through all `top` matches, tracking the position where we would
 # expect the next match:
 
 next_pos = 1
-for (pos, mid) in top_matches
+while next_pos <= lastindex(p.input)
     global next_pos
-    m = p.matches[mid]
-    if pos < next_pos # this match is a part of another that was already processed
-        continue
-    end
-    if pos > next_pos # something was not parsed!
-        @warn "Could not parse input, skipping!" unrecognized =
-            String(p.input[next_pos:m.pos-1])
+    pos = next_pos # candidate start of the next `top` match
+    mid = 0 # 0 means "no match found yet"
+    while pos <= lastindex(p.input) # scan forward for the nearest `top` match
+        mid = P.find_match_at!(p, :top, pos)
+        mid != 0 && break
+        pos = nextind(p.input, pos) # step by valid indices; `pos + 1` may be invalid in a non-ASCII String
     end
+    pos > next_pos && # if we skipped something, report it
+        @error "Got parsing problems" p.input[next_pos:prevind(p.input, pos)]
+    mid == 0 && break # no further match in the input: stop; otherwise print the match's AST
     value = P.traverse_match(p, mid, fold = fold_scheme)
     @info "Got a command" value
-    next_pos = m.pos + m.len # skip behind the match
+    m = p.matches[mid] # skip the whole match and continue
+    next_pos = m.pos + m.len
 end
 
 # We can see that the unparseable parts of input were correctly skipped, while
 
@@ -3,14 +3,15 @@ $(README)
 """
 module PikaParser
 
-using DataStructures
 using DocStringExtensions
 
 include("structs.jl")
 include("clauses.jl")
 include("frontend.jl")
 include("grammar.jl")
+include("memo.jl")
 include("parse.jl")
+include("q.jl")
 include("traverse.jl")
 
 end # module