forked from mna/pigeon
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
301 lines (264 loc) · 8.43 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
package main
import (
"bufio"
"bytes"
"errors"
"flag"
"fmt"
"io"
"os"
"strconv"
"strings"
"golang.org/x/tools/imports"
"github.com/mna/pigeon/ast"
"github.com/mna/pigeon/builder"
)
// exit function mockable for tests
var exit = os.Exit
// ruleNamesFlag is a custom flag that parses a comma-separated
// list of rule names. It implements flag.Value.
type ruleNamesFlag []string
func (r *ruleNamesFlag) String() string {
return fmt.Sprint(*r)
}
func (r *ruleNamesFlag) Set(value string) error {
names := strings.Split(value, ",")
*r = append(*r, names...)
return nil
}
func main() {
fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
// define command-line flags
var (
cacheFlag = fs.Bool("cache", false, "cache parsing results")
dbgFlag = fs.Bool("debug", false, "set debug mode")
shortHelpFlag = fs.Bool("h", false, "show help page")
longHelpFlag = fs.Bool("help", false, "show help page")
nolint = fs.Bool("nolint", false, "add '// nolint: ...' comments to suppress warnings by gometalinter or golangci-lint")
noRecoverFlag = fs.Bool("no-recover", false, "do not recover from panic")
outputFlag = fs.String("o", "", "output file, defaults to stdout")
optimizeBasicLatinFlag = fs.Bool("optimize-basic-latin", false, "generate optimized parser for Unicode Basic Latin character sets")
optimizeGrammar = fs.Bool("optimize-grammar", false, "optimize the given grammar (EXPERIMENTAL FEATURE)")
optimizeParserFlag = fs.Bool("optimize-parser", false, "generate optimized parser without Debug and Memoize options")
recvrNmFlag = fs.String("receiver-name", "c", "receiver name for the generated methods")
noBuildFlag = fs.Bool("x", false, "do not build, only parse")
supportLeftRecursion = fs.Bool("support-left-recursion", false, "add support left recursion (EXPERIMENTAL FEATURE)")
altEntrypointsFlag ruleNamesFlag
)
fs.Var(&altEntrypointsFlag, "alternate-entrypoints", "comma-separated list of rule names that may be used as entrypoints")
fs.Usage = usage
err := fs.Parse(os.Args[1:])
if err != nil {
fmt.Fprintln(os.Stderr, "args parse error:\n", err)
exit(6)
}
if *shortHelpFlag || *longHelpFlag {
fs.Usage()
exit(0)
}
if fs.NArg() > 1 {
argError(1, "expected one argument, got %q", strings.Join(fs.Args(), " "))
}
// get input source
infile := ""
if fs.NArg() == 1 {
infile = fs.Arg(0)
}
nm, rc := input(infile)
defer func() {
err = rc.Close()
if err != nil {
fmt.Fprintln(os.Stderr, "close file error:\n", err)
}
if r := recover(); r != nil {
panic(r)
}
if err != nil {
exit(7)
}
}()
// parse input
g, err := ParseReader(nm, rc, Debug(*dbgFlag), Memoize(*cacheFlag), Recover(!*noRecoverFlag))
if err != nil {
fmt.Fprintln(os.Stderr, "parse error(s):\n", err)
exit(3)
}
// validate alternate entrypoints
grammar := g.(*ast.Grammar)
rules := make(map[string]struct{}, len(grammar.Rules))
for _, rule := range grammar.Rules {
rules[rule.Name.Val] = struct{}{}
}
for _, entrypoint := range altEntrypointsFlag {
if entrypoint == "" {
continue
}
if _, ok := rules[entrypoint]; !ok {
fmt.Fprintf(os.Stderr, "argument error:\nunknown rule name %s used as alternate entrypoint\n", entrypoint)
exit(9)
}
}
if !*noBuildFlag {
if *optimizeGrammar {
ast.Optimize(grammar, altEntrypointsFlag...)
}
// generate parser
out := output(*outputFlag)
defer func() {
err := out.Close()
if err != nil {
fmt.Fprintln(os.Stderr, "close file error:\n", err)
exit(8)
}
}()
outBuf := bytes.NewBuffer([]byte{})
curNmOpt := builder.ReceiverName(*recvrNmFlag)
optimizeParser := builder.Optimize(*optimizeParserFlag)
basicLatinOptimize := builder.BasicLatinLookupTable(*optimizeBasicLatinFlag)
nolintOpt := builder.Nolint(*nolint)
leftRecursionSupporter := builder.SupportLeftRecursion(*supportLeftRecursion)
if err := builder.BuildParser(
outBuf, grammar, curNmOpt, optimizeParser, basicLatinOptimize,
nolintOpt, leftRecursionSupporter); err != nil {
fmt.Fprintln(os.Stderr, "build error: ", err)
exit(5)
}
// Defaults from golang.org/x/tools/cmd/goimports
options := &imports.Options{
TabWidth: 8,
TabIndent: true,
Comments: true,
Fragment: true,
}
formattedBuf, err := imports.Process("filename", outBuf.Bytes(), options)
if err != nil {
if _, err := out.Write(outBuf.Bytes()); err != nil {
fmt.Fprintln(os.Stderr, "write error: ", err)
exit(7)
}
fmt.Fprintln(os.Stderr, "format error: ", err)
exit(6)
}
if _, err := out.Write(formattedBuf); err != nil {
fmt.Fprintln(os.Stderr, "write error: ", err)
exit(7)
}
}
}
var usagePage = `usage: %s [options] [GRAMMAR_FILE]
Pigeon generates a parser based on a PEG grammar.
By default, pigeon reads the grammar from stdin and writes the
generated parser to stdout. If GRAMMAR_FILE is specified, the
grammar is read from this file instead. If the -o flag is set,
the generated code is written to this file instead.
-cache
cache parser results to avoid exponential parsing time in
pathological cases. Can make the parsing slower for typical
cases and uses more memory.
-debug
output debugging information while parsing the grammar.
-h -help
display this help message.
-nolint
add '// nolint: ...' comments for generated parser to suppress
warnings by gometalinter (https://github.com/alecthomas/gometalinter) or
golangci-lint (https://golangci-lint.run/).
-no-recover
do not recover from a panic. Useful to access the panic stack
when debugging, otherwise the panic is converted to an error.
-o OUTPUT_FILE
write the generated parser to OUTPUT_FILE. Defaults to stdout.
-optimize-basic-latin
generate optimized parser for Unicode Basic Latin character set
-optimize-grammar
perform several performance optimizations on the grammar (EXPERIMENTAL FEATURE)
-optimize-parser
generate optimized parser without Debug and Memoize options and
with some other optimizations applied.
-receiver-name NAME
use NAME as for the receiver name of the generated methods
for the grammar's code blocks. Defaults to "c".
-x
do not generate the parser, only parse the grammar.
-alternate-entrypoints RULE[,RULE...]
comma-separated list of rule names that may be used as alternate
entrypoints for the parser, in addition to the first rule in the
grammar.
-support-left-recursion
add support left recursion (EXPERIMENTAL FEATURE)
See https://godoc.org/github.com/mna/pigeon for more information.
`
// usage prints the help page of the command-line tool.
func usage() {
fmt.Printf(usagePage, os.Args[0])
}
// argError prints an error message to stderr, prints the command usage
// and exits with the specified exit code.
func argError(exitCode int, msg string, args ...any) {
fmt.Fprintf(os.Stderr, msg, args...)
fmt.Fprintln(os.Stderr)
usage()
exit(exitCode)
}
// input gets the name and reader to get input text from.
func input(filename string) (nm string, rc io.ReadCloser) {
nm = "stdin"
inf := os.Stdin
if filename != "" {
f, err := os.Open(filename)
if err != nil {
fmt.Fprintln(os.Stderr, err)
exit(2)
}
inf = f
nm = filename
}
r := bufio.NewReader(inf)
return nm, makeReadCloser(r, inf)
}
// output gets the writer to write the generated parser to.
func output(filename string) io.WriteCloser {
out := os.Stdout
if filename != "" {
f, err := os.Create(filename)
if err != nil {
fmt.Fprintln(os.Stderr, err)
exit(4)
}
out = f
}
return out
}
// create a ReadCloser that reads from r and closes c.
func makeReadCloser(r io.Reader, c io.Closer) io.ReadCloser {
rc := struct {
io.Reader
io.Closer
}{r, c}
return io.ReadCloser(rc)
}
// astPos is a helper method for the PEG grammar parser. It returns the
// position of the current match as an ast.Pos.
func (c *current) astPos() ast.Pos {
return ast.Pos{Line: c.pos.line, Col: c.pos.col, Off: c.pos.offset}
}
// toAnySlice is a helper function for the PEG grammar parser. It converts
// v to a slice of empty interfaces.
func toAnySlice(v any) []any {
if v == nil {
return nil
}
return v.([]any)
}
// validateUnicodeEscape checks that the provided escape sequence is a
// valid Unicode escape sequence.
func validateUnicodeEscape(escape, errMsg string) (any, error) {
r, _, _, err := strconv.UnquoteChar("\\"+escape, '"')
if err != nil {
return nil, errors.New(errMsg)
}
if 0xD800 <= r && r <= 0xDFFF {
return nil, errors.New(errMsg)
}
return nil, nil
}