-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathlex.go
143 lines (127 loc) · 3.45 KB
/
lex.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package markdown
import (
"strings"
"unicode"
)
// isPunct reports whether c is Markdown punctuation, meaning a byte in one
// of the ASCII ranges '!' to '/', ':' to '@', '[' to '`', or '{' to '~'.
func isPunct(c byte) bool {
	switch {
	case c >= '!' && c <= '/':
		return true
	case c >= ':' && c <= '@':
		return true
	case c >= '[' && c <= '`':
		return true
	case c >= '{' && c <= '~':
		return true
	}
	return false
}
// isLetter reports whether c is an ASCII letter ('A'-'Z' or 'a'-'z').
func isLetter(c byte) bool {
	// Setting bit 0x20 folds 'A'-'Z' onto 'a'-'z'; no other byte lands
	// in the lowercase range, so one range check covers both cases.
	lower := c | 0x20
	return lower >= 'a' && lower <= 'z'
}
// isDigit reports whether c is an ASCII digit ('0'-'9').
func isDigit(c byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a
	// large value and fails the comparison.
	return c-'0' < 10
}
// isLetterDigit reports whether c is an ASCII letter or an ASCII digit.
func isLetterDigit(c byte) bool {
	if c >= '0' && c <= '9' {
		return true
	}
	// Fold upper case onto lower case so a single range check suffices.
	lower := c | 0x20
	return lower >= 'a' && lower <= 'z'
}
// isLDH reports whether c is an ASCII letter, an ASCII digit, or a hyphen
// ("LDH" stands for letter-digit-hyphen).
func isLDH(c byte) bool {
	switch {
	case c == '-':
		return true
	case c >= '0' && c <= '9':
		return true
	case c >= 'A' && c <= 'Z':
		return true
	case c >= 'a' && c <= 'z':
		return true
	}
	return false
}
// isHexDigit reports whether c is an ASCII hexadecimal digit:
// '0'-'9', 'A'-'F', or 'a'-'f'.
func isHexDigit(c byte) bool {
	if c >= '0' && c <= '9' {
		return true
	}
	// Fold 'A'-'F' onto 'a'-'f' and test the lowercase range once.
	lower := c | 0x20
	return lower >= 'a' && lower <= 'f'
}
// isUnicodeSpace reports whether r is a Unicode space as defined by Markdown.
// Below U+0080 only space, tab, form feed, and newline qualify; at or above
// U+0080 the rune must belong to the Unicode Zs (space separator) category.
// This is not the same as unicode.IsSpace.
// For example, U+0085 does not satisfy isUnicodeSpace
// but does satisfy unicode.IsSpace.
func isUnicodeSpace(r rune) bool {
	switch r {
	case ' ', '\t', '\f', '\n':
		return true
	}
	// No other ASCII character counts, even if unicode.IsSpace accepts it.
	return r >= 0x80 && unicode.Is(unicode.Zs, r)
}
// isUnicodePunct reports whether r is Unicode punctuation as defined by Markdown.
// Runes below U+0080 are classified by isPunct; all others qualify when they
// are in unicode.Punct or unicode.Symbol.
// This is therefore not the same as unicode.Punct alone.
func isUnicodePunct(r rune) bool {
	if r >= 0x80 {
		return unicode.In(r, unicode.Punct, unicode.Symbol)
	}
	return isPunct(byte(r))
}
// skipSpace returns i + the number of spaces, tabs, and newlines
// at the start of s[i:]. That is, it advances i past any run of those
// characters and returns the new index. Carriage returns are not skipped.
func skipSpace(s string, i int) int {
	// Note: Blank lines have already been removed.
	for i < len(s) {
		if c := s[i]; c != ' ' && c != '\t' && c != '\n' {
			break
		}
		i++
	}
	return i
}
// mdEscaper escapes symbols that are used in inline Markdown sequences
// by putting a backslash in front of each of ( ) [ ] * _ < and >.
// TODO(rsc): There is a better way to do this.
var mdEscaper = func() *strings.Replacer {
	const special = "()[]*_<>"
	pairs := make([]string, 0, 2*len(special))
	for i := 0; i < len(special); i++ {
		ch := special[i : i+1]
		pairs = append(pairs, ch, `\`+ch)
	}
	return strings.NewReplacer(pairs...)
}()
// mdLinkEscaper escapes symbols that have meaning inside a link target
// by putting a backslash in front of each of ( ) < and >.
var mdLinkEscaper = func() *strings.Replacer {
	const special = "()<>"
	pairs := make([]string, 0, 2*len(special))
	for i := 0; i < len(special); i++ {
		ch := special[i : i+1]
		pairs = append(pairs, ch, `\`+ch)
	}
	return strings.NewReplacer(pairs...)
}()
// mdUnescape returns the Markdown unescaping of s.
// A string containing neither a backslash nor an ampersand is returned
// unchanged without consulting mdUnescaper.
func mdUnescape(s string) string {
	if strings.ContainsAny(s, `\&`) {
		return mdUnescaper.Replace(s)
	}
	return s
}
// mdUnescaper unescapes Markdown escape sequences and HTML entities.
// It maps a backslash followed by any of the 32 ASCII punctuation characters
// to that character, and maps each key of htmlEntity (declared elsewhere in
// the package) to its corresponding value.
// TODO(rsc): Perhaps there is a better way to do this.
var mdUnescaper = func() *strings.Replacer {
	// All ASCII punctuation, in ASCII order.
	const punct = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
	pairs := make([]string, 0, 2*len(punct))
	for i := 0; i < len(punct); i++ {
		ch := punct[i : i+1]
		pairs = append(pairs, `\`+ch, ch)
	}
	for entity, text := range htmlEntity {
		pairs = append(pairs, entity, text)
	}
	return strings.NewReplacer(pairs...)
}()