-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtracking_numbers.go.rl
228 lines (190 loc) · 5.94 KB
/
tracking_numbers.go.rl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
// WARNING: tracking_numbers.go is generated from tracking_numbers.go.rl.
// This notice appears in both files.
//
// If you are editing tracking_numbers.go.rl, be sure to `go generate` afterwards.
// If you are editing tracking_numbers.go, please stop, since your changes will be overwritten.
package trackr
// TrackingNumber is one carrier tracking number located by Find.
type TrackingNumber struct {
	// Carrier is the label Find assigned to the match: "FedEx", "UPS" or "USPS".
	Carrier string

	// Number is the matched word, copied verbatim from the scanned input.
	Number string
}
// Ragel machine declaration: names the state machine used by Find and asks
// Ragel to emit the machine's static data at this point in the generated file.
%%{
machine tracking_number;
write data;
}%%
// Find scans data for carrier tracking numbers and returns every match in the
// order it is encountered.
//
// A candidate is reported only when it is a whole alphanumeric word (bounded
// by non-alphanumeric characters or by the ends of data) and it passes the
// checksum of the scheme whose shape it matched. The schemes checked are
// FedEx Express, FedEx Ground, UPS, and two USPS schemes (USS128 and USS39).
// Matches are labelled "FedEx", "UPS" or "USPS".
//
// Find returns nil when data contains no tracking numbers.
func Find(data string) []TrackingNumber {
// Ragel counters
// cs is the current machine state, p the index of the character being
// examined, pe the index at which scanning stops and eof the index treated
// as end of input; pe and eof are both the full length of data here.
cs, p, pe, eof := 0, 0, len(data), len(data)
// Output
var found []TrackingNumber
// Matcher states
// wordStart is the index of the first character of the current candidate word.
var wordStart int
// Each matcher below keeps a running checksum plus an end field. end is set
// to p+1 at the moment the matcher's checksum verifies, i.e. the index just
// past the character that completed a valid number. The emit action later
// compares end against p to see whether the word that just finished is valid.
// FedEx Express
var fe struct {
sum int
end int
}
// FedEx Ground
var fg struct {
sum int
end int
}
// UPS
// Letters contribute (c - '?') % 10 to the sum, so 'A' counts as 2.
var ups struct {
sum int
end int
}
// USPS uses several schemes, which we check simultaneously
// First is USS128:
// sumA and sumB accumulate the digits at alternating positions so the
// weights can be applied once at the end.
var uss128 struct {
sumA, sumB int
end int
}
// Second is USS39:
// sum10 and sum11 are the weighted sums for the two alternative checksums;
// sumOK remembers the check-digit result until the trailing suffix is seen.
var uss39 struct {
sum10, sum11 int
sumOK bool
end int
}
%%{
# FedEx Express uses a checksum that multiplies digits by 3 coefficients
# Use one action for each type of digit
action fe1 { fe.sum += 1*(int(fc) - '0') }
action fe3 { fe.sum += 3*(int(fc) - '0') }
action fe7 { fe.sum += 7*(int(fc) - '0') }
action festart { fe.sum = 0 }
action feend {
// Runs on the last digit of a 12- or 15-digit run. That digit has already
// been added to the sum with weight 7, so it is removed again before the
// expected check digit is computed from the digits that precede it.
if ((fe.sum-7*(int(fc) - '0')) % 11) % 10 == (int(fc) - '0') {
fe.end = p+1
}
}
# FedEx Express is either 12 or 15 digits, in the 317317317317[317] pattern
fe = ( ( digit@fe3 digit@fe1 digit@fe7 ){4,5} @feend) >festart;
# FedEx Ground uses a checksum that multiplies digits by 2 coefficients
action fg1 { fg.sum += 1*(int(fc) - '0') }
action fg3 { fg.sum += 3*(int(fc) - '0') }
action fgstart { fg.sum = 0 }
action fgend {
// Runs on the 15th digit, which is the check digit and is not itself
// added to the sum.
if (10-(fg.sum % 10)) % 10 == (int(fc) - '0') {
fg.end = p+1
}
}
# Yes, FedEx Ground is really 15 digits, just like FedEx Express
fg = ( (digit@fg1 digit@fg3){7} digit@fgend) >fgstart;
# UPS uses a checksum containing both alphabetic and numeric characters with two coefficients
# Use one action for each {coefficient, chartype}
action ups1n { ups.sum += 1*(int(fc) - '0') }
action ups1a { ups.sum += 1*((int(fc) - '?') % 10) }
action ups2n { ups.sum += 2*(int(fc) - '0') }
action ups2a { ups.sum += 2*((int(fc) - '?') % 10) }
action upsstart { ups.sum = 0 }
action upsend {
// Runs on the final character, a digit that serves as the check digit and
// is not part of the sum.
if (10-(ups.sum % 10)) % 10 == (int(fc) - '0') {
ups.end = p+1
}
}
ups1 = (digit@ups1n) | ('A'..'Z'@ups1a);
ups2 = (digit@ups2n) | ('A'..'Z'@ups2a);
ups = ('1Z' (ups1 ups2){7} ups1 digit@upsend) >upsstart;
# USPS uses a couple different checksum schemes
# We can match them simultaneously
# USS128 matches on either 20 or 22-digit strings:
action uss128start { uss128.sumA = 0; uss128.sumB = 0 }
action uss128A { uss128.sumA += (int(fc) - '0') }
action uss128B { uss128.sumB += (int(fc) - '0') }
action uss128checkA {
// NOTE(review): no machine expression in this function references this
// action; only uss128checkB is attached to uss128. Confirm whether it is
// intentionally kept for a future odd-length variant.
{
lastDigit := (int(fc) - '0')
sum := 1*uss128.sumA + 3*(uss128.sumB-lastDigit)
if (10 - (sum % 10)) % 10 == lastDigit {
uss128.end = p+1
}
}
}
action uss128checkB {
// Attached to the whole 20- or 22-digit expression. The current digit sits
// in a sumB position and is the check digit, so it is subtracted from sumB
// before the weights are applied.
{
lastDigit := (int(fc) - '0')
sum := 3*uss128.sumA + 1*(uss128.sumB-lastDigit)
if (10 - (sum % 10)) % 10 == lastDigit {
uss128.end = p+1
}
}
}
uss128 = ((digit@uss128A digit@uss128B){10,11}) >uss128start @uss128checkB;
# USS39 matches sets of 8 digits, with a 2-alpha prefix and a mandatory "US" suffix,
# and can use either of two checksums
action uss39start { uss39.sum10 = 0; uss39.sum11 = 0; uss39.sumOK = false }
action uss39d1 { uss39.sum11 += 8*(int(fc) - '0'); uss39.sum10 += 1*(int(fc) - '0') }
action uss39d2 { uss39.sum11 += 6*(int(fc) - '0'); uss39.sum10 += 3*(int(fc) - '0') }
action uss39d3 { uss39.sum11 += 4*(int(fc) - '0'); uss39.sum10 += 1*(int(fc) - '0') }
action uss39d4 { uss39.sum11 += 2*(int(fc) - '0'); uss39.sum10 += 3*(int(fc) - '0') }
action uss39d5 { uss39.sum11 += 3*(int(fc) - '0'); uss39.sum10 += 1*(int(fc) - '0') }
action uss39d6 { uss39.sum11 += 5*(int(fc) - '0'); uss39.sum10 += 3*(int(fc) - '0') }
action uss39d7 { uss39.sum11 += 9*(int(fc) - '0'); uss39.sum10 += 1*(int(fc) - '0') }
action uss39d8 { uss39.sum11 += 7*(int(fc) - '0'); uss39.sum10 += 3*(int(fc) - '0') }
action uss39check {
// Runs on the ninth digit, the check digit. It is accepted if it matches
// either the mod-10 or the mod-11 scheme; the result is parked in sumOK
// because the suffix has not been seen yet.
{
var checkDigit10, checkDigit11 int
checkDigit10 = (10-(uss39.sum10 % 10)) % 10
// because of course it needs to be complicated
// The mod-11 scheme maps remainder 0 to check digit 5 and remainder 1 to 0;
// every other remainder r maps to 11 - r.
remainder := uss39.sum11 % 11
if remainder == 0 {
checkDigit11 = 5
} else if remainder == 1 {
checkDigit11 = 0
} else {
checkDigit11 = 11 - remainder
}
uss39.sumOK = (checkDigit10 == (int(fc) - '0')) || (checkDigit11 == (int(fc) - '0'))
}
}
action uss39complete {
// Runs once the trailing US suffix has been consumed; only then is the
// earlier check-digit result turned into a match position.
if uss39.sumOK {
uss39.end = p+1
}
}
uss39 = (
('A'..'Z'{2})
digit@uss39d1
digit@uss39d2
digit@uss39d3
digit@uss39d4
digit@uss39d5
digit@uss39d6
digit@uss39d7
digit@uss39d8
digit@uss39check
'US'@uss39complete
) >uss39start;
# Tracking numbers are any of our matchers
tracking = fe | fg | ups | uss128 | uss39;
# Match and emit on whole words only
action start { wordStart = p }
action emit {
// Runs when the machine leaves a tracking-number candidate, with p at the
// index just past the word. A matcher's end equals p only if that matcher
// verified its checksum on the final character of this word.
if fe.end == p {
found = append(found, TrackingNumber{"FedEx", data[wordStart:p]})
} else if fg.end == p { // don't emit both FedEx Express and FedEx Ground
found = append(found, TrackingNumber{"FedEx", data[wordStart:p]})
}
if ups.end == p {
found = append(found, TrackingNumber{"UPS", data[wordStart:p]})
}
// As with FedEx, the two USPS schemes share a label, so emit at most once.
if uss128.end == p {
found = append(found, TrackingNumber{"USPS", data[wordStart:p]})
} else if uss39.end == p {
found = append(found, TrackingNumber{"USPS", data[wordStart:p]})
}
}
# Words could be tracking numbers (ignoring errors) or just alphanumeric strings
word = (tracking)>start%emit | (alnum+);
# separators are non-alphanumeric strings
separator = ^alnum+;
main := separator? (word separator)* word?;
write init;
write exec;
}%%
// Normalize an empty result to nil so callers see nil when nothing matched.
if len(found) == 0 {
found = nil
}
return found
}