-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathsorting.go
392 lines (333 loc) · 12.9 KB
/
sorting.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
package rivescript
// Data sorting functions
import (
"errors"
"sort"
"strconv"
"strings"
)
// Sort buffer data, for RiveScript.SortReplies()
type sortBuffer struct {
topics map[string][]sortedTriggerEntry // Topic name -> array of triggers
thats map[string][]sortedTriggerEntry
sub []string // Substitutions
person []string // Person substitutions
}
// Holds a sorted trigger and the pointer to that trigger's data
type sortedTriggerEntry struct {
trigger string
pointer *astTrigger
}
// Temporary categorization of triggers while sorting
type sortTrack struct {
atomic map[int][]sortedTriggerEntry // Sort by number of whole words
option map[int][]sortedTriggerEntry // Sort optionals by number of words
alpha map[int][]sortedTriggerEntry // Sort alpha wildcards by no. of words
number map[int][]sortedTriggerEntry // Sort numeric wildcards by no. of words
wild map[int][]sortedTriggerEntry // Sort wildcards by no. of words
pound []sortedTriggerEntry // Triggers of just '#'
under []sortedTriggerEntry // Triggers of just '_'
star []sortedTriggerEntry // Triggers of just '*'
}
/*
SortReplies sorts the reply structures in memory for optimal matching.
After you have finished loading your RiveScript code, call this method to
populate the various sort buffers. This is absolutely necessary for reply
matching to work efficiently!
If the bot has loaded no topics, or if it ends up with no sorted triggers at
the end, it will return an error saying such. This usually means the bot didn't
load any RiveScript code, for example because it looked in the wrong directory.
*/
func (rs *RiveScript) SortReplies() error {
// (Re)initialize the sort cache.
rs.sorted.topics = map[string][]sortedTriggerEntry{}
rs.sorted.thats = map[string][]sortedTriggerEntry{}
rs.say("Sorting triggers...")
// If there are no topics, give an error.
if len(rs.topics) == 0 {
return errors.New("SortReplies: no topics were found; did you load any RiveScript code?")
}
// Loop through all the topics.
for topic := range rs.topics {
rs.say("Analyzing topic %s", topic)
// Collect a list of all the triggers we're going to worry about. If this
// topic inherits another topic, we need to recursively add those to the
// list as well.
allTriggers := rs.getTopicTriggers(topic, false)
// Sort these triggers.
rs.sorted.topics[topic] = rs.sortTriggerSet(allTriggers, true)
// Get all of the %Previous triggers for this topic.
thatTriggers := rs.getTopicTriggers(topic, true)
// And sort them, too.
rs.sorted.thats[topic] = rs.sortTriggerSet(thatTriggers, false)
}
// Sort the substitution lists.
rs.sorted.sub = sortList(rs.sub)
rs.sorted.person = sortList(rs.person)
// Did we sort anything at all?
if len(rs.sorted.topics) == 0 && len(rs.sorted.thats) == 0 {
return errors.New("SortReplies: ended up with empty trigger lists; did you load any RiveScript code?")
}
return nil
}
/*
sortTriggerSet sorts a group of triggers in an optimal sorting order.
This function has two use cases:
1. Create a sort buffer for "normal" (matchable) triggers, which are triggers
that are NOT accompanied by a %Previous tag.
2. Create a sort buffer for triggers that had %Previous tags.
Use the `excludePrevious` parameter to control which one is being done. This
function will return a list of sortedTriggerEntry items, and it's intended to
have no duplicate trigger patterns (unless the source RiveScript code explicitly
uses the same duplicate pattern twice, which is a user error).
*/
func (rs *RiveScript) sortTriggerSet(triggers []sortedTriggerEntry, excludePrevious bool) []sortedTriggerEntry {
// Create a priority map, of priority numbers -> their triggers.
prior := map[int][]sortedTriggerEntry{}
// Go through and bucket each trigger by weight (priority).
for _, trig := range triggers {
if excludePrevious && trig.pointer.previous != "" {
continue
}
// Check the trigger text for any {weight} tags, default being 0
match := reWeight.FindStringSubmatch(trig.trigger)
weight := 0
if len(match) > 0 {
weight, _ = strconv.Atoi(match[1])
}
// First trigger of this priority? Initialize the weight map.
if _, ok := prior[weight]; !ok {
prior[weight] = []sortedTriggerEntry{}
}
prior[weight] = append(prior[weight], trig)
}
// Keep a running list of sorted triggers for this topic.
running := []sortedTriggerEntry{}
// Sort the priorities with the highest number first.
var sortedPriorities []int
for k := range prior {
sortedPriorities = append(sortedPriorities, k)
}
sort.Sort(sort.Reverse(sort.IntSlice(sortedPriorities)))
// Go through each priority set.
for _, p := range sortedPriorities {
rs.say("Sorting triggers with priority %d", p)
// So, some of these triggers may include an {inherits} tag, if they
// came from a topic which inherits another topic. Lower inherits values
// mean higher priority on the stack. Triggers that have NO inherits
// value at all (which will default to -1), will be moved to the END of
// the stack at the end (have the highest number/lowest priority).
inherits := -1 // -1 means no {inherits} tag
highestInherits := -1 // Highest number seen so far
// Loop through and categorize these triggers.
track := map[int]*sortTrack{}
track[inherits] = initSortTrack()
// Loop through all the triggers.
for _, trig := range prior[p] {
pattern := trig.trigger
rs.say("Looking at trigger: %s", pattern)
// See if the trigger has an {inherits} tag.
match := reInherits.FindStringSubmatch(pattern)
if len(match) > 0 {
inherits, _ = strconv.Atoi(match[1])
if inherits > highestInherits {
highestInherits = inherits
}
rs.say("Trigger belongs to a topic that inherits other topics. "+
"Level=%d", inherits)
pattern = reInherits.ReplaceAllString(pattern, "")
} else {
inherits = -1
}
// If this is the first time we've seen this inheritance level,
// initialize its sort track structure.
if _, ok := track[inherits]; !ok {
track[inherits] = initSortTrack()
}
// Start inspecting the trigger's contents.
if strings.Contains(pattern, "_") {
// Alphabetic wildcard included.
cnt := wordCount(pattern, false)
rs.say("Has a _ wildcard with %d words", cnt)
if cnt > 0 {
if _, ok := track[inherits].alpha[cnt]; !ok {
track[inherits].alpha[cnt] = []sortedTriggerEntry{}
}
track[inherits].alpha[cnt] = append(track[inherits].alpha[cnt], trig)
} else {
track[inherits].under = append(track[inherits].under, trig)
}
} else if strings.Contains(pattern, "#") {
// Numeric wildcard included.
cnt := wordCount(pattern, false)
rs.say("Has a # wildcard with %d words", cnt)
if cnt > 0 {
if _, ok := track[inherits].number[cnt]; !ok {
track[inherits].number[cnt] = []sortedTriggerEntry{}
}
track[inherits].number[cnt] = append(track[inherits].number[cnt], trig)
} else {
track[inherits].pound = append(track[inherits].pound, trig)
}
} else if strings.Contains(pattern, "*") {
// Wildcard included.
cnt := wordCount(pattern, false)
rs.say("Has a * wildcard with %d words", cnt)
if cnt > 0 {
if _, ok := track[inherits].wild[cnt]; !ok {
track[inherits].wild[cnt] = []sortedTriggerEntry{}
}
track[inherits].wild[cnt] = append(track[inherits].wild[cnt], trig)
} else {
track[inherits].star = append(track[inherits].star, trig)
}
} else if strings.Contains(pattern, "[") {
// Optionals included.
cnt := wordCount(pattern, false)
rs.say("Has optionals with %d words", cnt)
if _, ok := track[inherits].option[cnt]; !ok {
track[inherits].option[cnt] = []sortedTriggerEntry{}
}
track[inherits].option[cnt] = append(track[inherits].option[cnt], trig)
} else {
// Totally atomic.
cnt := wordCount(pattern, false)
rs.say("Totally atomic trigger with %d words", cnt)
if _, ok := track[inherits].atomic[cnt]; !ok {
track[inherits].atomic[cnt] = []sortedTriggerEntry{}
}
track[inherits].atomic[cnt] = append(track[inherits].atomic[cnt], trig)
}
}
// Move the no-{inherits} triggers to the bottom of the stack.
track[highestInherits+1] = track[-1]
delete(track, -1)
// Sort the track from the lowest to the highest.
var trackSorted []int
for k := range track {
trackSorted = append(trackSorted, k)
}
sort.Ints(trackSorted)
// Go through each priority level from greatest to smallest.
for _, ip := range trackSorted {
rs.say("ip=%d", ip)
// Sort each of the main kinds of triggers by their word counts.
running = sortByWords(running, track[ip].atomic)
running = sortByWords(running, track[ip].option)
running = sortByWords(running, track[ip].alpha)
running = sortByWords(running, track[ip].number)
running = sortByWords(running, track[ip].wild)
// Add the single wildcard triggers, sorted by length.
running = sortByLength(running, track[ip].under)
running = sortByLength(running, track[ip].pound)
running = sortByLength(running, track[ip].star)
}
}
return running
}
// sortList sorts lists (like substitutions) from a string:string map.
func sortList(dict map[string]string) []string {
output := []string{}
// Track by number of words.
track := map[int][]string{}
// Loop through each item.
for item := range dict {
cnt := wordCount(item, true)
if _, ok := track[cnt]; !ok {
track[cnt] = []string{}
}
track[cnt] = append(track[cnt], item)
}
// Sort them by word count, descending.
sortedCounts := []int{}
for cnt := range track {
sortedCounts = append(sortedCounts, cnt)
}
sort.Sort(sort.Reverse(sort.IntSlice(sortedCounts)))
for _, cnt := range sortedCounts {
// Sort the strings of this word-count by their lengths.
sortedLengths := track[cnt]
sort.Sort(sort.Reverse(byLength(sortedLengths)))
output = append(output, sortedLengths...)
}
return output
}
/*
sortByWords sorts a set of triggers by word count and overall length.
This is a helper function for sorting the `atomic`, `option`, `alpha`, `number`
and `wild` attributes of the sortTrack and adding them to the running sort
buffer in that specific order. Since attribute lookup by reflection is expensive
in Go, this function is given the relevant sort buffer directly, and the current
running sort buffer to add the results to.
The `triggers` parameter is a map between word counts and the triggers that
fit that number of words.
*/
func sortByWords(running []sortedTriggerEntry, triggers map[int][]sortedTriggerEntry) []sortedTriggerEntry {
// Sort the triggers by their word counts from greatest to smallest.
var sortedWords []int
for wc := range triggers {
sortedWords = append(sortedWords, wc)
}
sort.Sort(sort.Reverse(sort.IntSlice(sortedWords)))
for _, wc := range sortedWords {
// Triggers with equal word lengths should be sorted by overall trigger length.
var sortedPatterns []string
patternMap := map[string][]sortedTriggerEntry{}
for _, trig := range triggers[wc] {
sortedPatterns = append(sortedPatterns, trig.trigger)
if _, ok := patternMap[trig.trigger]; !ok {
patternMap[trig.trigger] = []sortedTriggerEntry{}
}
patternMap[trig.trigger] = append(patternMap[trig.trigger], trig)
}
sort.Sort(sort.Reverse(byLength(sortedPatterns)))
// Add the triggers to the running triggers bucket.
for _, pattern := range sortedPatterns {
running = append(running, patternMap[pattern]...)
}
}
return running
}
/*
sortByLength sorts a set of triggers purely by character length.
This is like `sortByWords`, but it's intended for triggers that consist solely
of wildcard-like symbols with no real words. For example a trigger of `* * *`
qualifies for this, and it has no words, so we sort by length so it gets a
higher priority than simply `*`.
*/
func sortByLength(running []sortedTriggerEntry, triggers []sortedTriggerEntry) []sortedTriggerEntry {
var sortedPatterns []string
patternMap := map[string][]sortedTriggerEntry{}
for _, trig := range triggers {
sortedPatterns = append(sortedPatterns, trig.trigger)
if _, ok := patternMap[trig.trigger]; !ok {
patternMap[trig.trigger] = []sortedTriggerEntry{}
}
patternMap[trig.trigger] = append(patternMap[trig.trigger], trig)
}
sort.Sort(sort.Reverse(byLength(sortedPatterns)))
// Only loop through unique patterns.
patternSet := map[string]bool{}
// Add them to the running triggers bucket.
for _, pattern := range sortedPatterns {
if _, ok := patternSet[pattern]; ok {
continue
}
patternSet[pattern] = true
running = append(running, patternMap[pattern]...)
}
return running
}
// initSortTrack initializes a new, empty sortTrack object.
func initSortTrack() *sortTrack {
return &sortTrack{
atomic: map[int][]sortedTriggerEntry{},
option: map[int][]sortedTriggerEntry{},
alpha: map[int][]sortedTriggerEntry{},
number: map[int][]sortedTriggerEntry{},
wild: map[int][]sortedTriggerEntry{},
pound: []sortedTriggerEntry{},
under: []sortedTriggerEntry{},
star: []sortedTriggerEntry{},
}
}