-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhs_difficulty.go
411 lines (323 loc) · 11.9 KB
/
hs_difficulty.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
package sudoku
import (
"crypto/sha1"
"encoding/hex"
"fmt"
"log"
"sort"
"strconv"
"strings"
)
//DifficultySignals is a collection of names to float64 values, representing
//the various signals extracted from a SolveDirections, and used for the
//Difficulty calculation. Generally not useful to package users.
type DifficultySignals map[string]float64
//A difficulty signal generator can return more than one difficutly signal, so
//it doesn't just return float64 Each signal generator should always return a
//map with the SAME keys--so if you've called it once you know what the next
//calls will have as keys.
type difficultySignalGenerator func(directions SolveDirections) DifficultySignals
const _DIFFICULTY_WEIGHT_FILENAME = "difficulties.csv"
var difficultySignalGenerators []difficultySignalGenerator
//These are the weights that will be used to turn a list of signals into a
//difficulty. starting weights are set in hs_difficulty_weights.go, which is
//auto-generated. Generate those now:
//go:generate cmd/dokugen-analysis/internal/gendifficulties/gendifficulties
var difficultySignalWeights map[string]float64
//difficultyModelHashValue stashes the value of the hash of the model, so we
//don't have to calculate it too often.
var difficultyModelHashValue string
func init() {
difficultySignalGenerators = []difficultySignalGenerator{
signalTechnique,
signalNumberOfSteps,
signalTechniquePercentage,
signalPercentageFilledSteps,
signalNumberUnfilled,
signalStepsUntilNonFill,
signalPrecursorStepsLength,
}
}
//LoadDifficultyModel loads in a new difficulty model to use to score puzzles'
//difficulties. This will automatically change what DifficultyModelHash will
//return.
func LoadDifficultyModel(model map[string]float64) {
difficultySignalWeights = model
//Reset the stored model hash value so the next call to
//DifficultyModelHash will recacluate it.
difficultyModelHashValue = ""
}
//DifficultyModelHash is a unique string representing the exact difficulty
//model in use. Every time a new model is trained or loaded, this value will change.
//Therefore, if the value is different than last time you checked, the model has changed.
//This is useful for throwing out caches that assume the same difficulty model is in use.
func DifficultyModelHash() string {
if difficultyModelHashValue == "" {
//Generate a string with keys, vals in a known sequence, then hash.
//first, generate a list of all keys
var keys []string
for k := range difficultySignalWeights {
keys = append(keys, k)
}
sort.Strings(keys)
hash := sha1.New()
for _, k := range keys {
hash.Write([]byte(k + ":" + strconv.FormatFloat(difficultySignalWeights[k], 'f', -1, 64) + "\n"))
}
hashBytes := hash.Sum(nil)
base32str := strings.ToUpper(hex.EncodeToString(hashBytes))
difficultyModelHashValue = base32str
}
return difficultyModelHashValue
}
//Stats returns a printout of interesting statistics about the
//SolveDirections, including number of steps, difficulty (based on this solve
//description alone), how unrelated the cells in subsequent steps are, and the
//values of all of the signals used to generate the difficulty.
func (self SolveDirections) Stats() []string {
//TODO: test this. dokugen has a method that effectively tests this; just use that.
techniqueCount := make(map[string]int)
var lastStep *SolveStep
similarityAccum := 0.0
steps := self.Steps()
for _, step := range steps {
if lastStep != nil {
similarityAccum += step.TargetCells.chainSimilarity(lastStep.TargetCells)
}
techniqueCount[step.TechniqueVariant()] += 1
lastStep = step
}
similarityAccum /= float64(len(steps))
var result []string
//TODO: use a standard divider across the codebase
divider := "-------------------------"
result = append(result, divider)
//TODO: we shouldn't even include this... it's not meaningful to report the difficulty of a single solve.
result = append(result, fmt.Sprintf("Difficulty: %f", self.Signals().difficulty()))
result = append(result, divider)
result = append(result, fmt.Sprintf("Step count: %d", len(steps)))
result = append(result, divider)
result = append(result, fmt.Sprintf("Avg Similarity: %f", similarityAccum))
result = append(result, divider)
//We want a stable ordering for technique counts.
for _, technique := range AllTechniqueVariants {
//TODO: pad the technique name with enough spaces so the colon lines up.
result = append(result, fmt.Sprintf("%s: %d", technique, techniqueCount[technique]))
}
result = append(result, divider)
return result
}
//Description returns a comprehensive prose description of the
//SolveDirections, including reasoning for each step, that if followed would
//lead to the grid being solved. Unlike Walkthrough, Description() does not
//include diagrams for each step.
func (self SolveDirections) Description() []string {
//TODO: the IsHint directions don't sound as good as the old ones in OnlineSudoku did.
if len(self.CompoundSteps) == 0 {
return []string{"The puzzle is already solved."}
}
var descriptions []string
for i, compound := range self.CompoundSteps {
intro := ""
description := compound.Description()
if len(self.CompoundSteps) > 1 {
description = strings.ToLower(string(description[0])) + description[1:]
switch i {
case 0:
intro = "First, "
case len(self.CompoundSteps) - 1:
intro = "Finally, "
default:
//TODO: switch between "then" and "next" randomly.
intro = "Next, "
}
}
descriptions = append(descriptions, intro+description)
}
return descriptions
}
//Walkthrough prints an exhaustive set of human-readable directions that
//includes diagrams at each step to make it easier to follow.
func (self SolveDirections) Walkthrough() string {
//TODO: test this.
if len(self.CompoundSteps) == 0 {
return "The puzzle could not be solved with any of the techniques we're aware of."
}
steps := self.Steps()
clone := self.Grid().MutableCopy()
DIVIDER := "\n\n--------------------------------------------\n\n"
intro := fmt.Sprintf("This will take %d steps to solve.", len(steps))
intro += "\nWhen you start, your grid looks like this:\n"
intro += clone.Diagram(false)
intro += "\n"
intro += DIVIDER
descriptions := self.Description()
results := make([]string, len(steps))
for i, description := range descriptions {
result := description + "\n"
result += "After doing that, your grid will look like: \n\n"
steps[i].Apply(clone)
result += clone.Diagram(false)
results[i] = result
}
return intro + strings.Join(results, DIVIDER) + DIVIDER + "Now the puzzle is solved."
}
//Signals returns the DifficultySignals for this set of SolveDirections.
func (self SolveDirections) Signals() DifficultySignals {
//Because of the contract of a DifficultySignalGenerator (that it always
//returns the same keys), as long as DifficultySignalGenerators stays
//constant it's reasonable for callers to assume that one call to
//Signals() will return all of the string keys you'll see any time you
//call Signals()
result := DifficultySignals{}
for _, generator := range difficultySignalGenerators {
result.add(generator(self))
}
return result
}
//This will overwrite colliding values
//TODO: this is confusingly named
func (self DifficultySignals) add(other DifficultySignals) {
for key, val := range other {
self[key] = val
}
}
//For keys in both, will sum them together.
//TODO: this is confusingly named (compared to Add) Do we really need both Sum and Add?
func (self DifficultySignals) sum(other DifficultySignals) {
for key, val := range other {
self[key] += val
}
}
func (self DifficultySignals) difficulty() float64 {
accum := 0.0
if constant, ok := difficultySignalWeights["Constant"]; ok {
accum = constant
} else {
log.Println("Didn't have the constant term loaded.")
}
for signal, val := range self {
//We can discard the OK because 0 is a reasonable thing to do with weights we aren't aware of.
weight, _ := difficultySignalWeights[signal]
accum += val * weight
}
if accum < 0.0 {
log.Println("Accumuldated difficulty snapped to 0.0:", accum)
accum = 0.0
}
if accum > 1.0 {
log.Println("Accumulated difficulty snapped to 1.0:", accum)
accum = 1.0
}
return accum
}
//Rest of file is different Signals
//TODO: now that SolveDirections includes gridSnapshot, think if there are any
//additional Signals we can generate.
//This technique returns a count of how many each type of technique is seen.
//Different techniques are different "difficulties" so seeing more of a hard
//technique will Lead to a higher overall difficulty.
func signalTechnique(directions SolveDirections) DifficultySignals {
//Our contract is to always return every signal name, even if it's 0.0.
result := DifficultySignals{}
for _, techniqueName := range AllTechniqueVariants {
result[techniqueName+" Count"] = 0.0
}
for _, step := range directions.Steps() {
result[step.TechniqueVariant()+" Count"]++
}
return result
}
//This signal is just number of steps. More steps is PROBABLY a harder puzzle.
func signalNumberOfSteps(directions SolveDirections) DifficultySignals {
return DifficultySignals{
"Number of Steps": float64(len(directions.Steps())),
}
}
//This signal is like signalTechnique, except it returns the count divided by
//the TOTAL number of steps.
func signalTechniquePercentage(directions SolveDirections) DifficultySignals {
//Our contract is to always return every signal name, even if it's 0.0.
result := DifficultySignals{}
for _, techniqueName := range AllTechniqueVariants {
result[techniqueName+" Percentage"] = 0.0
}
count := len(directions.Steps())
if count == 0 {
return result
}
for _, step := range directions.Steps() {
result[step.TechniqueVariant()+" Percentage"]++
}
//Now normalize all of them
for name := range result {
result[name] /= float64(count)
}
return result
}
//This signal is how many steps are filled out of all steps. Presumably harder
//puzzles will have more non-fill steps.
func signalPercentageFilledSteps(directions SolveDirections) DifficultySignals {
numerator := 0.0
denominator := float64(len(directions.Steps()))
for _, step := range directions.Steps() {
if step.Technique.IsFill() {
numerator += 1.0
}
}
return DifficultySignals{
"Percentage Fill Steps": numerator / denominator,
}
}
//This signal is how many cells are unfilled at the beginning. Presumably
//harder puzzles will have fewer cells filled (although obviously this isn't
//necessarily true)
func signalNumberUnfilled(directions SolveDirections) DifficultySignals {
//We don't have access to the underlying grid, so we'll just count how
//many fill steps (since each can only add one number, and no numbers are
//ever unfilled)
count := 0.0
for _, step := range directions.Steps() {
if step.Technique.IsFill() {
count++
}
}
return DifficultySignals{
"Number Unfilled Cells": count,
}
}
//signalPrecursorStepsLength returns the length of the longest run of
//non-fill steps, and the average length of any run.
func signalPrecursorStepsLength(directions SolveDirections) DifficultySignals {
longestRun := 0
averageRunAccum := 0
for _, compoundStep := range directions.CompoundSteps {
length := len(compoundStep.PrecursorSteps)
if length > longestRun {
longestRun = length
}
averageRunAccum += length
}
averageRun := float64(averageRunAccum) / float64(len(directions.CompoundSteps))
return DifficultySignals{
"Average PrecursorSteps Length": averageRun,
"Longest PrecursorSteps Length": float64(longestRun),
}
}
//This signal is how many steps into the solve directions before you encounter
//your first non-fill step. Non-fill steps are harder, so this signal captures
//how easy the start of the puzzle is.
func signalStepsUntilNonFill(directions SolveDirections) DifficultySignals {
//TODO: should we get rid of this now that we have
//signalPrecursorStepsLength?
count := 0.0
for _, step := range directions.Steps() {
if !step.Technique.IsFill() {
break
}
count++
}
return DifficultySignals{
"Steps Until Nonfill": count,
}
}