# AP is computed by first computing the precision at each unique score
# threshold. Then, we force precision to be monotonically non-increasing as
# recall increases, by scanning from the last threshold backwards and setting
# precision[i] = max(precision[i], precision[i+1]). Finally, we sum the areas
# between successive unique recalls, where each area is the difference between
# the current and previous unique recall multiplied by the adjusted precision
# at the current recall.
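#
# A minimal NumPy sketch of this procedure is kept below as a comment for
# reference. It is illustrative only: the function `average_precision` and its
# signature are assumptions, not part of any test harness, and the optional
# `false_negatives` argument mirrors the optional `false_negatives` field used
# by the last case in this file.
#
#   import numpy as np
#
#   def average_precision(predictions, groundtruth, false_negatives=0):
#       predictions = np.asarray(predictions, dtype=float)
#       groundtruth = np.asarray(groundtruth, dtype=float)
#       # Positives are the labelled positives plus any positives that never
#       # received a prediction at all.
#       num_positives = groundtruth.sum() + false_negatives
#       # Sort by descending score; a stable sort keeps tied scores in input
#       # order (see the sort_stable_* cases below).
#       order = np.argsort(-predictions, kind='stable')
#       scores = predictions[order]
#       labels = groundtruth[order]
#       true_positives = np.cumsum(labels)
#       precisions = true_positives / np.arange(1, len(labels) + 1)
#       recalls = true_positives / num_positives
#       # Tied scores share one threshold, so keep only the last entry of
#       # each run of equal scores.
#       keep = np.ones(len(scores), dtype=bool)
#       keep[:-1] = scores[:-1] != scores[1:]
#       precisions, recalls = precisions[keep], recalls[keep]
#       # Enforce monotonically non-increasing precision, right to left.
#       for i in range(len(precisions) - 2, -1, -1):
#           precisions[i] = max(precisions[i], precisions[i + 1])
#       # Sum precision * recall increment over the unique thresholds.
#       ap, prev_recall = 0.0, 0.0
#       for precision, recall in zip(precisions, recalls):
#           ap += precision * (recall - prev_recall)
#           prev_recall = recall
#       return ap
#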
- name: 'simple_1'
predictions: [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
groundtruth: [1, 0, 1, 1, 1, 1, 0, 0, 0, 1]
# Steps to compute:
# score thresholds: [ 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
# precisions: [ 1, 1/2, 2/3, 3/4, 4/5, 5/6, 5/7, 5/8, 5/9, 6/10]
# mon. dec. precs: [ 1, 5/6, 5/6, 5/6, 5/6, 5/6, 5/7, 5/8, 6/10, 6/10]
# recalls: [1/6, 1/6, 2/6, 3/6, 4/6, 5/6, 5/6, 5/6, 5/6, 1]
# recall_changes: [1/6, 0, 1/6, 1/6, 1/6, 1/6, 0, 0, 0, 1/6]
# expected ap: sum(1/6, 5/36, 5/36, 5/36, 5/36, 1/10)
expected_ap: 0.822222222
- name: 'sort_stable_1'
predictions: [1, 1, 0, 0, 0]
groundtruth: [1, 0, 1, 1, 0]
# Steps to compute:
# score thresholds: [1 , 0 ]
# precisions: [1/2, 3/5]
# mon. dec. precs: [3/5, 3/5]
# recalls: [1/3, 1 ]
# recall_changes: [1/3, 2/3]
# expected ap: sum(1/5, 2/5)
expected_ap: 0.6
- name: 'sort_stable_2'
predictions: [1, 1, 0, 0, 0]
groundtruth: [1, 0, 1, 0, 1]
# Steps to compute:
# score thresholds: [1 , 0 ]
# precisions: [1/2, 3/5]
# mon. dec. precs: [3/5, 3/5]
# recalls: [1/3, 1 ]
# recall_changes: [1/3, 2/3]
# expected ap: sum(1/5, 2/5)
expected_ap: 0.6
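# Note that both sort_stable cases collapse into the same two thresholds, so
# the relative order of tied scores cannot change the result. Hypothetical
# calls, reusing the commented-out sketch near the top of this file:
#   average_precision([1, 1, 0, 0, 0], [1, 0, 1, 1, 0])  # -> 0.6
#   average_precision([1, 1, 0, 0, 0], [1, 0, 1, 0, 1])  # -> 0.6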
- name: 'simple_2'
predictions: [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
groundtruth: [1, 0, 0, 0, 1, 1, 0, 0, 0, 1]
# Steps to compute:
# score thresholds: [ 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
# precisions: [ 1, 1/2, 1/3, 1/4, 2/5, 3/6, 3/7, 3/8, 3/9 , 4/10]
# mon. dec. precs: [ 1, 1/2, 3/6, 3/6, 3/6, 3/6, 3/7, 4/10, 4/10, 4/10]
# recalls: [1/4, 1/4, 1/4, 1/4, 2/4, 3/4, 3/4, 3/4, 3/4, 1]
# recall_changes: [1/4, 0, 0, 0, 1/4, 1/4, 0, 0, 0, 1/4]
# expected ap: sum(1/4, 1/8, 1/8, 1/10)
expected_ap: 0.6
- name: 'all_ties'
predictions: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
groundtruth: [1, 0, 1, 0, 1, 1, 0, 1, 0, 0]
# Steps to compute:
# score thresholds: [ 0 ]
# precisions: [ 5/10]
# mon. dec. precs: [ 5/10]
# recalls: [ 1]
# recall_changes: [ 1]
# expected ap: sum(1 * 5/10)
expected_ap: 0.5
# This is broken in the PASCAL VOC code, as far as I know.
- name: 'all_ties_1'
predictions: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
groundtruth: [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
# Steps to compute:
# score thresholds: [ 0 ]
# precisions: [ 1/10]
# mon. dec. precs: [ 1/10]
# recalls: [ 1]
# recall_changes: [ 1]
# expected ap: sum(1 * 1/10)
expected_ap: 0.1
# This is broken in the PASCAL VOC code, as far as I know.
- name: 'all_ties_1_with_false_negatives'
predictions: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
groundtruth: [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
false_negatives: 3
# Steps to compute:
# score thresholds: [ 0 ]
# precisions: [ 1/10]
# mon. dec. precs: [ 1/10]
# recalls: [ 1/4]
# recall_changes: [ 1/4]
# expected ap: sum(1/4 * 1/10)
expected_ap: 0.025
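# Example check for the case above, reusing the commented-out sketch near the
# top of this file (hypothetical call; 1 labelled positive + 3 false negatives
# gives 4 positives in total, so recall tops out at 1/4):
#   average_precision([0] * 10, [0] * 9 + [1], false_negatives=3)  # -> 0.025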