@@ -10,6 +10,7 @@ use nexus_db_queries::db::model::RendezvousDebugDataset;
10
10
use nexus_db_queries:: db:: DataStore ;
11
11
use nexus_types:: deployment:: BlueprintDatasetConfig ;
12
12
use nexus_types:: deployment:: BlueprintDatasetDisposition ;
13
+ use nexus_types:: internal_api:: background:: DebugDatasetsRendezvousStats ;
13
14
use omicron_common:: api:: internal:: shared:: DatasetKind ;
14
15
use omicron_uuid_kinds:: BlueprintUuid ;
15
16
use omicron_uuid_kinds:: DatasetUuid ;
@@ -23,7 +24,7 @@ pub(crate) async fn reconcile_debug_datasets(
23
24
blueprint_id : BlueprintUuid ,
24
25
blueprint_datasets : impl Iterator < Item = & BlueprintDatasetConfig > ,
25
26
inventory_datasets : & BTreeSet < DatasetUuid > ,
26
- ) -> anyhow:: Result < ( ) > {
27
+ ) -> anyhow:: Result < DebugDatasetsRendezvousStats > {
27
28
// We expect basically all executions of this task to do nothing: we're
28
29
// activated periodically, and only do work when a dataset has been
29
30
// newly-added or newly-expunged.
@@ -39,27 +40,42 @@ pub(crate) async fn reconcile_debug_datasets(
39
40
. map ( |d| ( d. id ( ) , d) )
40
41
. collect :: < BTreeMap < _ , _ > > ( ) ;
41
42
43
+ let mut stats = DebugDatasetsRendezvousStats :: default ( ) ;
44
+
42
45
for dataset in blueprint_datasets. filter ( |d| d. kind == DatasetKind :: Debug ) {
43
46
match dataset. disposition {
44
47
BlueprintDatasetDisposition :: InService => {
45
48
// Only attempt to insert this dataset if it has shown up in
46
49
// inventory (required for correctness) and isn't already
47
50
// present in the db (performance optimization only). Inserting
48
51
// an already-present row is a no-op, so it's safe to skip.
49
- if inventory_datasets. contains ( & dataset. id )
50
- && !existing_db_datasets. contains_key ( & dataset. id )
51
- {
52
+ if existing_db_datasets. contains_key ( & dataset. id ) {
53
+ stats. num_already_exist += 1 ;
54
+ } else if !inventory_datasets. contains ( & dataset. id ) {
55
+ stats. num_not_in_inventory += 1 ;
56
+ } else {
52
57
let db_dataset = RendezvousDebugDataset :: new (
53
58
dataset. id ,
54
59
dataset. pool . id ( ) ,
55
60
blueprint_id,
56
61
) ;
57
- datastore
62
+ let did_insert = datastore
58
63
. debug_dataset_insert_if_not_exists ( opctx, db_dataset)
59
64
. await
60
65
. with_context ( || {
61
66
format ! ( "failed to insert dataset {}" , dataset. id)
62
- } ) ?;
67
+ } ) ?
68
+ . is_some ( ) ;
69
+
70
+ if did_insert {
71
+ stats. num_inserted += 1 ;
72
+ } else {
73
+ // This means we hit the TOCTOU race mentioned above:
74
+ // when we queried the DB this row didn't exist, but
75
+ // another Nexus must have beaten us to actually inserting
76
+ // it.
77
+ stats. num_already_exist += 1 ;
78
+ }
63
79
}
64
80
}
65
81
BlueprintDatasetDisposition :: Expunged => {
@@ -81,7 +97,9 @@ pub(crate) async fn reconcile_debug_datasets(
81
97
. get ( & dataset. id )
82
98
. map ( |d| d. is_tombstoned ( ) )
83
99
. unwrap_or ( false ) ;
84
- if !already_tombstoned {
100
+ if already_tombstoned {
101
+ stats. num_already_tombstoned += 1 ;
102
+ } else {
85
103
if datastore
86
104
. debug_dataset_tombstone (
87
105
opctx,
@@ -96,17 +114,23 @@ pub(crate) async fn reconcile_debug_datasets(
96
114
)
97
115
} ) ?
98
116
{
117
+ stats. num_tombstoned += 1 ;
99
118
info ! (
100
119
opctx. log, "tombstoned expunged dataset" ;
101
120
"dataset_id" => %dataset. id,
102
121
) ;
122
+ } else {
123
+ // Similar TOCTOU race lost as above; this dataset was
124
+ // either already tombstoned by another racing Nexus, or
125
+ // has been hard deleted.
126
+ stats. num_already_tombstoned += 1 ;
103
127
}
104
128
}
105
129
}
106
130
}
107
131
}
108
132
109
- Ok ( ( ) )
133
+ Ok ( stats )
110
134
}
111
135
112
136
#[ cfg( test) ]
@@ -213,15 +237,15 @@ mod tests {
213
237
) ) | {
214
238
let blueprint_id = BlueprintUuid :: new_v4( ) ;
215
239
216
- let datastore_datasets = runtime. block_on( async {
240
+ let ( result_stats , datastore_datasets) = runtime. block_on( async {
217
241
let ( blueprint_datasets, inventory_datasets) = proptest_do_prep(
218
242
opctx,
219
243
datastore,
220
244
blueprint_id,
221
245
& prep,
222
246
) . await ;
223
247
224
- reconcile_debug_datasets(
248
+ let result_stats = reconcile_debug_datasets(
225
249
opctx,
226
250
datastore,
227
251
blueprint_id,
@@ -231,15 +255,19 @@ mod tests {
231
255
. await
232
256
. expect( "reconciled debug dataset" ) ;
233
257
234
- datastore
258
+ let datastore_datasets = datastore
235
259
. debug_dataset_list_all_batched( opctx)
236
260
. await
237
261
. unwrap( )
238
262
. into_iter( )
239
263
. map( |d| ( d. id( ) , d) )
240
- . collect:: <BTreeMap <_, _>>( )
264
+ . collect:: <BTreeMap <_, _>>( ) ;
265
+
266
+ ( result_stats, datastore_datasets)
241
267
} ) ;
242
268
269
+ let mut expected_stats = DebugDatasetsRendezvousStats :: default ( ) ;
270
+
243
271
for ( id, prep) in prep {
244
272
let id: DatasetUuid = u32_to_id( id) ;
245
273
@@ -252,6 +280,32 @@ mod tests {
252
280
prep. disposition == ArbitraryDisposition :: InService ;
253
281
let in_inventory = prep. in_inventory;
254
282
283
+ // Validate rendezvous output
284
+ match ( in_db_before, in_service, in_inventory) {
285
+ // "Not in database and expunged" is consistent with "hard
286
+ // deleted", which we can't separate from "already
287
+ // tombstoned".
288
+ ( false , false , _) => {
289
+ expected_stats. num_already_tombstoned += 1 ;
290
+ }
291
+ // "In database and expunged" should result in tombstoning.
292
+ ( true , false , _) => {
293
+ expected_stats. num_tombstoned += 1 ;
294
+ }
295
+ // In service but already existed
296
+ ( true , true , _) => {
297
+ expected_stats. num_already_exist += 1 ;
298
+ }
299
+ // In service, not in db yet, but not in inventory
300
+ ( false , true , false ) => {
301
+ expected_stats. num_not_in_inventory += 1 ;
302
+ }
303
+ // In service, not in db yet, present in inventory
304
+ ( false , true , true ) => {
305
+ expected_stats. num_inserted += 1 ;
306
+ }
307
+ }
308
+
255
309
// Validate database state
256
310
match ( in_db_before, in_service, in_inventory) {
257
311
// Wasn't in DB, isn't in service: should still not be in db
@@ -298,6 +352,8 @@ mod tests {
298
352
}
299
353
}
300
354
}
355
+
356
+ assert_eq!( result_stats, expected_stats) ;
301
357
} ) ;
302
358
303
359
runtime. block_on ( db. terminate ( ) ) ;
0 commit comments