Skip to content

Commit

Permalink
validator: fix potential infinite loop
Browse files Browse the repository at this point in the history
  • Loading branch information
davidpanderson committed Sep 20, 2024
1 parent 29af35c commit 266c050
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 10 deletions.
13 changes: 10 additions & 3 deletions sched/script_validator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,11 @@ void validate_handler_usage() {
);
}

// see validate_util2.h for return values
// run script to check a result
// if script exits with VAL_RESULT_TRANSIENT_ERROR, return that;
// the WU will be validated again after a delay.
//
// any other nonzero return means the result is not valid
//
int init_result(RESULT& result, void*&) {
if (init_script.empty()) {
Expand Down Expand Up @@ -144,10 +148,13 @@ int init_result(RESULT& result, void*&) {
int s = WEXITSTATUS(retval);
if (!s) return 0;
if (s == VAL_RESULT_TRANSIENT_ERROR) {
log_messages.printf(MSG_NORMAL,
"init script return transient error"
);
return VAL_RESULT_TRANSIENT_ERROR;
}
log_messages.printf(MSG_CRITICAL,
"init script %s failed: %d\n", cmd, s
log_messages.printf(MSG_NORMAL,
"init script %s returned: %d\n", cmd, s
);
return -1;
}
Expand Down
17 changes: 10 additions & 7 deletions sched/validate_util2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ using std::vector;
// (i.e. there was a broken NFS mount).
// Should call this again after a while.
//
// always return zero
//
int check_set(
vector<RESULT>& results, WORKUNIT& wu,
DB_ID_TYPE& canonicalid, double&, bool& retry
Expand All @@ -74,6 +76,9 @@ int check_set(
had_error.resize(n);

// Initialize results
// For each one we potentially allocate data,
// so always exit via goto cleanup:
// to free this mem

for (i=0; i<n; i++) {
data[i] = NULL;
Expand Down Expand Up @@ -134,29 +139,29 @@ int check_set(

if (good_results < wu.min_quorum) goto cleanup;

// Compare results
// for each result, count how many it matches (including itself)
// If this is at least min_valid, it's the canonical result

for (i=0; i<n; i++) {
if (had_error[i]) continue;
vector<bool> matches;
matches.resize(n);
neq = 0;
for (j=0; j!=n; j++) {
for (j=0; j<n; j++) {
if (had_error[j]) continue;
bool match = false;
if (i == j) {
++neq;
matches[j] = true;
continue;
}
bool match = false;
retval = compare_results(
results[i], data[i], results[j], data[j], match
);
switch (retval) {
case ERR_OPENDIR:
case VAL_RESULT_TRANSIENT_ERROR:
retry = true;
retval = 0;
goto cleanup;
case 0:
if (match) {
Expand All @@ -169,7 +174,6 @@ int check_set(
"check_set(): compare_results([RESULT#%lu %s], [RESULT#%lu %s]) failed\n",
results[i].id, results[i].name, results[j].id, results[j].name
);
goto cleanup;
}
}
if (neq >= min_valid) {
Expand All @@ -185,12 +189,11 @@ int check_set(
}
}

retval = 0;
cleanup:
for (i=0; i<n; i++) {
cleanup_result(results[i], data[i]);
}
return retval;
return 0;
}

// a straggler instance has arrived after the WU is already validated.
Expand Down

0 comments on commit 266c050

Please sign in to comment.