Skip to content

Commit

Permalink
[7.4.0] Make it possible to get a disk/remote cache hit for a test fo…
Browse files Browse the repository at this point in the history
…llowing … (#23673)

…a failure of the same test.

A source edit might have occurred in the interim, putting the test back
into a passing state that has been previously stored in the cache. The
current logic disables disk/remote caching when rerunning a test
following a failure, which causes spurious reexecutions (see issue
#11057).

Although this code has been around for a very long time, it seems that
it was originally added to ensure that a flaky test failure doesn't
become sticky. However, for this purpose it suffices to bust the local
cache, as a disk/remote cache should never store failures (if it did,
this would not be a complete fix, and we'd have much bigger problems
elsewhere). Note that Bazel itself never uploads local failures to a
disk/remote cache, but can't control whether a remote executor does.

The fix is to split the `TestRunnerAction.canBeCached` logic into two
versions: one that takes into account a previous test result (for the
local cache check) and another that doesn't (to determine whether we
should attempt to hit the disk/remote cache).

Fixes #11057.

PiperOrigin-RevId: 661319835
Change-Id: I9248cbfa31dd135b2bda971e48bb17d7f828889c 
(cherry picked from commit e9709b7)

Fixes #23259

Co-authored-by: Googler <[email protected]>
  • Loading branch information
fmeum and tjgq authored Sep 19, 2024
1 parent f367f81 commit 56fedfd
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -501,11 +501,46 @@ protected void computeKey(
fp.addStringMap(getExecutionInfo());
}

/**
* Returns whether the test should be executed unconditionally based on the test configuration,
* the test properties, and the previous test result when known.
*/
@Override
public boolean executeUnconditionally() {
// Note: isVolatile must return true if executeUnconditionally can ever return true
// for this instance.
return computeExecuteUnconditionallyFromTestStatus();
return executeUnconditionally(
testConfiguration.cacheTestResults(),
this::maybeReadCacheStatus,
testProperties.isExternal(),
executionSettings.getTotalRuns());
}

@VisibleForTesting
static boolean executeUnconditionally(
TriState cacheTestResults,
Supplier<Optional<TestResultData>> prevStatus, // lazy to avoid I/O if possible
boolean isExternal,
int runsPerTest) {
if (!shouldCacheResult(cacheTestResults, isExternal, runsPerTest)) {
return true;
}
Optional<TestResultData> status = prevStatus.get();
if (status.isEmpty()) {
// Execute unconditionally if a previous test result is not available.
return true;
}
if (!status.get().getCachable()) {
// Execute unconditionally if the previous test result was marked non-cacheable.
// It seems that this can only happen with --experimental_cancel_concurrent_tests.
return true;
}
if (cacheTestResults == TriState.AUTO && !status.get().getTestPassed()) {
// Execute unconditionally if the previous test result was a failure, as otherwise we can
// get stuck forever in the event of a flaky failure.
return true;
}
return false;
}

@Override
Expand Down Expand Up @@ -553,72 +588,41 @@ TestResultData readCacheStatus() throws IOException {
}
}

private boolean computeExecuteUnconditionallyFromTestStatus() {
CacheableTest cacheStatus =
canBeCached(
testConfiguration.cacheTestResults(),
this::maybeReadCacheStatus,
testProperties.isExternal(),
executionSettings.getTotalRuns());
switch (cacheStatus) {
case NO_STATUS_ON_DISK:
// execute unconditionally if no status available on disk
case NO:
return true;
case YES:
return false;
}
throw new IllegalStateException("Unreachable. Bad cache status: " + cacheStatus);
/**
* Returns whether a cached result should be accepted from a disk/remote cache, depending on the
* test configuration and test properties.
*
* <p>This should *not* be used to determine whether to accept a cached result from the action
* cache. Call {@link #executeUnconditionally} instead.
*
* <p>Unlike {@link #executeUnconditionally}, this decision does not depend on the previous test
* result, as otherwise we wouldn't attempt to hit the disk/remote cache when the test has changed
* from a failing to a passing state since the last execution without causing the action to be
* reanalyzed (for example, by editing a source file into a passing state that has been previously
* seen).
*
* <p>We're not concerned about a flaky failure becoming sticky in the disk/remote cache, because
* it's impossible to solve this problem generally. In any case, this can only occur with a remote
* execution implementation that caches failures, as we never upload them to a disk/remote cache
* ourselves.
*/
public boolean shouldCacheResult() {
return shouldCacheResult(
testConfiguration.cacheTestResults(),
testProperties.isExternal(),
executionSettings.getTotalRuns());
}

@VisibleForTesting
static CacheableTest canBeCached(
TriState cacheTestResults,
Supplier<Optional<TestResultData>>
prevStatus, // Lazy evaluation to avoid a disk read if possible.
boolean isExternal,
int runsPerTest) {
static boolean shouldCacheResult(
TriState cacheTestResults, boolean isExternal, int runsPerTest) {
if (isExternal || cacheTestResults == TriState.NO) {
return CacheableTest.NO;
return false;
}
if (cacheTestResults == TriState.AUTO && runsPerTest > 1) {
return CacheableTest.NO;
}
Optional<TestResultData> status = prevStatus.get();
// unable to read status from disk
if (status.isEmpty()) {
return CacheableTest.NO_STATUS_ON_DISK;
}
if (!status.get().getCachable()) {
return CacheableTest.NO;
}
if (cacheTestResults == TriState.AUTO && !status.get().getTestPassed()) {
return CacheableTest.NO;
}
return CacheableTest.YES;
}

/**
* Returns whether caching has been deemed safe by looking at the previous test run (for local
* caching). If the previous run is not present or cached status was retrieved unsuccessfully,
* return "true" here, as remote execution caching should be safe.
*/
public boolean shouldCacheResult() {
CacheableTest cacheStatus =
canBeCached(
testConfiguration.cacheTestResults(),
this::maybeReadCacheStatus,
testProperties.isExternal(),
executionSettings.getTotalRuns());
switch (cacheStatus) {
// optimistically cache results if status unavailable
case YES:
case NO_STATUS_ON_DISK:
return true;
case NO:
return false;
return false;
}
throw new IllegalStateException("Unreachable. Bad cache status: " + cacheStatus);
return true;
}

@Override
Expand Down Expand Up @@ -1298,11 +1302,4 @@ public boolean isEmpty() {
return false;
}
}

@VisibleForTesting
enum CacheableTest {
YES,
NO,
NO_STATUS_ON_DISK
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ public TestRunnerSpawn createTestRunnerSpawn(
Map<String, String> executionInfo =
new TreeMap<>(action.getTestProperties().getExecutionInfo());
if (!action.shouldCacheResult()) {
// TODO(tjgq): We want to reject a previously cached result, but not prevent the result of the
// current execution from being uploaded. We should introduce a separate execution requirement
// for this.
executionInfo.put(ExecutionRequirements.NO_CACHE, "");
}
executionInfo.put(ExecutionRequirements.TIMEOUT, "" + getTimeout(action).getSeconds());
Expand Down
38 changes: 37 additions & 1 deletion src/test/shell/bazel/disk_cache_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,40 @@ EOF

}

run_suite "local action cache test"
function test_cache_hit_on_source_edit_after_test_failure() {
# Regression test for https://github.com/bazelbuild/bazel/issues/11057.

local -r CACHE_DIR="${TEST_TMPDIR}/cache"
rm -rf "$CACHE_DIR"

mkdir -p a
cat > a/BUILD <<'EOF'
sh_test(
name = "test",
srcs = ["test.sh"],
)
EOF
echo "exit 0" > a/test.sh
chmod +x a/test.sh

# Populate the cache with a passing test.
bazel test --disk_cache="$CACHE_DIR" //a:test >& $TEST_log \
|| fail "Expected test to pass"

# Turn the test into a failing one.
echo "exit 1" > a/test.sh

# Check that the test fails.
bazel test --disk_cache="$CACHE_DIR" //a:test >& $TEST_log \
&& fail "Expected test to fail"

# Turn the test into a passing one again.
echo "exit 0" > a/test.sh

# Check that we hit the previously populated cache.
bazel test --disk_cache="$CACHE_DIR" //a:test >& $TEST_log \
|| fail "Expected test to pass"
expect_log "(cached) PASSED"
}

run_suite "disk cache test"

0 comments on commit 56fedfd

Please sign in to comment.