diff --git a/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java b/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java index 05bf13996af656..10e4258f4b482e 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java +++ b/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java @@ -289,17 +289,26 @@ public ListenableFuture prefetchInputs() Priority.MEDIUM), BulkTransferException.class, (BulkTransferException e) -> { - if (BulkTransferException.allCausedByCacheNotFoundException(e)) { - var code = - (executionOptions.useNewExitCodeForLostInputs - || executionOptions.remoteRetryOnCacheEviction > 0) - ? Code.REMOTE_CACHE_EVICTED - : Code.REMOTE_CACHE_FAILED; + if (executionOptions.useNewExitCodeForLostInputs + || executionOptions.remoteRetryOnTransientCacheError > 0) { + var message = + BulkTransferException.allCausedByCacheNotFoundException(e) + ? "Failed to fetch blobs because they do not exist remotely." + : "Failed to fetch blobs because of a remote cache error."; + throw new EnvironmentalExecException( + e, + FailureDetail.newBuilder() + .setMessage(message) + .setSpawn( + FailureDetails.Spawn.newBuilder().setCode(Code.REMOTE_CACHE_EVICTED)) + .build()); + } else if (BulkTransferException.allCausedByCacheNotFoundException(e)) { throw new EnvironmentalExecException( e, FailureDetail.newBuilder() .setMessage("Failed to fetch blobs because they do not exist remotely.") - .setSpawn(FailureDetails.Spawn.newBuilder().setCode(code)) + .setSpawn( + FailureDetails.Spawn.newBuilder().setCode(Code.REMOTE_CACHE_FAILED)) .build()); } else { throw e; diff --git a/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java b/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java index 6210747c99286a..bb4fdc52c06ffd 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java +++ b/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java @@ -511,24 +511,28 @@ public boolean usingLocalTestJobs() { effectTags = {OptionEffectTag.UNKNOWN}, metadataTags = {OptionMetadataTag.INCOMPATIBLE_CHANGE}, help = - "If set to true, Bazel will use new exit code 39 instead of 34 if remote cache evicts" - + " blobs during the build.") + "If set to true, Bazel will use new exit code 39 instead of 34 if remote cache" + + "errors, including cache evictions, cause the build to fail.") public boolean useNewExitCodeForLostInputs; @Option( + // TODO: when this flag is moved to non-experimental, rename it to a more general name + // to reflect the new logic - it's not only about cache evictions. name = "experimental_remote_cache_eviction_retries", defaultValue = "0", documentationCategory = OptionDocumentationCategory.REMOTE, effectTags = {OptionEffectTag.EXECUTION}, help = - "The maximum number of attempts to retry if the build encountered remote cache eviction" - + " error. A non-zero value will implicitly set" + "The maximum number of attempts to retry if the build encountered a transient remote" + + " cache error that would otherwise fail the build. Applies for example when" + + " artifacts are evicted from the remote cache, or in certain cache failure" + + " conditions. A non-zero value will implicitly set" + " --incompatible_remote_use_new_exit_code_for_lost_inputs to true. A new invocation" + " id will be generated for each attempt. If you generate invocation id and provide" + " it to Bazel with --invocation_id, you should not use this flag. Instead, set flag" + " --incompatible_remote_use_new_exit_code_for_lost_inputs and check for the exit" + " code 39.") - public int remoteRetryOnCacheEviction; + public int remoteRetryOnTransientCacheError; /** An enum for specifying different formats of test output. */ public enum TestOutputFormat { diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java index 934cdff7382f15..0d7d6c7110e8d3 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java @@ -616,7 +616,7 @@ private SpawnResult handleError( } else if (remoteCacheFailed) { status = Status.REMOTE_CACHE_FAILED; if (executionOptions.useNewExitCodeForLostInputs - || executionOptions.remoteRetryOnCacheEviction > 0) { + || executionOptions.remoteRetryOnTransientCacheError > 0) { detailedCode = FailureDetails.Spawn.Code.REMOTE_CACHE_EVICTED; } else { detailedCode = FailureDetails.Spawn.Code.REMOTE_CACHE_FAILED; diff --git a/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java b/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java index b20a1bb41bba6c..509e1524393000 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java +++ b/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java @@ -254,7 +254,7 @@ public BlazeCommandResult exec( attemptedCommandIds, lastResult); break; - } catch (RemoteCacheEvictedException e) { + } catch (RemoteCacheTransientErrorException e) { attemptedCommandIds.add(e.getCommandId()); lastResult = e.getResult(); } @@ -308,7 +308,7 @@ private BlazeCommandResult execExclusively( List commandExtensions, Set attemptedCommandIds, @Nullable BlazeCommandResult lastResult) - throws RemoteCacheEvictedException { + throws RemoteCacheTransientErrorException { // Record the start time for the profiler. Do not put anything before this! long execStartTimeNanos = runtime.getClock().nanoTime(); @@ -344,7 +344,7 @@ private BlazeCommandResult execExclusively( env.getCommandId())); return Preconditions.checkNotNull(lastResult); } else { - outErr.printErrLn("Found remote cache eviction error, retrying the build..."); + outErr.printErrLn("Found transient remote cache error, retrying the build..."); } } @@ -689,13 +689,13 @@ private BlazeCommandResult execExclusively( if (newResult.getExitCode().equals(ExitCode.REMOTE_CACHE_EVICTED)) { var executionOptions = Preconditions.checkNotNull(options.getOptions(ExecutionOptions.class)); - if (attemptedCommandIds.size() < executionOptions.remoteRetryOnCacheEviction) { - throw new RemoteCacheEvictedException(env.getCommandId(), newResult); + if (attemptedCommandIds.size() < executionOptions.remoteRetryOnTransientCacheError) { + throw new RemoteCacheTransientErrorException(env.getCommandId(), newResult); } } return newResult; - } catch (RemoteCacheEvictedException e) { + } catch (RemoteCacheTransientErrorException e) { throw e; } catch (Throwable e) { logger.atSevere().withCause(e).log("Shutting down due to exception"); @@ -730,11 +730,11 @@ private BlazeCommandResult execExclusively( } } - private static class RemoteCacheEvictedException extends IOException { + private static class RemoteCacheTransientErrorException extends IOException { private final UUID commandId; private final BlazeCommandResult result; - private RemoteCacheEvictedException(UUID commandId, BlazeCommandResult result) { + private RemoteCacheTransientErrorException(UUID commandId, BlazeCommandResult result) { this.commandId = commandId; this.result = result; } diff --git a/src/main/protobuf/failure_details.proto b/src/main/protobuf/failure_details.proto index 1e85251ba12a3d..3886f627521f90 100644 --- a/src/main/protobuf/failure_details.proto +++ b/src/main/protobuf/failure_details.proto @@ -226,6 +226,11 @@ message Spawn { // refactored to prohibit undetailed failures UNSPECIFIED_EXECUTION_FAILURE = 12 [(metadata) = { exit_code: 1 }]; FORBIDDEN_INPUT = 13 [(metadata) = { exit_code: 1 }]; + // This also includes other remote cache errors, not just evictions, + // if --incompatible_remote_use_new_exit_code_for_lost_inputs is set. + // TODO: Rename it to a more general name when + // --experimental_remote_cache_eviction_retries is moved to + // non-experimental. REMOTE_CACHE_EVICTED = 14 [(metadata) = { exit_code: 39 }]; SPAWN_LOG_IO_EXCEPTION = 15 [(metadata) = { exit_code: 36 }]; } diff --git a/src/test/shell/bazel/remote/build_without_the_bytes_test.sh b/src/test/shell/bazel/remote/build_without_the_bytes_test.sh index 45dd0007c60151..d800d7a50c6b85 100755 --- a/src/test/shell/bazel/remote/build_without_the_bytes_test.sh +++ b/src/test/shell/bazel/remote/build_without_the_bytes_test.sh @@ -2022,7 +2022,7 @@ EOF //a:bar >& $TEST_log || fail "Failed to build" expect_log 'Failed to fetch blobs because they do not exist remotely.' - expect_log "Found remote cache eviction error, retrying the build..." + expect_log "Found transient remote cache error, retrying the build..." local invocation_ids=$(grep "Invocation ID:" $TEST_log) local first_id=$(echo "$invocation_ids" | head -n 1)