diff --git a/src/workerd/api/global-scope.c++ b/src/workerd/api/global-scope.c++ index 402a6456430..ba4cb0c9f8e 100644 --- a/src/workerd/api/global-scope.c++ +++ b/src/workerd/api/global-scope.c++ @@ -565,12 +565,20 @@ kj::Promise ServiceWorkerGlobalScope::runAlarm(kj: // We only want to retry against limits if it's a user error. By default let's check if the // output gate is broken. - auto shouldRetryCountsAgainstLimits = !context.isOutputGateBroken(); - - // We want to alert if we aren't going to count this alarm retry against limits - if (log && context.isOutputGateBroken()) { + // + // Special case: when a user throws inside blockConcurrencyWhile after starting a storage + // operation, the output gate may also appear broken as a secondary side-effect. Treat it + // as a user error so retries count against the limit and the alarm is eventually deleted. + auto isInputGateBrokenByUser = jsg::isExceptionFromInputGateBroken(description); + auto shouldRetryCountsAgainstLimits = + !context.isOutputGateBroken() || isInputGateBrokenByUser; + + // We want to alert if we aren't going to count this alarm retry against limits. + // Skip logging when the output gate broke as a secondary effect of a user throw inside + // blockConcurrencyWhile: that is expected behaviour and already counted as a user error. + if (!isInputGateBrokenByUser && log && context.isOutputGateBroken()) { LOG_NOSENTRY(ERROR, "output lock broke during alarm execution", actorId, description); - } else if (context.isOutputGateBroken()) { + } else if (!isInputGateBrokenByUser && context.isOutputGateBroken()) { if (isUserError) { // The handler failed because the user overloaded the object. It's their fault, we'll not // retry forever. @@ -605,13 +613,20 @@ kj::Promise ServiceWorkerGlobalScope::runAlarm(kj: } // We only want to retry against limits if it's a user error. By default let's assume it's our // fault. - auto shouldRetryCountsAgainstLimits = false; + // + // Special case: when a user throws inside blockConcurrencyWhile after starting a storage + // operation, the output gate also appears broken as a secondary side-effect. Treat it + // as a user error so retries count against the limit and the alarm is eventually deleted. + auto isInputGateBrokenByUser = jsg::isExceptionFromInputGateBroken(e.getDescription()); + auto shouldRetryCountsAgainstLimits = isInputGateBrokenByUser; if (auto desc = e.getDescription(); !jsg::isTunneledException(desc) && !jsg::isDoNotLogException(desc)) { - if (isInterestingException(e)) { - LOG_EXCEPTION("alarmOutputLock"_kj, e); - } else { - LOG_NOSENTRY(ERROR, "output lock broke after executing alarm", actorId, e); + if (!isInputGateBrokenByUser) { + if (isInterestingException(e)) { + LOG_EXCEPTION("alarmOutputLock"_kj, e); + } else { + LOG_NOSENTRY(ERROR, "output lock broke after executing alarm", actorId, e); + } } } else { if (e.getDetail(jsg::EXCEPTION_IS_USER_ERROR) != kj::none) { diff --git a/src/workerd/jsg/exception.c++ b/src/workerd/jsg/exception.c++ index af5005985f6..181171cb3c4 100644 --- a/src/workerd/jsg/exception.c++ +++ b/src/workerd/jsg/exception.c++ @@ -165,4 +165,10 @@ kj::String annotateBroken(kj::StringPtr internalMessage, kj::StringPtr brokennes tunneledInfo.message); } +bool isExceptionFromInputGateBroken(kj::StringPtr description) { + // annotateBroken() produces "broken.inputGateBroken; {message}", optionally prefixed with + // "remote." when crossing RPC boundaries. Strip the remote prefix first, then check the tag. + return stripRemoteExceptionPrefix(description).startsWith("broken.inputGateBroken; "_kj); +} + } // namespace workerd::jsg diff --git a/src/workerd/jsg/exception.h b/src/workerd/jsg/exception.h index 3aa694df9fb..2aa977d785d 100644 --- a/src/workerd/jsg/exception.h +++ b/src/workerd/jsg/exception.h @@ -153,6 +153,10 @@ TunneledErrorType tunneledErrorType(kj::StringPtr internalMessage); // Annotate an internal message with the corresponding brokenness reason. kj::String annotateBroken(kj::StringPtr internalMessage, kj::StringPtr brokennessReason); +// Returns true if the exception description originated from user code throwing inside +// blockConcurrencyWhile. Handles any leading "remote." prefixes transparently. +bool isExceptionFromInputGateBroken(kj::StringPtr description); + constexpr kj::Exception::DetailTypeId EXCEPTION_IS_USER_ERROR = 0x82aff7d637c30e47ull; struct ExceptionToJsOptions {