diff --git a/.github/workflows/smoketest-dotnet-isolated-v4.yml b/.github/workflows/smoketest-dotnet-isolated-v4.yml
index f818ff7ae..474f48448 100644
--- a/.github/workflows/smoketest-dotnet-isolated-v4.yml
+++ b/.github/workflows/smoketest-dotnet-isolated-v4.yml
@@ -19,7 +19,79 @@ jobs:
steps:
- uses: actions/checkout@v2
- # Validation is blocked on https://github.com/Azure/azure-functions-host/issues/7995
- - name: Run V4 .NET Isolated Smoke Test
- run: test/SmokeTests/e2e-test.ps1 -DockerfilePath test/SmokeTests/OOProcSmokeTests/DotNetIsolated/Dockerfile -HttpStartPath api/StartHelloCitiesTyped -NoValidation
+ # Install .NET versions
+ - name: Set up .NET Core 3.1
+ uses: actions/setup-dotnet@v3
+ with:
+ dotnet-version: '3.1.x'
+
+ - name: Set up .NET Core 2.1
+ uses: actions/setup-dotnet@v3
+ with:
+ dotnet-version: '2.1.x'
+
+ - name: Set up .NET 6.x
+ uses: actions/setup-dotnet@v3
+ with:
+ dotnet-version: '6.x'
+
+ - name: Set up .NET 8.x
+ uses: actions/setup-dotnet@v3
+ with:
+ dotnet-version: '8.x'
+
+ # Install Azurite
+ - name: Set up Node.js (needed for Azurite)
+ uses: actions/setup-node@v3
+ with:
+ node-version: '18.x' # Azurite requires at least Node 18
+
+ - name: Install Azurite
+ run: npm install -g azurite
+
+ - name: Restore WebJobs extension
+ run: dotnet restore $solution
+
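+ # Pack the locally built extension and register ~/packages as a local NuGet feed, so the
+ # smoke test app can restore the bits built in this run instead of the published package.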
+ - name: Build and pack WebJobs extension
+ run: cd ./src/WebJobs.Extensions.DurableTask &&
+ mkdir ./out &&
+ dotnet build -c Release WebJobs.Extensions.DurableTask.csproj --output ./out &&
+ mkdir ~/packages &&
+ dotnet nuget push ./out/Microsoft.Azure.WebJobs.Extensions.DurableTask.*.nupkg --source ~/packages &&
+ dotnet nuget add source ~/packages
+
+ - name: Build .NET Isolated Smoke Test
+ run: cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated &&
+ dotnet restore --verbosity normal &&
+ dotnet build -c Release
+
+ - name: Install core tools
+ run: npm i -g azure-functions-core-tools@4 --unsafe-perm true
+
+ # Run smoke tests
+ # Unlike other smoke tests, the .NET isolated smoke tests run outside of a Docker container, due to race conditions
+ # that occur when building the smoke test app in Docker and cause the build to fail. This is a temporary workaround
+ # until the root cause is identified and fixed.
+
+ - name: Run smoke tests (Hello Cities)
+ shell: pwsh
+ run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 &
+ cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 &
+ ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/StartHelloCitiesTyped
+
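+ # This step does not start the Functions host explicitly; run-smoke-tests.ps1 starts the host
+ # itself if it is not already running.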
+ - name: Run smoke tests (Process Exit)
+ shell: pwsh
+ run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 &
+ ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartProcessExitOrchestrator
+
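+ # The Timeout test relies on the 30-second functionTimeout configured in host.json;
+ # the orchestrator sleeps for 1 minute, which exceeds that limit.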
+ - name: Run smoke tests (Timeout)
+ shell: pwsh
+ run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 &
+ cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 &
+ ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartTimeoutOrchestrator
+
+ - name: Run smoke tests (OOM)
shell: pwsh
+ run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 &
+ cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 &
+ ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartOOMOrchestrator
\ No newline at end of file
diff --git a/src/WebJobs.Extensions.DurableTask/ContextImplementations/RemoteOrchestratorContext.cs b/src/WebJobs.Extensions.DurableTask/ContextImplementations/RemoteOrchestratorContext.cs
index 9c4d6a02e..86cb21c84 100644
--- a/src/WebJobs.Extensions.DurableTask/ContextImplementations/RemoteOrchestratorContext.cs
+++ b/src/WebJobs.Extensions.DurableTask/ContextImplementations/RemoteOrchestratorContext.cs
@@ -120,6 +120,31 @@ internal void SetResult(string orchestratorResponseJsonText)
this.SetResultInternal(result);
}
+ private void ThrowIfPlatformLevelException(FailureDetails failureDetails)
+ {
+ // Recursively inspect the FailureDetails of the failed orchestrator and throw if a platform-level exception is detected.
+ //
+ // Today, this method only checks for `OutOfMemoryException`. In the future, we may want to add more cases.
+ // Other known platform-level exceptions, like timeouts or process exits caused by `Environment.FailFast`, do not yield
+ // an `OrchestratorExecutionResult` because the isolated invocation is abruptly terminated. Therefore, they don't need to be
+ // handled in this method.
+ // However, our tests reveal that OOMs are, surprisingly, caught and returned as an `OrchestratorExecutionResult`
+ // by the isolated process, and thus need special handling.
+ //
+ // It's unclear whether all OOMs are caught by the isolated process (probably not), or whether other platform-level
+ // errors are also caught in the isolated process and returned as an `OrchestratorExecutionResult`. Let's add them
+ // to this method as we encounter them.
+ if (failureDetails.InnerFailure?.IsCausedBy<OutOfMemoryException>() ?? false)
+ {
+ throw new SessionAbortedException(failureDetails.ErrorMessage);
+ }
+
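+ // Otherwise, keep walking the chain of inner failures; the recursion stops once InnerFailure is null.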
+ if (failureDetails.InnerFailure != null)
+ {
+ this.ThrowIfPlatformLevelException(failureDetails.InnerFailure);
+ }
+ }
+
private void SetResultInternal(OrchestratorExecutionResult result)
{
// Look for an orchestration completion action to see if we need to grab the output.
@@ -133,6 +158,14 @@ private void SetResultInternal(OrchestratorExecutionResult result)
if (completeAction.OrchestrationStatus == OrchestrationStatus.Failed)
{
+ // If the orchestrator failed due to a platform-level error in the isolated process,
+ // we should re-throw that exception in the host (this process) invocation pipeline,
+ // so the invocation can be retried.
+ if (completeAction.FailureDetails != null)
+ {
+ this.ThrowIfPlatformLevelException(completeAction.FailureDetails);
+ }
+
string message = completeAction switch
{
{ FailureDetails: { } f } => f.ErrorMessage,
diff --git a/src/WebJobs.Extensions.DurableTask/OutOfProcMiddleware.cs b/src/WebJobs.Extensions.DurableTask/OutOfProcMiddleware.cs
index 4d514c670..88a7612dc 100644
--- a/src/WebJobs.Extensions.DurableTask/OutOfProcMiddleware.cs
+++ b/src/WebJobs.Extensions.DurableTask/OutOfProcMiddleware.cs
@@ -138,10 +138,15 @@ await this.LifeCycleNotificationHelper.OrchestratorStartingAsync(
byte[] triggerReturnValueBytes = Convert.FromBase64String(triggerReturnValue);
P.OrchestratorResponse response = P.OrchestratorResponse.Parser.ParseFrom(triggerReturnValueBytes);
+
+ // SetResult may throw if a platform-level error is encountered (like an out-of-memory exception).
context.SetResult(
response.Actions.Select(ProtobufUtils.ToOrchestratorAction),
response.CustomStatus);
+ // Here we throw if the orchestrator completed with an application-level error. When we do this,
+ // the resulting exception will be of type `OrchestrationFailureException`, which is reserved
+ // for application-level errors that do not need to be retried.
context.ThrowIfFailed();
},
#pragma warning restore CS0618 // Type or member is obsolete (not intended for general public use)
@@ -159,6 +164,19 @@ await this.LifeCycleNotificationHelper.OrchestratorStartingAsync(
// Re-throw so we can abort this invocation.
this.HostLifetimeService.OnStopping.ThrowIfCancellationRequested();
}
+
+ // We abort the invocation on platform-level errors such as:
+ // - a timeout
+ // - an out of memory exception
+ // - a worker process exit
+ if (functionResult.Exception is Host.FunctionTimeoutException
+ || functionResult.Exception?.InnerException is SessionAbortedException // see RemoteOrchestratorContext.ThrowIfPlatformLevelException for details on OOM handling
+ || (functionResult.Exception?.InnerException?.GetType().ToString().Contains("WorkerProcessExitException") ?? false))
+ {
+ // TODO: the `WorkerProcessExitException` type is not exposed in our dependencies, it's part of WebJobs.Host.Script.
+ // Should we add that dependency or should it be exposed in WebJobs.Host?
+ throw functionResult.Exception;
+ }
}
catch (Exception hostRuntimeException)
{
@@ -238,8 +256,7 @@ await this.LifeCycleNotificationHelper.OrchestratorFailedAsync(
else
{
// the function failed for some other reason
-
- string exceptionDetails = functionResult.Exception.ToString();
+ string exceptionDetails = functionResult.Exception?.ToString() ?? "Framework-internal message: exception details could not be extracted";
this.TraceHelper.FunctionFailed(
this.Options.HubName,
@@ -258,7 +275,7 @@ await this.LifeCycleNotificationHelper.OrchestratorFailedAsync(
orchestratorResult = OrchestratorExecutionResult.ForFailure(
message: $"Function '{functionName}' failed with an unhandled exception.",
- functionResult.Exception);
+ functionResult.Exception ?? new Exception($"Function '{functionName}' failed with an unknown unhandled exception"));
}
// Send the result of the orchestrator function to the DTFx dispatch pipeline.
diff --git a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/DotNetIsolated.sln b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/DotNetIsolated.sln
new file mode 100644
index 000000000..a93cc6f6e
--- /dev/null
+++ b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/DotNetIsolated.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.5.002.0
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotNetIsolated", "DotNetIsolated.csproj", "{B2DBA49D-9D25-46DB-8968-15D5E83B4060}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Any CPU = Debug|Any CPU
+ Release|Any CPU = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {B2DBA49D-9D25-46DB-8968-15D5E83B4060}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {B2DBA49D-9D25-46DB-8968-15D5E83B4060}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {B2DBA49D-9D25-46DB-8968-15D5E83B4060}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {B2DBA49D-9D25-46DB-8968-15D5E83B4060}.Release|Any CPU.Build.0 = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {0954D7B4-582F-4F85-AE3E-5D503FB07DB1}
+ EndGlobalSection
+EndGlobal
diff --git a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/FaultyOrchestrators.cs b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/FaultyOrchestrators.cs
new file mode 100644
index 000000000..8332fa436
--- /dev/null
+++ b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/FaultyOrchestrators.cs
@@ -0,0 +1,165 @@
+using Microsoft.Azure.Functions.Worker;
+using Microsoft.Azure.Functions.Worker.Http;
+using Microsoft.DurableTask;
+using Microsoft.DurableTask.Client;
+using Microsoft.Extensions.Logging;
+using System;
+using System.Collections.Generic;
+using System.Threading.Tasks;
+
+namespace FaultOrchestrators
+{
+ public static class FaultyOrchestrators
+ {
+ [Function(nameof(OOMOrchestrator))]
+ public static Task OOMOrchestrator(
+ [OrchestrationTrigger] TaskOrchestrationContext context)
+ {
+ // this orchestrator is not deterministic, on purpose.
+ // we use the non-determinism to force an OOM exception on only the first replay
+
+ // check if a file named "replayEvidence" exists in the source code directory, and create it if it does not.
+ // From experience, this code runs in `/bin/output/`, so we store the file two directories above.
+ // We do this because the /bin/output/ directory gets overridden during the build process, which happens automatically
+ // when `func host start` is re-invoked.
+ string evidenceFile = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), "..", "..", "replayEvidence");
+ bool isTheFirstReplay = !System.IO.File.Exists(evidenceFile);
+ if (isTheFirstReplay)
+ {
+ System.IO.File.Create(evidenceFile).Close();
+
+ // force the process to run out of memory
+ List<byte[]> data = new List<byte[]>();
+
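+ // each iteration allocates a 1 GiB buffer (1024 * 1024 * 1024 bytes), so memory should be exhausted within a few iterations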
+ for (int i = 0; i < 10000000; i++)
+ {
+ data.Add(new byte[1024 * 1024 * 1024]);
+ }
+
+ // we expect the code to never reach this statement, it should OOM.
+ // we throw just in case the code does not time out. This should fail the test
+ throw new Exception("this should never be reached");
+ }
+ else {
+ // if it's not the first replay, delete the evidence file and return
+ System.IO.File.Delete(evidenceFile);
+ return Task.CompletedTask;
+ }
+ }
+
+ [Function(nameof(ProcessExitOrchestrator))]
+ public static Task ProcessExitOrchestrator(
+ [OrchestrationTrigger] TaskOrchestrationContext context)
+ {
+ // this orchestrator is not deterministic, on purpose.
+ // we use the non-determinism to force a sudden process exit on only the first replay
+
+ // check if a file named "replayEvidence" exists in the source code directory, and create it if it does not.
+ // From experience, this code runs in `/bin/output/`, so we store the file two directories above.
+ // We do this because the /bin/output/ directory gets overridden during the build process, which happens automatically
+ // when `func host start` is re-invoked.
+ string evidenceFile = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), "..", "..", "replayEvidence");
+ bool isTheFirstReplay = !System.IO.File.Exists(evidenceFile);
+ if (isTheFirstReplay)
+ {
+ System.IO.File.Create(evidenceFile).Close();
+
+ // force sudden crash
+ Environment.FailFast("Simulating crash!");
+ throw new Exception("this should never be reached");
+ }
+ else {
+ // if it's not the first replay, delete the evidence file and return
+ System.IO.File.Delete(evidenceFile);
+ return Task.CompletedTask;
+ }
+ }
+
+ [Function(nameof(TimeoutOrchestrator))]
+ public static Task TimeoutOrchestrator(
+ [OrchestrationTrigger] TaskOrchestrationContext context)
+ {
+ // this orchestrator is not deterministic, on purpose.
+ // we use the non-determinism to force a timeout on only the first replay
+
+ // check if a file named "replayEvidence" exists in the source code directory, and create it if it does not.
+ // From experience, this code runs in `/bin/output/`, so we store the file two directories above.
+ // We do this because the /bin/output/ directory gets overridden during the build process, which happens automatically
+ // when `func host start` is re-invoked.
+ string evidenceFile = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), "..", "..", "replayEvidence");
+ bool isTheFirstReplay = !System.IO.File.Exists(evidenceFile);
+
+ if (isTheFirstReplay)
+ {
+ System.IO.File.Create(evidenceFile).Close();
+
+ // force the process to timeout after a 1 minute wait
+ System.Threading.Thread.Sleep(TimeSpan.FromMinutes(1));
+
+ // we expect the code to never reach this statement, it should time out.
+ // we throw just in case the code does not time out. This should fail the test
+ throw new Exception("this should never be reached");
+ }
+ else {
+ // if it's not the first replay, delete the evidence file and return
+ System.IO.File.Delete(evidenceFile);
+ return Task.CompletedTask;
+ }
+ }
+
+ [Function("durable_HttpStartOOMOrchestrator")]
+ public static async Task<HttpResponseData> HttpStartOOMOrchestrator(
+ [HttpTrigger(AuthorizationLevel.Anonymous, "get", "post")] HttpRequestData req,
+ [DurableClient] DurableTaskClient client,
+ FunctionContext executionContext)
+ {
+ ILogger logger = executionContext.GetLogger("durable_HttpStartOOMOrchestrator");
+
+ // Function input comes from the request content.
+ string instanceId = await client.ScheduleNewOrchestrationInstanceAsync(
+ nameof(OOMOrchestrator));
+
+ logger.LogInformation("Started orchestration with ID = '{instanceId}'.", instanceId);
+
+ // Returns an HTTP 202 response with an instance management payload.
+ // See https://learn.microsoft.com/azure/azure-functions/durable/durable-functions-http-api#start-orchestration
+ return await client.CreateCheckStatusResponseAsync(req, instanceId);
+ }
+
+ [Function("durable_HttpStartProcessExitOrchestrator")]
+ public static async Task<HttpResponseData> HttpStartProcessExitOrchestrator(
+ [HttpTrigger(AuthorizationLevel.Anonymous, "get", "post")] HttpRequestData req,
+ [DurableClient] DurableTaskClient client,
+ FunctionContext executionContext)
+ {
+ ILogger logger = executionContext.GetLogger("durable_HttpStartProcessExitOrchestrator");
+
+ // Function input comes from the request content.
+ string instanceId = await client.ScheduleNewOrchestrationInstanceAsync(
+ nameof(ProcessExitOrchestrator));
+
+ logger.LogInformation("Started orchestration with ID = '{instanceId}'.", instanceId);
+
+ // Returns an HTTP 202 response with an instance management payload.
+ // See https://learn.microsoft.com/azure/azure-functions/durable/durable-functions-http-api#start-orchestration
+ return await client.CreateCheckStatusResponseAsync(req, instanceId);
+ }
+
+ [Function("durable_HttpStartTimeoutOrchestrator")]
+ public static async Task<HttpResponseData> HttpStartTimeoutOrchestrator(
+ [HttpTrigger(AuthorizationLevel.Anonymous, "get", "post")] HttpRequestData req,
+ [DurableClient] DurableTaskClient client,
+ FunctionContext executionContext)
+ {
+ ILogger logger = executionContext.GetLogger("durable_HttpStartTimeoutOrchestrator");
+
+ // Function input comes from the request content.
+ string instanceId = await client.ScheduleNewOrchestrationInstanceAsync(
+ nameof(TimeoutOrchestrator));
+
+ logger.LogInformation("Started orchestration with ID = '{instanceId}'.", instanceId);
+
+ // Returns an HTTP 202 response with an instance management payload.
+ // See https://learn.microsoft.com/azure/azure-functions/durable/durable-functions-http-api#start-orchestration
+ return await client.CreateCheckStatusResponseAsync(req, instanceId);
+ }
+ }
+}
diff --git a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/host.json b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/host.json
index 278b52cde..0ec9c6a89 100644
--- a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/host.json
+++ b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/host.json
@@ -7,5 +7,14 @@
"excludedTypes": "Request"
}
}
- }
+ },
+ "extensions": {
+ "durableTask": {
+ "storageProvider": {
+ "maxQueuePollingInterval": "00:00:01",
+ "controlQueueVisibilityTimeout": "00:01:00"
+ }
+ }
+ },
+ "functionTimeout": "00:00:30"
}
\ No newline at end of file
diff --git a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1
new file mode 100644
index 000000000..79d679b80
--- /dev/null
+++ b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1
@@ -0,0 +1,119 @@
+# This is a simple test runner to validate the .NET isolated smoke tests.
+# It supersedes the usual e2e-test.ps1 script for the .NET isolated scenario because building the smoke test app
+# on the docker image is unreliable. For more details, see: https://github.com/Azure/azure-functions-host/issues/7995
+
+# This script is designed specifically to test cases where the isolated worker process experiences a platform failure:
+# timeouts, OOMs, etc. For that reason, it is careful to check that the Functions Host is running and healthy at regular
+# intervals. This makes these tests run more slowly than other test categories.
+
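+# Example invocation (run from the repository root, as in the smoke test workflow):
+#   ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartOOMOrchestrator
+# The -HttpStartPath values map to the HTTP starter functions defined in FaultyOrchestrators.cs.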
+param(
+ [Parameter(Mandatory=$true)]
+ [string]$HttpStartPath
+)
+
+$retryCount = 0;
+$statusUrl = $null;
+$success = $false;
+$haveManuallyRestartedHost = $false;
+
+Do {
+ $testIsRunning = $true;
+
+ # Start the functions host if it's not running already.
+ # Then give it up to 1 minute to start up.
+ # This is a long wait, but from experience the CI can be slow to start up the host, especially after a platform error.
+ $isFunctionsHostRunning = (Get-Process -Name func -ErrorAction SilentlyContinue)
+ if ($isFunctionsHostRunning -eq $null) {
+ Write-Host "Starting the Functions host..." -ForegroundColor Yellow
+
+ # The '&' operator is used to run the command in the background
+ cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 &
+ Write-Host "Waiting for the Functions host to start up..." -ForegroundColor Yellow
+ Start-Sleep -Seconds 60
+ }
+
+
+ try {
+ # Make sure the Functions runtime is up and running
+ $pingUrl = "http://localhost:7071/admin/host/ping"
+ Write-Host "Pinging app at $pingUrl to ensure the host is healthy" -ForegroundColor Yellow
+ Invoke-RestMethod -Method Post -Uri $pingUrl
+ Write-Host "Host is healthy!" -ForegroundColor Green
+
+ # Start orchestrator if it hasn't been started yet
+ if ($statusUrl -eq $null){
+ $startOrchestrationUri = "http://localhost:7071/$HttpStartPath"
+ Write-Host "Starting a new orchestration instance via POST to $startOrchestrationUri..." -ForegroundColor Yellow
+
+ $result = Invoke-RestMethod -Method Post -Uri $startOrchestrationUri
+ Write-Host "Started orchestration with instance ID '$($result.id)'!" -ForegroundColor Yellow
+ Write-Host "Waiting for orchestration to complete..." -ForegroundColor Yellow
+
+ $statusUrl = $result.statusQueryGetUri
+
+ # sleep for a bit to give the orchestrator a chance to start,
+ # then loop once more in case the orchestrator ran quickly, made the host unhealthy,
+ # and the functions host needs to be restarted
+ Start-Sleep -Seconds 5
+ continue;
+ }
+
+ # Check the orchestrator status
+ $result = Invoke-RestMethod -Method Get -Uri $statusUrl
+ $runtimeStatus = $result.runtimeStatus
+ Write-Host "Orchestration is $runtimeStatus" -ForegroundColor Yellow
+ Write-Host $result
+
+ if ($result.runtimeStatus -eq "Completed") {
+ $success = $true
+ $testIsRunning = $false
+ break
+ }
+ if ($result.runtimeStatus -eq "Failed") {
+ $success = $false
+ $testIsRunning = $false
+ break
+ }
+
+ # If the orchestrator did not complete yet, wait for a bit before checking again
+ Start-Sleep -Seconds 2
+ $retryCount = $retryCount + 1
+
+ } catch {
+ # we expect to enter this 'catch' block if any of our HTTP requests to the host fail.
+ # Some failures observed during development include:
+ # - The host is not running/was restarting/was killed
+ # - The host is running but not healthy (OOMs may cause this), so it needs to be forcibly restarted
+ Write-Host "An error occurred:" -ForegroundColor Red
+ Write-Host $_ -ForegroundColor Red
+
+ # When testing for platform errors, we want to make sure the Functions host is healthy and ready to take requests.
+ # The Host can get into bad states (for example, in an OOM-inducing test) where it does not self-heal.
+ # For these cases, we manually restart the host to ensure it is in a good state. We only do this once per test.
+ if ($haveManuallyRestartedHost -eq $false) {
+
+ # We stop the host process and wait for a bit before checking if it is running again.
+ Write-Host "Restarting the Functions host..." -ForegroundColor Yellow
+ Stop-Process -Name "func" -Force
+ Start-Sleep -Seconds 5
+
+ # Log whether the process kill succeeded
+ $haveManuallyRestartedHost = $true
+ $isHostProcessKilled = ((Get-Process -Name func -ErrorAction SilentlyContinue) -eq $null)
+ Write-Host "Host process killed: $isHostProcessKilled" -ForegroundColor Yellow
+
+ # the beginning of the loop will restart the host
+ continue
+ }
+
+ # Rethrow the original exception
+ throw
+ }
+
+} while (($testIsRunning -eq $true) -and ($retryCount -lt 65))
+
+if ($success -eq $false) {
+ throw "Orchestration failed or did not compete in time! :("
+}
+
+Write-Host "Success!" -ForegroundColor Green
\ No newline at end of file