From 724ebea7d4071e60b31397fe0236dd5274454786 Mon Sep 17 00:00:00 2001 From: David Justo Date: Tue, 1 Oct 2024 11:44:13 -0700 Subject: [PATCH] checkpoint --- .../smoketest-dotnet-isolated-v4.yml | 83 +++++++++++-- Directory.Build.targets | 37 ++++++ WebJobs.Extensions.DurableTask.sln | 3 +- eng/ci/official-build.yml | 2 + eng/templates/build.yml | 1 + release_notes.md | 8 +- .../AzureStorageDurabilityProviderFactory.cs | 1 + .../Options/AzureStorageOptions.cs | 14 +++ .../WebJobs.Extensions.DurableTask.csproj | 4 +- .../AssemblyInfo.cs | 2 +- .../Worker.Extensions.DurableTask.csproj | 6 +- .../DotNetIsolated/Dockerfile | 5 + .../DotNetIsolated/FaultyOrchestrators.cs | 59 ++++----- .../OOProcSmokeTests/DotNetIsolated/host.json | 9 ++ .../DotNetIsolated/run-smoke-tests.ps1 | 115 ++++++++++++++++++ test/SmokeTests/e2e-test.ps1 | 19 +-- tools/triageHelper/function_app.py | 2 +- 17 files changed, 307 insertions(+), 63 deletions(-) create mode 100644 Directory.Build.targets create mode 100644 test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 diff --git a/.github/workflows/smoketest-dotnet-isolated-v4.yml b/.github/workflows/smoketest-dotnet-isolated-v4.yml index cf29e4e63..474f48448 100644 --- a/.github/workflows/smoketest-dotnet-isolated-v4.yml +++ b/.github/workflows/smoketest-dotnet-isolated-v4.yml @@ -19,22 +19,79 @@ jobs: steps: - uses: actions/checkout@v2 - # Validation is blocked on https://github.com/Azure/azure-functions-host/issues/7995 - - name: Run V4 .NET Isolated Smoke Test - run: test/SmokeTests/e2e-test.ps1 -DockerfilePath test/SmokeTests/OOProcSmokeTests/DotNetIsolated/Dockerfile -HttpStartPath api/StartHelloCitiesTyped -NoValidation + # Install .NET versions + - name: Set up .NET Core 3.1 + uses: actions/setup-dotnet@v3 + with: + dotnet-version: '3.1.x' + + - name: Set up .NET Core 2.1 + uses: actions/setup-dotnet@v3 + with: + dotnet-version: '2.1.x' + + - name: Set up .NET Core 6.x + uses: actions/setup-dotnet@v3 + with: + 
dotnet-version: '6.x' + + - name: Set up .NET Core 8.x + uses: actions/setup-dotnet@v3 + with: + dotnet-version: '8.x' + + # Install Azurite + - name: Set up Node.js (needed for Azurite) + uses: actions/setup-node@v3 + with: + node-version: '18.x' # Azurite requires at least Node 18 + + - name: Install Azurite + run: npm install -g azurite + + - name: Restore WebJobs extension + run: dotnet restore $solution + + - name: Build and pack WebJobs extension + run: cd ./src/WebJobs.Extensions.DurableTask && + mkdir ./out && + dotnet build -c Release WebJobs.Extensions.DurableTask.csproj --output ./out && + mkdir ~/packages && + dotnet nuget push ./out/Microsoft.Azure.WebJobs.Extensions.DurableTask.*.nupkg --source ~/packages && + dotnet nuget add source ~/packages + + - name: Build .NET Isolated Smoke Test + run: cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && + dotnet restore --verbosity normal && + dotnet build -c Release + + - name: Install core tools + run: npm i -g azure-functions-core-tools@4 --unsafe-perm true + + # Run smoke tests + # Unlike other smoke tests, the .NET isolated smoke tests run outside of a docker container due to race conditions + # when building the smoke test app in docker that cause the build to fail. This is a temporary workaround until the + # root cause is identified and fixed. 
+ + - name: Run smoke tests (Hello Cities) shell: pwsh + run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 & + cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 & + ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/StartHelloCitiesTyped - # Test that OOM errors are recoverable - - name: Run V4 .NET OOM Test - run: test/SmokeTests/e2e-test.ps1 -DockerfilePath test/SmokeTests/OOProcSmokeTests/DotNetIsolated/Dockerfile -HttpStartPath api/durable_HttpStartOOMOrchestrator -NoValidation + - name: Run smoke tests (Process Exit) shell: pwsh + run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 & + ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartProcessExitOrchestrator - # Test that FailFast errors are recoverable - - name: Run V4 .NET FailFast Test - run: test/SmokeTests/e2e-test.ps1 -DockerfilePath test/SmokeTests/OOProcSmokeTests/DotNetIsolated/Dockerfile -HttpStartPath api/durable_HttpStartProcessExitOrchestrator -NoValidation + - name: Run smoke tests (Timeout) shell: pwsh + run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 & + cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 & + ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartTimeoutOrchestrator - # Test that timeout errors are recoverable - - name: Run V4 .NET FailFast Test - run: test/SmokeTests/e2e-test.ps1 -DockerfilePath test/SmokeTests/OOProcSmokeTests/DotNetIsolated/Dockerfile -HttpStartPath api/durable_HttpStartTimeoutOrchestrator -NoValidation - shell: pwsh \ No newline at end of file + - name: Run smoke tests (OOM) + shell: pwsh + run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 & + cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 & + 
./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartOOMOrchestrator \ No newline at end of file diff --git a/Directory.Build.targets b/Directory.Build.targets new file mode 100644 index 000000000..47c2b86a2 --- /dev/null +++ b/Directory.Build.targets @@ -0,0 +1,37 @@ + + + + + + + + + false + <_TranslateUrlPattern>(https://azfunc%40dev\.azure\.com/azfunc/internal/_git|https://dev\.azure\.com/azfunc/internal/_git|https://azfunc\.visualstudio\.com/internal/_git|azfunc%40vs-ssh\.visualstudio\.com:v3/azfunc/internal|git%40ssh\.dev\.azure\.com:v3/azfunc/internal)/([^/\.]+)\.(.+) + <_TranslateUrlReplacement>https://github.com/$2/$3 + + + + + + $([System.Text.RegularExpressions.Regex]::Replace($(ScmRepositoryUrl), $(_TranslateUrlPattern), $(_TranslateUrlReplacement))) + + + + $([System.Text.RegularExpressions.Regex]::Replace(%(SourceRoot.ScmRepositoryUrl), $(_TranslateUrlPattern), $(_TranslateUrlReplacement))) + + + + + \ No newline at end of file diff --git a/WebJobs.Extensions.DurableTask.sln b/WebJobs.Extensions.DurableTask.sln index 353e83805..b710584c2 100644 --- a/WebJobs.Extensions.DurableTask.sln +++ b/WebJobs.Extensions.DurableTask.sln @@ -18,6 +18,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution .editorconfig = .editorconfig azure-pipelines-release-dotnet-isolated.yml = azure-pipelines-release-dotnet-isolated.yml azure-pipelines-release.yml = azure-pipelines-release.yml + Directory.Build.targets = Directory.Build.targets nuget.config = nuget.config README.md = README.md release_notes.md = release_notes.md @@ -94,7 +95,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "PerfTests", "PerfTests", "{ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DFPerfScenariosV4", "test\DFPerfScenarios\DFPerfScenariosV4.csproj", "{FC8AD123-F949-4D21-B817-E5A4BBF7F69B}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Worker.Extensions.DurableTask.Tests", 
"test\Worker.Extensions.DurableTask.Tests\Worker.Extensions.DurableTask.Tests.csproj", "{76DEC17C-BF6A-498A-8E8A-7D6CB2E03284}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Worker.Extensions.DurableTask.Tests", "test\Worker.Extensions.DurableTask.Tests\Worker.Extensions.DurableTask.Tests.csproj", "{76DEC17C-BF6A-498A-8E8A-7D6CB2E03284}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/eng/ci/official-build.yml b/eng/ci/official-build.yml index d0839ba79..e7a871026 100644 --- a/eng/ci/official-build.yml +++ b/eng/ci/official-build.yml @@ -6,6 +6,7 @@ trigger: branches: include: - main + - dev # CI only, does not trigger on PRs. pr: none @@ -19,6 +20,7 @@ schedules: branches: include: - main + - dev always: true resources: diff --git a/eng/templates/build.yml b/eng/templates/build.yml index d61357f0b..3e3e41040 100644 --- a/eng/templates/build.yml +++ b/eng/templates/build.yml @@ -46,6 +46,7 @@ jobs: solution: '**/WebJobs.Extensions.DurableTask.sln' vsVersion: "16.0" configuration: Release + msbuildArgs: /p:FileVersionRevision=$(Build.BuildId) /p:ContinuousIntegrationBuild=true # these flags make package build deterministic - template: ci/sign-files.yml@eng parameters: diff --git a/release_notes.md b/release_notes.md index ad44bb9b3..9b2804fe4 100644 --- a/release_notes.md +++ b/release_notes.md @@ -1,10 +1,10 @@ # Release Notes -## Microsoft.Azure.Functions.Worker.Extensions.DurableTask 1.2.1 +## Microsoft.Azure.Functions.Worker.Extensions.DurableTask 1.1.6 ### New Features -- Fix regression on `TerminateInstanceAsync` API causing invocations to fail with "unimplemented" exceptions (https://github.com/Azure/azure-functions-durable-extension/pull/2829). 
+- Support for new `AllowReplayingTerminalInstances` setting in Azure Storage backend (https://github.com/Azure/durabletask/pull/1159), settable via `host.json` ### Bug Fixes @@ -12,6 +12,10 @@ ### Dependency Updates +- Microsoft.DurableTask.Client.Grpc to 1.3.0 +- Microsoft.DurableTask.Worker.Grpc to 1.3.0 +- Microsoft.Azure.WebJobs.Extensions.DurableTask (in host process) to 2.13.6 + ## Microsoft.Azure.WebJobs.Extensions.DurableTask ### New Features diff --git a/src/WebJobs.Extensions.DurableTask/AzureStorageDurabilityProviderFactory.cs b/src/WebJobs.Extensions.DurableTask/AzureStorageDurabilityProviderFactory.cs index 0162d26b4..242bef777 100644 --- a/src/WebJobs.Extensions.DurableTask/AzureStorageDurabilityProviderFactory.cs +++ b/src/WebJobs.Extensions.DurableTask/AzureStorageDurabilityProviderFactory.cs @@ -217,6 +217,7 @@ internal AzureStorageOrchestrationServiceSettings GetAzureStorageOrchestrationSe UseSeparateQueueForEntityWorkItems = this.useSeparateQueueForEntityWorkItems, EntityMessageReorderWindowInMinutes = this.options.EntityMessageReorderWindowInMinutes, MaxEntityOperationBatchSize = this.options.MaxEntityOperationBatchSize, + AllowReplayingTerminalInstances = this.azureStorageOptions.AllowReplayingTerminalInstances, }; if (this.inConsumption) diff --git a/src/WebJobs.Extensions.DurableTask/Options/AzureStorageOptions.cs b/src/WebJobs.Extensions.DurableTask/Options/AzureStorageOptions.cs index 1667aabaf..4a6a506cb 100644 --- a/src/WebJobs.Extensions.DurableTask/Options/AzureStorageOptions.cs +++ b/src/WebJobs.Extensions.DurableTask/Options/AzureStorageOptions.cs @@ -179,6 +179,20 @@ public string TrackingStoreConnectionStringName /// A boolean indicating whether to use the table partition strategy. Defaults to false. public bool UseTablePartitionManagement { get; set; } = false; + /// + /// When false, when an orchestrator is in a terminal state (e.g. Completed, Failed, Terminated), events for that orchestrator are discarded. 
+ /// Otherwise, events for a terminal orchestrator induce a replay. This may be used to recompute the state of the orchestrator in the "Instances Table". + /// + /// + /// Transactions across Azure Tables are not possible, so we independently update the "History table" and then the "Instances table" + /// to set the state of the orchestrator. + /// If a crash were to occur between these two updates, the state of the orchestrator in the "Instances table" would be incorrect. + /// By setting this configuration to true, you can recover from these inconsistencies by forcing a replay of the orchestrator in response + /// to a client event like a termination request or an external event, which gives the framework another opportunity to update the state of + /// the orchestrator in the "Instances table". To force a replay after enabling this configuration, just send any external event to the affected instanceId. + /// + public bool AllowReplayingTerminalInstances { get; set; } = false; + /// /// Throws an exception if the provided hub name violates any naming conventions for the storage provider. 
/// diff --git a/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj b/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj index e023c4d4c..c6bae32df 100644 --- a/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj +++ b/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj @@ -6,7 +6,7 @@ Microsoft.Azure.WebJobs.Extensions.DurableTask 2 13 - 5 + 6 $(PackageSuffix) $(MajorVersion).$(MinorVersion).$(PatchVersion) $(MajorVersion).0.0.0 @@ -114,7 +114,7 @@ - + diff --git a/src/Worker.Extensions.DurableTask/AssemblyInfo.cs b/src/Worker.Extensions.DurableTask/AssemblyInfo.cs index e7b781cf0..63fc22df6 100644 --- a/src/Worker.Extensions.DurableTask/AssemblyInfo.cs +++ b/src/Worker.Extensions.DurableTask/AssemblyInfo.cs @@ -5,5 +5,5 @@ using Microsoft.Azure.Functions.Worker.Extensions.Abstractions; // TODO: Find a way to generate this dynamically at build-time -[assembly: ExtensionInformation("Microsoft.Azure.WebJobs.Extensions.DurableTask", "2.13.5")] +[assembly: ExtensionInformation("Microsoft.Azure.WebJobs.Extensions.DurableTask", "2.13.6")] [assembly: InternalsVisibleTo("Worker.Extensions.DurableTask.Tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100cd1dabd5a893b40e75dc901fe7293db4a3caf9cd4d3e3ed6178d49cd476969abe74a9e0b7f4a0bb15edca48758155d35a4f05e6e852fff1b319d103b39ba04acbadd278c2753627c95e1f6f6582425374b92f51cca3deb0d2aab9de3ecda7753900a31f70a236f163006beefffe282888f85e3c76d1205ec7dfef7fa472a17b1")] diff --git a/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj b/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj index e6c954cb3..b310e80da 100644 --- a/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj +++ b/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj @@ -29,7 +29,7 @@ ..\..\sign.snk - 1.1.5 + 1.1.6 $(VersionPrefix).0 @@ -39,8 +39,8 @@ - - + + diff --git 
a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/Dockerfile b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/Dockerfile index 51ad96b16..3b24d9213 100644 --- a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/Dockerfile +++ b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/Dockerfile @@ -25,6 +25,11 @@ RUN cd /root/test/SmokeTests/OOProcSmokeTests/DotNetIsolated && \ ls -aR /home/site/wwwroot && \ cat /home/site/wwwroot/extensions.json # debugging +RUN cat /root/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/obj/Release/net6.0/WorkerExtensions/WorkerExtensions.csproj + +RUN ls /root/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/bin/ +# .azurefunctions/function.deps.json + # Step 3: Generate the final app image to run FROM mcr.microsoft.com/azure-functions/dotnet-isolated:4-dotnet-isolated6.0 diff --git a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/FaultyOrchestrators.cs b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/FaultyOrchestrators.cs index fc5a8fded..90006fd20 100644 --- a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/FaultyOrchestrators.cs +++ b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/FaultyOrchestrators.cs @@ -10,24 +10,22 @@ namespace FaultOrchestrators public static class FaultyOrchestrators { [Function(nameof(OOMOrchestrator))] - public static Task OOMOrchestrator( + public static Task OOMOrchestrator( [OrchestrationTrigger] TaskOrchestrationContext context) { // this orchestrator is not deterministic, on purpose. // we use the non-determinism to force an OOM exception on only the first replay - // check if a file named "replayEvidence" exists in the current directory. - // create it if it does not - string evidenceFile = "replayEvidence"; + // check if a file named "replayEvidence" exists in source code directory, create it if it does not. + // From experience, this code runs in `/bin/output/`, so we store the file two directories above. 
+ // We do this because the /bin/output/ directory gets overridden during the build process, which happens automatically + // when `func host start` is re-invoked. + string evidenceFile = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), "..", "..", "replayEvidence"); bool isTheFirstReplay = !System.IO.File.Exists(evidenceFile); if (isTheFirstReplay) { System.IO.File.Create(evidenceFile).Close(); - } - - // on the very first replay, OOM the process - if (isTheFirstReplay) - { + // force the process to run out of memory List data = new List(); @@ -39,57 +37,54 @@ public static Task OOMOrchestrator( // assuming the orchestrator survived the OOM, delete the evidence file and return System.IO.File.Delete(evidenceFile); - return "done!"; + return Task.CompletedTask; } [Function(nameof(ProcessExitOrchestrator))] - public static Task ProcessExitOrchestrator( + public static Task ProcessExitOrchestrator( [OrchestrationTrigger] TaskOrchestrationContext context) { // this orchestrator is not deterministic, on purpose. // we use the non-determinism to force a sudden process exit on only the first replay - // check if a file named "replayEvidence" exists in the current directory. - // create it if it does not - string evidenceFile = "replayEvidence"; + // check if a file named "replayEvidence" exists in source code directory, create it if it does not. + // From experience, this code runs in `/bin/output/`, so we store the file two directories above. + // We do this because the /bin/output/ directory gets overridden during the build process, which happens automatically + // when `func host start` is re-invoked. 
+ string evidenceFile = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), "..", "..", "replayEvidence"); bool isTheFirstReplay = !System.IO.File.Exists(evidenceFile); if (isTheFirstReplay) { System.IO.File.Create(evidenceFile).Close(); - } - - // on the very first replay, OOM the process - if (isTheFirstReplay) - { - // force the process to suddenly exit - Environment.FailFast(-1); + + // simulate sudden crash + Environment.FailFast("Simulating crash!"); } // assuming the orchestrator survived the OOM, delete the evidence file and return System.IO.File.Delete(evidenceFile); - return "done!"; + return Task.CompletedTask; } [Function(nameof(TimeoutOrchestrator))] - public static Task TimeoutOrchestrator( + public static Task TimeoutOrchestrator( [OrchestrationTrigger] TaskOrchestrationContext context) { // this orchestrator is not deterministic, on purpose. // we use the non-determinism to force a timeout on only the first replay - // check if a file named "replayEvidence" exists in the current directory. - // create it if it does not - string evidenceFile = "replayEvidence"; + // check if a file named "replayEvidence" exists in source code directory, create it if it does not. + // From experience, this code runs in `/bin/output/`, so we store the file two directories above. + // We do this because the /bin/output/ directory gets overridden during the build process, which happens automatically + // when `func host start` is re-invoked. 
+ string evidenceFile = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), "..", "..", "replayEvidence"); bool isTheFirstReplay = !System.IO.File.Exists(evidenceFile); + if (isTheFirstReplay) { System.IO.File.Create(evidenceFile).Close(); - } - - // on the very first replay, time out the execution - if (isTheFirstReplay) - { + // force the process to timeout after a 1 minute wait System.Threading.Thread.Sleep(TimeSpan.FromMinutes(1)); } @@ -97,7 +92,7 @@ public static Task TimeoutOrchestrator( // assuming the orchestrator survived the timeout, delete the evidence file and return System.IO.File.Delete(evidenceFile); - return "done!"; + return Task.CompletedTask; } [Function("durable_HttpStartOOMOrchestrator")] diff --git a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/host.json b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/host.json index e4b5cdf28..ae6c359f7 100644 --- a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/host.json +++ b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/host.json @@ -8,5 +8,14 @@ } } }, + "extensions": { + "durableTask": { + "hubName": "hubbbb1113324", + "storageProvider": { + "maxQueuePollingInterval": "00:00:01", + "controlQueueVisibilityTimeout": "00:01:00" + } + } + }, "functionTimeout": "00:00:30" } \ No newline at end of file diff --git a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 new file mode 100644 index 000000000..839cdea5d --- /dev/null +++ b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 @@ -0,0 +1,115 @@ +# This is a simple test runner to validate the .NET isolated smoke tests. +# It supersedes the usual e2e-test.ps1 script for the .NET isolated scenario because building the smoke test app +# on the docker image is unreliable. 
For more details, see: https://github.com/Azure/azure-functions-host/issues/7995 + +# This script is designed specifically to test cases where the isolated worker process experiences a platform failure: +# timeouts, OOMs, etc. For that reason, it is careful to check that the Functions Host is running and healthy at regular +# intervals. This makes these tests run more slowly than other test categories. + +param( + [Parameter(Mandatory=$true)] + [string]$HttpStartPath +) + +$retryCount = 0; +$statusUrl = $null; +$success = $false; +$haveManuallyRestartedHost = $false; + +Do { + $testIsRunning = $true; + + # Start the functions host if it's not running already. + # Then give it up to 1 minute to start up. + # This is a long wait, but from experience the CI can be slow to start up the host, especially after a platform-error. + $isFunctionsHostRunning = (Get-Process -Name func -ErrorAction SilentlyContinue) + if ($isFunctionsHostRunning -eq $null) { + Write-Host "Starting the Functions host..." -ForegroundColor Yellow + + # The '&' operator is used to run the command in the background + cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 & + Write-Host "Waiting for the Functions host to start up..." -ForegroundColor Yellow + Start-Sleep -Seconds 60 + } + + + try { + # Make sure the Functions runtime is up and running + $pingUrl = "http://localhost:7071/admin/host/ping" + Write-Host "Pinging app at $pingUrl to ensure the host is healthy" -ForegroundColor Yellow + Invoke-RestMethod -Method Post -Uri "http://localhost:7071/admin/host/ping" + Write-Host "Host is healthy!" -ForegroundColor Green + + # Start orchestrator if it hasn't been started yet + if ($statusUrl -eq $null){ + $startOrchestrationUri = "http://localhost:7071/$HttpStartPath" + Write-Host "Starting a new orchestration instance via POST to $startOrchestrationUri..." 
-ForegroundColor Yellow + + $result = Invoke-RestMethod -Method Post -Uri $startOrchestrationUri + Write-Host "Started orchestration with instance ID '$($result.id)'!" -ForegroundColor Yellow + Write-Host "Waiting for orchestration to complete..." -ForegroundColor Yellow + + $statusUrl = $result.statusQueryGetUri + + # sleep for a bit to give the orchestrator a chance to start, + # then loop once more in case the orchestrator ran quickly, made the host unhealthy, + # and the functions host needs to be restarted + Start-Sleep -Seconds 5 + continue; + } + + # Check the orchestrator status + $result = Invoke-RestMethod -Method Get -Uri $statusUrl + $runtimeStatus = $result.runtimeStatus + Write-Host "Orchestration is $runtimeStatus" -ForegroundColor Yellow + Write-Host $result + + if ($result.runtimeStatus -eq "Completed") { + $success = $true + $testIsRunning = $false + break + } + if ($result.runtimeStatus -eq "Failed") { + $success = $false + $testIsRunning = $false + break + } + + # If the orchestrator did not complete yet, wait for a bit before checking again + Start-Sleep -Seconds 2 + $retryCount = $retryCount + 1 + + } catch { + Write-Host "An error occurred:" -ForegroundColor Red + Write-Host $_ -ForegroundColor Red + + # When testing for platform errors, we want to make sure the Functions host is healthy and ready to take requests. + # The Host can get into bad states (for example, in an OOM-inducing test) where it does not self-heal. + # For these cases, we manually restart the host to ensure it is in a good state. We only do this once per test. + if ($haveManuallyRestartedHost -eq $false) { + + # We stop the host process and wait for a bit before checking if it is running again. + Write-Host "Restarting the Functions host..." 
-ForegroundColor Yellow + Stop-Process -Name "func" -Force + Start-Sleep -Seconds 5 + + # Log whether the process kill succeeded + $haveManuallyRestartedHost = $true + $isFunctionsHostRunning = ((Get-Process -Name func -ErrorAction SilentlyContinue) -eq $null) + Write-Host "Host process killed: $isFunctionsHostRunning" -ForegroundColor Yellow + + # the beginning of the loop will restart the host + continue + } + + # Rethrow the original exception + throw + } + +} while (($testIsRunning -eq $true) -and ($retryCount -lt 65)) + +if ($success -eq $false) { + throw "Orchestration failed or did not compete in time! :(" +} + +Write-Host "Success!" -ForegroundColor Green \ No newline at end of file diff --git a/test/SmokeTests/e2e-test.ps1 b/test/SmokeTests/e2e-test.ps1 index e7a7aa8c1..f4ed28617 100644 --- a/test/SmokeTests/e2e-test.ps1 +++ b/test/SmokeTests/e2e-test.ps1 @@ -26,7 +26,7 @@ function Exit-OnError() { } $ErrorActionPreference = "Stop" -$AzuriteVersion = "3.26.0" +# $AzuriteVersion = "3.26.0" if ($NoSetup -eq $false) { # Build the docker image first, since that's the most critical step @@ -35,14 +35,15 @@ if ($NoSetup -eq $false) { Exit-OnError # Next, download and start the Azurite emulator Docker image - Write-Host "Pulling down the mcr.microsoft.com/azure-storage/azurite:$AzuriteVersion image..." -ForegroundColor Yellow - docker pull "mcr.microsoft.com/azure-storage/azurite:${AzuriteVersion}" + Write-Host "Pulling down the mcr.microsoft.com/azure-storage/azurite image..." -ForegroundColor Yellow + docker pull "mcr.microsoft.com/azure-storage/azurite" Exit-OnError Write-Host "Starting Azurite storage emulator using default ports..." 
-ForegroundColor Yellow - docker run --name 'azurite' -p 10000:10000 -p 10001:10001 -p 10002:10002 -d "mcr.microsoft.com/azure-storage/azurite:${AzuriteVersion}" + docker run --name 'azurite' -p 10000:10000 -p 10001:10001 -p 10002:10002 -d "mcr.microsoft.com/azure-storage/azurite" Exit-OnError - + # Author's note: we don't call 'Exit-OnError' here because this container may already be running + if ($SetupSQLServer -eq $true) { Write-Host "Pulling down the mcr.microsoft.com/mssql/server:$tag image..." docker pull mcr.microsoft.com/mssql/server:$tag @@ -51,7 +52,7 @@ if ($NoSetup -eq $false) { # Start the SQL Server docker container with the specified edition Write-Host "Starting SQL Server $tag $sqlpid docker container on port $port" -ForegroundColor DarkYellow docker run --name mssql-server -e 'ACCEPT_EULA=Y' -e "MSSQL_SA_PASSWORD=$pw" -e "MSSQL_PID=$sqlpid" -p ${port}:1433 -d mcr.microsoft.com/mssql/server:$tag - Exit-OnError + # Author's note: we don't call 'Exit-OnError' here because this container may already be running # Wait for SQL Server to be ready Write-Host "Waiting for SQL Server to be ready..." 
-ForegroundColor Yellow @@ -80,7 +81,7 @@ if ($NoSetup -eq $false) { --env 'AzureWebJobsStorage=UseDevelopmentStorage=true;DevelopmentStorageProxyUri=http://host.docker.internal' ` --env 'WEBSITE_HOSTNAME=localhost:8080' ` $ImageName - Exit-OnError + # Author's note: we don't call 'Exit-OnError' here because this container may already be running } else { Write-Host "Starting $ContainerName application container" -ForegroundColor Yellow @@ -89,7 +90,7 @@ if ($NoSetup -eq $false) { --env 'WEBSITE_HOSTNAME=localhost:8080' ` $ImageName } - Exit-OnError + # Author's note: we don't call 'Exit-OnError' here because this container may already be running } if ($sleep -gt 0) { @@ -107,6 +108,7 @@ try { $pingUrl = "http://localhost:8080/admin/host/ping" Write-Host "Pinging app at $pingUrl to ensure the host is healthy" -ForegroundColor Yellow Invoke-RestMethod -Method Post -Uri "http://localhost:8080/admin/host/ping" + Write-Host "Host is healthy!" -ForegroundColor Green Exit-OnError if ($NoValidation -eq $false) { @@ -129,6 +131,7 @@ try { if ($result.runtimeStatus -eq "Completed") { $success = $true + Write-Host $result break } diff --git a/tools/triageHelper/function_app.py b/tools/triageHelper/function_app.py index 12a6d77ff..c2f4d8aa1 100644 --- a/tools/triageHelper/function_app.py +++ b/tools/triageHelper/function_app.py @@ -12,6 +12,7 @@ "Azure/azure-functions-durable-extension", "Azure/azure-functions-durable-js", "Azure/azure-functions-durable-python", + "Azure/azure-functions-durable-powershell", powershell_worker_repo, "microsoft/durabletask-java", "microsoft/durabletask-dotnet", @@ -40,7 +41,6 @@ def get_triage_issues(repository): 'labels': label, } - payload_str = urllib.parse.urlencode(payload, safe=':+') # Define the GitHub API endpoint api_endpoint = f"https://api.github.com/repos/{repository}/issues" query_str1 = "?labels=Needs%3A%20Triage%20%3Amag%3A"