Skip to content

Commit

Permalink
passing tests
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmrdavid committed Oct 1, 2024
1 parent 3ea3e45 commit bbbb91f
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 65 deletions.
40 changes: 19 additions & 21 deletions .github/workflows/smoketest-dotnet-isolated-v4.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,31 +69,29 @@ jobs:
run: npm i -g azure-functions-core-tools@4 --unsafe-perm true

# Run smoke tests
# - name: Run smoke tests (Hello Cities)
# shell: pwsh
# run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 &
# cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 &
# ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/StartHelloCitiesTyped

# - name: Run smoke tests (Timeout)
# shell: pwsh
# run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 &
# cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 &
# ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartTimeoutOrchestrator

# - name: Run smoke tests (OOM)
# shell: pwsh
# run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 &
# cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 &
# ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartOOMOrchestrator
# Unlike other smoke tests, the .NET isolated smoke tests run outside of a docker container, due to race conditions
# when building the smoke test app in docker, causing the build to fail. This is a temporary workaround until the
# root cause is identified and fixed.

- name: Run smoke tests (Hello Cities)
shell: pwsh
run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 &
cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 &
./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/StartHelloCitiesTyped

- name: Run smoke tests (Process Exit)
shell: pwsh
run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 &
./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartProcessExitOrchestrator

- name: Run smoke tests (Timeout)
shell: pwsh
run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 &
cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 &
./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartTimeoutOrchestrator

# # Validation is blocked on https://github.com/Azure/azure-functions-host/issues/7995
# - name: Run V4 .NET Isolated Smoke Test
# run: test/SmokeTests/e2e-test.ps1 -DockerfilePath test/SmokeTests/OOProcSmokeTests/DotNetIsolated/Dockerfile -HttpStartPath api/StartHelloCitiesTyped
# shell: pwsh
- name: Run smoke tests (OOM)
shell: pwsh
run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 &
cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 &
./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartOOMOrchestrator
2 changes: 1 addition & 1 deletion src/WebJobs.Extensions.DurableTask/OutOfProcMiddleware.cs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ await this.LifeCycleNotificationHelper.OrchestratorStartingAsync(
// - a worker process exit
if (functionResult.Exception is Host.FunctionTimeoutException
|| functionResult.Exception?.InnerException is OutOfMemoryException // see RemoteOrchestrationContext.TrySetResultInternal for details on OOM-handling
|| (functionResult.Exception?.InnerException?.GetType().ToString().Contains("WorkerProcessExitExceptionn") ?? false))
|| (functionResult.Exception?.InnerException?.GetType().ToString().Contains("WorkerProcessExitException") ?? false))
{
// TODO: the `WorkerProcessExitException` type is not exposed in our dependencies, it's part of WebJobs.Host.Script.
// Should we add that dependency or should it be exposed in WebJobs.Host?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,16 @@ public static Task OOMOrchestrator(
// this orchestrator is not deterministic, on purpose.
// we use the non-determinism to force an OOM exception on only the first replay

// check if a file named "replayEvidence" exists in the current directory.
// create it if it does not
string evidenceFile = "replayEvidence";
// check if a file named "replayEvidence" exists in source code directory, create it if it does not.
// From experience, this code runs in `<sourceCodePath>/bin/output/`, so we store the file two directories above.
// We do this because the /bin/output/ directory gets overridden during the build process, which happens automatically
// when `func host start` is re-invoked.
string evidenceFile = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), "..", "..", "replayEvidence");
bool isTheFirstReplay = !System.IO.File.Exists(evidenceFile);
if (isTheFirstReplay)
{
System.IO.File.Create(evidenceFile).Close();
}

// on the very first replay, OOM the process
if (isTheFirstReplay)
{

// force the process to run out of memory
List<byte[]> data = new List<byte[]>();

Expand All @@ -49,20 +47,18 @@ public static Task ProcessExitOrchestrator(
// this orchestrator is not deterministic, on purpose.
// we use the non-determinism to force a sudden process exit on only the first replay

// check if a file named "replayEvidence" exists in the current directory.
// create it if it does not
string evidenceFile = "replayEvidence";
// check if a file named "replayEvidence" exists in source code directory, create it if it does not.
// From experience, this code runs in `<sourceCodePath>/bin/output/`, so we store the file two directories above.
// We do this because the /bin/output/ directory gets overridden during the build process, which happens automatically
// when `func host start` is re-invoked.
string evidenceFile = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), "..", "..", "replayEvidence");
bool isTheFirstReplay = !System.IO.File.Exists(evidenceFile);
if (isTheFirstReplay)
{
System.IO.File.Create(evidenceFile).Close();
}

// on the very first replay, OOM the process
if (isTheFirstReplay)
{
// simulate out of memory
throw new OutOfMemoryException();

// simulate sudden crash
Environment.FailFast("Simulating crash!");
}

// assuming the orchestrator survived the simulated crash, delete the evidence file and return
Expand All @@ -78,18 +74,17 @@ public static Task TimeoutOrchestrator(
// this orchestrator is not deterministic, on purpose.
// we use the non-determinism to force a timeout on only the first replay

// check if a file named "replayEvidence" exists in the current directory.
// create it if it does not
string evidenceFile = "replayEvidence";
// check if a file named "replayEvidence" exists in source code directory, create it if it does not.
// From experience, this code runs in `<sourceCodePath>/bin/output/`, so we store the file two directories above.
// We do this because the /bin/output/ directory gets overridden during the build process, which happens automatically
// when `func host start` is re-invoked.
string evidenceFile = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), "..", "..", "replayEvidence");
bool isTheFirstReplay = !System.IO.File.Exists(evidenceFile);

if (isTheFirstReplay)
{
System.IO.File.Create(evidenceFile).Close();
}

// on the very first replay, time out the execution
if (isTheFirstReplay)
{

// force the process to timeout after a 1 minute wait
System.Threading.Thread.Sleep(TimeSpan.FromMinutes(1));
}
Expand Down
4 changes: 3 additions & 1 deletion test/SmokeTests/OOProcSmokeTests/DotNetIsolated/host.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
},
"extensions": {
"durableTask": {
"hubName": "hubbbb1113324",
"storageProvider": {
"maxQueuePollingInterval": "00:00:01"
"maxQueuePollingInterval": "00:00:01",
"controlQueueVisibilityTimeout": "00:01:00"
}
}
},
Expand Down
61 changes: 45 additions & 16 deletions test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# This is a simple smoke test runner to validate the .NET isolated smoke tests.
# It supersedes the usual e2e-tests.ps1 script for the .NET isolated scenario because building the tests on the docker image
# is unreliable. For more details, see: https://github.com/Azure/azure-functions-host/issues/7995
# This is a simple test runner to validate the .NET isolated smoke tests.
# It supersedes the usual e2e-tests.ps1 script for the .NET isolated scenario because building the smoke test app
# on the docker image is unreliable. For more details, see: https://github.com/Azure/azure-functions-host/issues/7995

# This script is designed specifically to test cases where the isolated worker process experiences a platform failure:
# timeouts, OOMs, etc. For that reason, it is careful to check that the Functions Host is running and healthy at regular
# intervals. This makes these tests run more slowly than other test categories.

param(
[Parameter(Mandatory=$true)]
[string]$HttpStartPath
Expand All @@ -9,19 +14,20 @@ param(
$retryCount = 0;
$statusUrl = $null;
$success = $false;
$haveManuallyRestartedHost = $false;

Do {
$testIsRunning = $true;

# Start the functions host if it's not running already.
# Then give it 40 seconds to start up. This is a long wait, but from experience the CI can be slow to start up the host.
# Then give it up to 1 minute to start up.
# This is a long wait, but from experience the CI can be slow to start up the host, especially after a platform-error.
$isFunctionsHostRunning = (Get-Process -Name func -ErrorAction SilentlyContinue)
if ($isFunctionsHostRunning -eq $null) {
Write-Host "Starting the Functions host..." -ForegroundColor Yellow

# The '&' operator is used to run the command in the background
cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 >> Debug.txt &

cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 &
Write-Host "Waiting for the Functions host to start up..." -ForegroundColor Yellow
Start-Sleep -Seconds 60
}
Expand All @@ -34,7 +40,7 @@ Do {
Invoke-RestMethod -Method Post -Uri "http://localhost:7071/admin/host/ping"
Write-Host "Host is healthy!" -ForegroundColor Green

# Note that any HTTP protocol errors (e.g. HTTP 4xx or 5xx) will cause an immediate failure
# Start orchestrator if it hasn't been started yet
if ($statusUrl -eq $null){
$startOrchestrationUri = "http://localhost:7071/$HttpStartPath"
Write-Host "Starting a new orchestration instance via POST to $startOrchestrationUri..." -ForegroundColor Yellow
Expand All @@ -44,10 +50,15 @@ Do {
Write-Host "Waiting for orchestration to complete..." -ForegroundColor Yellow

$statusUrl = $result.statusQueryGetUri

# sleep for a bit to give the orchestrator a chance to start,
# then loop once more in case the orchestrator ran quickly, made the host unhealthy,
# and the functions host needs to be restarted
Start-Sleep -Seconds 5
continue;
}


# Check the orchestrator status
$result = Invoke-RestMethod -Method Get -Uri $statusUrl
$runtimeStatus = $result.runtimeStatus
Write-Host "Orchestration is $runtimeStatus" -ForegroundColor Yellow
Expand All @@ -58,29 +69,47 @@ Do {
$testIsRunning = $false
break
}
if ($result.runtimeStatus -eq "Failed") {
$success = $false
$testIsRunning = $false
break
}

# If the orchestrator did not complete yet, wait for a bit before checking again
Start-Sleep -Seconds 2
$retryCount = $retryCount + 1

} catch {
Write-Host "An error occurred:" -ForegroundColor Red
Write-Host $_ -ForegroundColor Red
$output = cat ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/Debug.txt
Write-Host $output
Write-Host "======================================" -ForegroundColor Red

# When testing for platform errors, we want to make sure the Functions host is healthy and ready to take requests.
# The Host can get into bad states (for example, in an OOM-inducing test) where it does not self-heal.
# For these cases, we manually restart the host to ensure it is in a good state. We only do this once per test.
if ($haveManuallyRestartedHost -eq $false) {

# We stop the host process and wait for a bit before checking if it is running again.
Write-Host "Restarting the Functions host..." -ForegroundColor Yellow
Stop-Process -Name "func" -Force
Start-Sleep -Seconds 5

# Log whether the process kill succeeded
$haveManuallyRestartedHost = $true
$isFunctionsHostRunning = ((Get-Process -Name func -ErrorAction SilentlyContinue) -eq $null)
Write-Host "Host process killed: $isFunctionsHostRunning" -ForegroundColor Yellow

# the beginning of the loop will restart the host
continue
}

# Rethrow the original exception
throw
}


# This delay is a bit excessive, but we want to make sure the Functions runtime is up and running before we start the orchestration
# This number was determined, through trial and error, to be a safe amount of time to wait
Start-Sleep -Seconds 40
} while (($testIsRunning -eq $true) -and ($retryCount -lt 65))

if ($success -eq $false) {
throw "Orchestration didn't complete in time! :("
throw "Orchestration failed or did not compete in time! :("
}

Write-Host "Success!" -ForegroundColor Green

0 comments on commit bbbb91f

Please sign in to comment.