diff --git a/.github/regression.sh b/.github/regression.sh index 88fa20b53..aa42e0e7b 100755 --- a/.github/regression.sh +++ b/.github/regression.sh @@ -75,7 +75,8 @@ elif is_truthy "${CI_BYRON_CLUSTER:-}"; then export TESTNET_VARIANT="${CLUSTER_ERA:-conway}_slow" fi -export CARDANO_NODE_SOCKET_PATH_CI="$WORKDIR/state-cluster0/bft1.socket" +CARDANO_NODE_SOCKET_PATH_CI="${CARDANO_NODE_SOCKET_PATH_CI:-$WORKDIR/state-cluster0/bft1.socket}" +export CARDANO_NODE_SOCKET_PATH_CI # assume we run tests on testnet when `BOOTSTRAP_DIR` is set if [ -n "${BOOTSTRAP_DIR:-}" ]; then @@ -115,16 +116,25 @@ case "${CARDANO_CLI_REV:-}" in esac # setup cardano-node binaries -case "${NODE_REV:-}" in - "" | "none" ) - NODE_REV=master - ;; -esac -# shellcheck disable=SC1091 -. .github/source_cardano_node.sh -cardano_bins_build_all "$NODE_REV" "${CARDANO_CLI_REV:-}" -PATH_PREPEND="$(cardano_bins_print_path_prepend "${CARDANO_CLI_REV:-}")${PATH_PREPEND}" -export PATH_PREPEND +if [ -n "${CARDANO_PREBUILT_DIR:-}" ]; then + # Pre-built binaries were baked into the image (e.g. for Antithesis). + # Skip all nix builds and point PATH_PREPEND at the pre-built directories. + _d="${CARDANO_PREBUILT_DIR}" + PATH_PREPEND="${_d}/cardano-node/bin:${_d}/cardano-submit-api/bin:${_d}/cardano-cli/bin:${_d}/bech32/bin:${PATH_PREPEND}" + export PATH_PREPEND + unset _d +else + case "${NODE_REV:-}" in + "" | "none" ) + NODE_REV=master + ;; + esac + # shellcheck disable=SC1091 + . .github/source_cardano_node.sh + cardano_bins_build_all "$NODE_REV" "${CARDANO_CLI_REV:-}" + PATH_PREPEND="$(cardano_bins_print_path_prepend "${CARDANO_CLI_REV:-}")${PATH_PREPEND}" + export PATH_PREPEND +fi # optimize nix store if running in GitHub Actions if [ -n "${GITHUB_ACTIONS:-}" ]; then @@ -254,7 +264,14 @@ nix develop --accept-flake-config .#testenv --command bash -c ' echo "::group::Python venv setup" printf "start: %(%H:%M:%S)T\n" -1 - . .github/setup_venv.sh clean + # When _VENV_DIR points to a pre-built venv (e.g. 
baked into the image for + # Antithesis), skip the `clean` flag so the existing venv is reused as-is + # without re-downloading packages. + if [ -n "${_VENV_DIR:-}" ] && [ -e "${_VENV_DIR}" ]; then + . .github/setup_venv.sh + else + . .github/setup_venv.sh clean + fi echo "::endgroup::" # end group for "Python venv setup" echo "::group::๐Ÿงช Testrun" diff --git a/.github/run_tests.sh b/.github/run_tests.sh index 6672f7060..57f148530 100755 --- a/.github/run_tests.sh +++ b/.github/run_tests.sh @@ -58,7 +58,7 @@ EOF run_pytest() { if [ -n "${SESSION_TIMEOUT:-}" ]; then - local -a timeout_arr=( "--foreground" "--signal=INT" "--kill-after=0" "$SESSION_TIMEOUT" ) + local -a timeout_arr=( "--foreground" "--signal=INT" "--kill-after=120" "$SESSION_TIMEOUT" ) echo "Running: PYTEST_ADDOPTS='${PYTEST_ADDOPTS:-}' timeout ${timeout_arr[*]} pytest $*" timeout "${timeout_arr[@]}" pytest "$@" else diff --git a/cardano_node_tests/tests/conftest.py b/cardano_node_tests/tests/conftest.py index 6f4bc9b7c..a047da63a 100644 --- a/cardano_node_tests/tests/conftest.py +++ b/cardano_node_tests/tests/conftest.py @@ -222,6 +222,35 @@ def pytest_keyboard_interrupt() -> None: (session_basetemp / INTERRUPTED_NAME).touch() +def pytest_runtest_logreport(report: tp.Any) -> None: + """Emit an Antithesis SDK assertion for every test failure.""" + if report.when != "call" or not report.failed: + return + sdk_file = pl.Path(os.environ.get("ANTITHESIS_OUTPUT_DIR", "/tmp/antithesis")) / "sdk.jsonl" + sdk_file.parent.mkdir(parents=True, exist_ok=True) + longrepr = report.longrepr + reprcrash = getattr(longrepr, "reprcrash", None) + exc_message = reprcrash.message if reprcrash else (str(longrepr)[:2000] if longrepr else "") + assertion = { + "antithesis_assert": { + "type": "always", + "condition": False, + "display_name": report.nodeid, + "message": exc_message, + "details": {"traceback": str(longrepr)[-2000:] if longrepr else ""}, + "location": { + "function": report.nodeid, + "file": 
str(report.fspath), + "begin_line": 0, + "begin_column": 0, + "class": "", + }, + } + } + with sdk_file.open("a") as f: + f.write(json.dumps(assertion) + "\n") + + @pytest.fixture(scope="session") def init_pytest_temp_dirs(tmp_path_factory: TempPathFactory) -> None: """Init `PytestTempDirs`.""" diff --git a/cardano_node_tests/tests/test_tx_basic.py b/cardano_node_tests/tests/test_tx_basic.py index 53acce774..24c3a28b1 100644 --- a/cardano_node_tests/tests/test_tx_basic.py +++ b/cardano_node_tests/tests/test_tx_basic.py @@ -15,6 +15,7 @@ from cardano_node_tests.tests import common from cardano_node_tests.tests import issues from cardano_node_tests.tests import tx_common +from cardano_node_tests.utils import antithesis from cardano_node_tests.utils import cluster_nodes from cardano_node_tests.utils import clusterlib_utils from cardano_node_tests.utils import dbsync_utils @@ -181,13 +182,27 @@ def test_transfer_funds( ) out_utxos = cluster.g_query.get_utxo(tx_raw_output=tx_output) - assert ( - clusterlib.filter_utxos(utxos=out_utxos, address=src_addr.address)[0].amount - == clusterlib.calculate_utxos_balance(tx_output.txins) - tx_output.fee - amount - ), f"Incorrect balance for source address `{src_addr.address}`" - assert ( - clusterlib.filter_utxos(utxos=out_utxos, address=dst_addr.address)[0].amount == amount - ), f"Incorrect balance for destination address `{dst_addr.address}`" + + src_actual = clusterlib.filter_utxos(utxos=out_utxos, address=src_addr.address)[0].amount + src_expected = clusterlib.calculate_utxos_balance(tx_output.txins) - tx_output.fee - amount + antithesis.always( + src_actual == src_expected, + "Source balance decreased by transfer amount and fee", + {"src_addr": src_addr.address, "expected": src_expected, "actual": src_actual}, + ) + assert src_actual == src_expected, ( + f"Incorrect balance for source address `{src_addr.address}`" + ) + + dst_actual = clusterlib.filter_utxos(utxos=out_utxos, address=dst_addr.address)[0].amount + 
antithesis.always( + dst_actual == amount, + "Destination received exact transfer amount", + {"dst_addr": dst_addr.address, "expected": amount, "actual": dst_actual}, + ) + assert dst_actual == amount, ( + f"Incorrect balance for destination address `{dst_addr.address}`" + ) common.check_missing_utxos(cluster_obj=cluster, utxos=out_utxos) diff --git a/cardano_node_tests/utils/antithesis.py b/cardano_node_tests/utils/antithesis.py new file mode 100644 index 000000000..24c9df2fb --- /dev/null +++ b/cardano_node_tests/utils/antithesis.py @@ -0,0 +1,42 @@ +"""Antithesis SDK wrappers. + +All functions are no-ops when the ``antithesis`` package is not installed, +so tests that use them run normally outside the Antithesis environment. +Install the package only inside the Antithesis Docker image โ€” do not add it +to pyproject.toml. +""" + +import typing as tp + +try: + import antithesis.assertions as _ant + + def always(condition: bool, message: str, details: tp.Mapping[str, tp.Any]) -> None: + """Assert *condition* is true on every invocation.""" + _ant.always(condition, message, details) + + def sometimes(condition: bool, message: str, details: tp.Mapping[str, tp.Any]) -> None: + """Assert *condition* is true at least once across all calls.""" + _ant.sometimes(condition, message, details) + + def reachable(message: str, details: tp.Mapping[str, tp.Any]) -> None: + """Assert this code location is reached at least once.""" + _ant.reachable(message, details) + + def unreachable(message: str, details: tp.Mapping[str, tp.Any]) -> None: + """Assert this code location is never reached.""" + _ant.unreachable(message, details) + +except ImportError: + + def always(condition: bool, message: str, details: tp.Mapping[str, tp.Any]) -> None: # type: ignore[misc] + pass + + def sometimes(condition: bool, message: str, details: tp.Mapping[str, tp.Any]) -> None: # type: ignore[misc] + pass + + def reachable(message: str, details: tp.Mapping[str, tp.Any]) -> None: # type: ignore[misc] + 
pass + + def unreachable(message: str, details: tp.Mapping[str, tp.Any]) -> None: # type: ignore[misc] + pass diff --git a/docker-antithesis/Dockerfile b/docker-antithesis/Dockerfile new file mode 100644 index 000000000..e5609855d --- /dev/null +++ b/docker-antithesis/Dockerfile @@ -0,0 +1,77 @@ +# Dockerfile for cardano-node-tests (Antithesis-compatible driver image) +# +# All heavy dependencies are baked in at image build time so the container +# runs without any network access (required by Antithesis environments). +# +# Build args: +# GIT_REVISION โ€” git commit hash stored as $GIT_REVISION in the image +# NODE_REV โ€” cardano-node git ref to pre-build (default: master) +# +# Build and push to GHCR before submitting to Antithesis: +# docker build -f docker-antithesis/Dockerfile \ +# --build-arg GIT_REVISION=$(git rev-parse HEAD) \ +# --build-arg NODE_REV=master \ +# -t ghcr.io/saratomaz/cardano-node-tests-antithesis:latest . +# docker push ghcr.io/saratomaz/cardano-node-tests-antithesis:latest + +FROM nixos/nix:2.25.5 + +ARG GIT_REVISION +ARG NODE_REV=master + +ENV GIT_REVISION=${GIT_REVISION} +# Store the baked-in node revision for reference at runtime. +ENV BAKED_NODE_REV=${NODE_REV} + +# Configure Nix with IOG binary cache and required experimental features. +RUN mkdir -p /etc/nix && \ + printf 'extra-substituters = https://cache.iog.io\n\ +extra-trusted-public-keys = hydra.iohk.io:f/Ea+s+dFdN+3Y/G+FDgSq+a5NEWhJGzdjvKNGv0/EQ=\n\ +experimental-features = nix-command flakes\n\ +accept-flake-config = true\n' >> /etc/nix/nix.conf + +WORKDIR /work +COPY . /work/ + +# Pre-build cardano-node, cardano-submit-api, cardano-cli, and bech32 into /opt/cardano/. +# NODE_REV is fixed at image build time โ€” no network access is needed at runtime. 
+RUN mkdir -p /opt/cardano && \ + nix build \ + --accept-flake-config --no-write-lock-file \ + "github://github.com/IntersectMBO/cardano-node?ref=${NODE_REV}#cardano-node" \ + -o /opt/cardano/cardano-node && \ + nix build \ + --accept-flake-config --no-write-lock-file \ + "github://github.com/IntersectMBO/cardano-node?ref=${NODE_REV}#cardano-submit-api" \ + -o /opt/cardano/cardano-submit-api && \ + nix build \ + --accept-flake-config --no-write-lock-file \ + "github://github.com/IntersectMBO/cardano-node?ref=${NODE_REV}#cardano-cli" \ + -o /opt/cardano/cardano-cli && \ + nix build \ + --accept-flake-config --no-write-lock-file \ + "github://github.com/IntersectMBO/cardano-node?ref=${NODE_REV}#bech32" \ + -o /opt/cardano/bech32 + +# Pre-warm the testenv dev shell (pulls nixpkgs, postgres, uv, python313 into the +# nix store) and create the Python venv at /opt/tests-venv with all project +# dependencies installed. This is the same step regression.sh does at runtime +# but done here so no pip/uv network calls are needed in the Antithesis env. +RUN nix develop --accept-flake-config .#testenv --command \ + bash -c 'python3 -m venv /opt/tests-venv --prompt tests-venv && \ + . /opt/tests-venv/bin/activate && \ + cd /work && \ + uv sync --active --no-dev && \ + pip install "antithesis>=0.2.0,<0.3.0"' + +# Pre-warm the base dev shell (bash, coreutils, git, jq, โ€ฆ) so its store +# paths are cached and the regression.sh shebang resolves offline. +RUN nix develop --accept-flake-config .#base --command true + +# Create the Antithesis test driver directory and install the entry-points. +# singleton_driver_* files are run once per test run by Antithesis. 
+RUN mkdir -p /opt/antithesis/test/v1/quickstart && \ + cp /work/docker-antithesis/antithesis_run.sh \ + /opt/antithesis/test/v1/quickstart/singleton_driver_regression.sh && \ + chmod +x /opt/antithesis/test/v1/quickstart/singleton_driver_regression.sh && \ + chmod +x /work/docker-antithesis/node_run.sh diff --git a/docker-antithesis/Dockerfile.config b/docker-antithesis/Dockerfile.config new file mode 100644 index 000000000..ce0461dd7 --- /dev/null +++ b/docker-antithesis/Dockerfile.config @@ -0,0 +1,13 @@ +# Config image for Antithesis. +# +# Contains only the docker-compose.yaml that tells Antithesis how to run +# the services. Must be pushed to the Antithesis registry alongside the +# driver image. +# +# Build (from the repository root, so the COPY source path below resolves): +# docker build -f docker-antithesis/Dockerfile.config \ +# -t us-central1-docker.pkg.dev//antithesis/config:latest . +# docker push us-central1-docker.pkg.dev//antithesis/config:latest + +FROM scratch +COPY docker-antithesis/docker-compose.yaml /docker-compose.yaml diff --git a/docker-antithesis/Dockerfile.dockerignore b/docker-antithesis/Dockerfile.dockerignore new file mode 100644 index 000000000..ea2e6c752 --- /dev/null +++ b/docker-antithesis/Dockerfile.dockerignore @@ -0,0 +1,66 @@ +# Ignore unnecessary files during Docker build + +# Git +.git/ +.gitignore + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +build/ +*.egg + +# Virtual environments +.venv/ +venv/ +ENV/ +env/ + +# Testing artifacts +run_workdir/ +.artifacts/ +.cli_coverage/ +.reports/ +allure-results/ +allure-results.tar.xz +testrun-report.* +*.log +*.json.log + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# Nix +result +result-* + +# Documentation +docs/_build/ +*.md + +# Temporary files +*.tmp +*.bak +.DS_Store + +# Scripts output +scripts/destination/ +scripts/destination_working/ + +# Coverage +.coverage +htmlcov/ +cli_coverage.json +requirements_coverage.json + +# CI specific +.bin/ diff --git a/docker-antithesis/README.md b/docker-antithesis/README.md new file mode 
100644 index 000000000..e8767244f --- /dev/null +++ b/docker-antithesis/README.md @@ -0,0 +1,101 @@ +# Docker setup for cardano-node-tests (Antithesis) + +This directory contains the driver image and compose files for submitting +`cardano-node-tests` to Antithesis. + +## How it works + +Antithesis environments have **no internet access** at runtime, so all +dependencies are baked into the image at build time: + +- `Dockerfile` — builds the driver image. At build time it: + 1. Pre-builds `cardano-node`, `cardano-submit-api`, `cardano-cli`, and + `bech32` from `NODE_REV` into `/opt/cardano/` via `nix build`. + 2. Pre-warms the `testenv` dev shell and creates the Python venv at + `/opt/tests-venv/` with all project dependencies installed. + 3. Pre-warms the `base` dev shell so the `regression.sh` shebang resolves + from the local nix store without network access. + 4. Installs `antithesis_run.sh` as the Antithesis test driver at + `/opt/antithesis/test/v1/quickstart/singleton_driver_regression.sh`. + +- `antithesis_run.sh` — container entrypoint that: + 1. Forces nix into offline mode (`offline = true`). + 2. Exports `CARDANO_PREBUILT_DIR=/opt/cardano` and `_VENV_DIR=/opt/tests-venv` + so `regression.sh` skips all downloads and uses the pre-built artefacts. + 3. In multi-container mode (when `NODE_HOST` is set): + - Polls `NODE_HOST:NODE_PORT` until the node cluster reports ready. + - Starts a local `cardano-submit-api` in the driver container so that + submit-api tests can reach it via `localhost`. + - Starts a TCP proxy forwarding `localhost:<port>` → + `NODE_HOST:<port>` so `cardano-cli ping` tests work. + - Starts a local HTTP file server for anchor URLs used by governance + tests (`cardano-cli transaction build` fetches anchor hashes via HTTP). + 4. Emits the Antithesis `setup_complete` lifecycle signal. + 5. Hands off to `.github/regression.sh`. + +- `Dockerfile.config` — builds the Antithesis config image (`FROM scratch`) + containing only `docker-compose.yaml`. 
+ +- `docker-compose.yaml` — two services: `node` (cardano-node cluster) and + `driver` (pytest). Both share a `cluster-state` Docker volume so the + driver accesses the node sockets without going over the network. An HTTP + health check on port 8090 provides cross-container traffic that satisfies + the Antithesis "Containers joined the Antithesis network" property. + +## Workflow + +### 1. Build and push the driver image + +```bash +docker build -f docker-antithesis/Dockerfile \ + --build-arg GIT_REVISION=$(git rev-parse HEAD) \ + --build-arg NODE_REV=master \ + -t ghcr.io/saratomaz/cardano-node-tests-antithesis:latest . + +docker push ghcr.io/saratomaz/cardano-node-tests-antithesis:latest +``` + +`NODE_REV` is locked at build time — the same binaries are used every run +regardless of what is on the `master` branch when the container starts. + +### 2. Build and push the config image + +```bash +docker build -f docker-antithesis/Dockerfile.config \ + -t us-central1-docker.pkg.dev//antithesis/config:latest . + +docker push us-central1-docker.pkg.dev//antithesis/config:latest +``` + +### 3. Validate locally (internet-connected build, isolated network at runtime) + +```bash +docker compose -f docker-antithesis/docker-compose.yaml config +docker compose -f docker-antithesis/docker-compose.yaml up --build \ + --abort-on-container-exit --exit-code-from driver +``` + +To fully simulate the Antithesis no-internet constraint, run inside an +isolated network namespace on Linux: + +```bash +unshare -n docker compose -f docker-antithesis/docker-compose.yaml up +``` + +## Environment variables + +`NODE_REV` is baked into the image at build time and must **not** be set at +runtime. All other variables are passed through docker-compose as before. 
+ +| Variable | Default | Description | +|--------------------|----------------|------------------------------------------------| +| `CARDANO_CLI_REV` | (built-in) | cardano-cli revision, empty = use node's | +| `DBSYNC_REV` | (disabled) | db-sync revision, empty = disabled | +| `RUN_TARGET` | `tests` | `tests`, `testpr`, or `testnets` | +| `MARKEXPR` | `smoke` | pytest marker expression | +| `SESSION_TIMEOUT` | `1h` | wall-clock limit passed to `timeout(1)` | +| `TESTNET_VARIANT` | `conway_fast` | cluster variant for `prepare_cluster_scripts` | +| `CLUSTERS_COUNT` | `1` | number of local cluster instances | +| `CLUSTER_ERA` | | e.g. `conway` | +| `PROTOCOL_VERSION` | | e.g. `11` | +| `UTXO_BACKEND` | | e.g. `disk`, `mem` | diff --git a/docker-antithesis/antithesis_run.sh b/docker-antithesis/antithesis_run.sh new file mode 100755 index 000000000..b85e404d4 --- /dev/null +++ b/docker-antithesis/antithesis_run.sh @@ -0,0 +1,247 @@ +#!/usr/bin/env bash +# Antithesis driver container entrypoint. +# +# Runs the full test suite without any network access by: +# 1. Forcing nix into offline mode (all store paths were pre-built into +# the image by docker-antithesis/Dockerfile). +# 2. Pointing regression.sh at the pre-built cardano binaries and Python +# venv so it skips all download / build steps. +# 3. When NODE_HOST is set (multi-container mode): waiting for the node +# container's health check on port 8090 before running tests, and +# setting DEV_CLUSTER_RUNNING=1 so pytest uses the pre-running cluster +# instead of starting its own. +# 3b. Starting a local cardano-submit-api in the driver container so that +# tests using submit_api can reach it via localhost. The test framework +# hard-codes http://localhost: for submit_api; since submit_api in +# the node container binds to 127.0.0.1 there (unreachable here), we +# run our own instance against the shared cluster-state socket. +# 3c. 
Starting a TCP proxy (Python, no extra packages) that forwards +# localhost: โ†’ NODE_HOST:. cardano-cli ping +# hardcodes --host localhost, so without this the ping tests fail in +# multi-container mode. +# 4. Emitting the Antithesis setup_complete lifecycle signal. +# 5. Handing off to regression.sh. +# +# Multi-container environment variables (set in docker-compose): +# NODE_HOST Hostname of the node container (default: unset). +# NODE_PORT Health check port on the node container (default: 8090). +# CLUSTER_STATE_DIR Mount point of the shared cluster-state volume +# (default: /cluster-state). +# +# This file is installed at: +# /opt/antithesis/test/v1/quickstart/singleton_driver_regression.sh +# and is also usable directly as the docker-compose command. + +set -Eeuo pipefail + +# --------------------------------------------------------------------------- +# 1. Force nix offline โ€” all required store paths were pre-built into the +# image. This prevents nix from attempting any network calls at runtime, +# which would fail inside the Antithesis environment. +# --------------------------------------------------------------------------- +echo "offline = true" >> /etc/nix/nix.conf + +# --------------------------------------------------------------------------- +# 2. Tell regression.sh to use the pre-built binaries and Python venv that +# were baked into the image at docker build time. +# --------------------------------------------------------------------------- +export CARDANO_PREBUILT_DIR=/opt/cardano +export _VENV_DIR=/opt/tests-venv + +_output_dir="${ANTITHESIS_OUTPUT_DIR:-/tmp/antithesis}" +mkdir -p "$_output_dir" + +# --------------------------------------------------------------------------- +# 3. Multi-container mode: wait for the node container and configure the +# driver to use the pre-running cluster. +# +# When NODE_HOST is set the driver polls the node's HTTP health endpoint +# (port 8090) until it responds "ready". 
This HTTP traffic is what makes +# both containers visible on the Antithesis network bridge. +# +# DEV_CLUSTER_RUNNING=1 tells pytest to skip cluster startup/shutdown and +# use the cluster already started by the node container. +# CARDANO_NODE_SOCKET_PATH_CI is pre-set to the shared volume socket path +# so regression.sh does not override it with its default workdir path. +# --------------------------------------------------------------------------- +if [ -n "${NODE_HOST:-}" ]; then + _node_port="${NODE_PORT:-8090}" + echo "Waiting for ${NODE_HOST}:${_node_port} to report ready..." + + _ready=0 + for _i in $(seq 1 120); do + # Use the venv's Python directly โ€” python3 is not in PATH outside a nix shell. + _resp="$("${_VENV_DIR}/bin/python3" -c " +import urllib.request, sys +try: + r = urllib.request.urlopen('http://${NODE_HOST}:${_node_port}/', timeout=5) + sys.stdout.write(r.read().decode()) +except Exception: + pass +" 2>/dev/null || true)" + if [ "$_resp" = "ready" ]; then + _ready=1 + break + fi + echo " attempt ${_i}/120: node reports '${_resp:-no response}', retrying in 5s..." + sleep 5 + done + + if [ "$_ready" -ne 1 ]; then + echo "ERROR: node container did not become ready within 10 minutes" >&2 + printf '{"antithesis_assert": {"type": "always", "condition": false, "display_name": "Node became ready", "message": "Node container did not become ready within 10 minutes", "details": {"node_host": "%s", "node_port": "%s"}, "location": {"function": "antithesis_run.sh", "file": "antithesis_run.sh", "begin_line": 1, "begin_column": 1, "class": ""}}}\n' \ + "${NODE_HOST:-}" "${_node_port:-8090}" >> "${ANTITHESIS_OUTPUT_DIR:-/tmp/antithesis}/sdk.jsonl" + exit 0 + fi + echo "Node is ready." + + CLUSTER_STATE_DIR="${CLUSTER_STATE_DIR:-/cluster-state}" + export DEV_CLUSTER_RUNNING=1 + export CLUSTERS_COUNT="${CLUSTERS_COUNT:-1}" + # Pre-set so regression.sh does not overwrite with its default workdir path. 
+ export CARDANO_NODE_SOCKET_PATH_CI="${CLUSTER_STATE_DIR}/state-cluster0/bft1.socket" + + # ------------------------------------------------------------------------- + # 3b. Start a local cardano-submit-api so tests can reach it via localhost. + # + # start-cluster already generated state-cluster0/run-cardano-submit-api + # with the correct port, socket path, and testnet magic substituted in. + # We run it from CLUSTER_STATE_DIR so its relative paths resolve, and + # put the pre-built binary on PATH. + # ------------------------------------------------------------------------- + _submit_api_script="${CLUSTER_STATE_DIR}/state-cluster0/run-cardano-submit-api" + if [ -x "$_submit_api_script" ]; then + export PATH="/opt/cardano/cardano-submit-api/bin:${PATH}" + # Parse the port from the generated script rather than hard-coding a port formula. + # `|| true`: under `set -e` + pipefail a missing `--port N` must not abort the driver. + _submit_api_port=$(grep -oE -- '--port [0-9]+' "$_submit_api_script" | grep -oE '[0-9]+' | head -1) || true + + echo "Starting local cardano-submit-api on port ${_submit_api_port}..." + (cd "${CLUSTER_STATE_DIR}" && exec "${_submit_api_script}") & + _submit_api_pid=$! + + # Wait up to 30 s for the port to open. + _sa_ready=0 + for _i in $(seq 1 30); do + if (echo >/dev/tcp/127.0.0.1/"${_submit_api_port}") 2>/dev/null; then + _sa_ready=1 + echo "Local submit_api is ready." + break + fi + echo " waiting for local submit_api (${_i}/30)..." + sleep 1 + done + if [ "$_sa_ready" -ne 1 ]; then + echo "WARNING: local submit_api did not start within 30 s; submit_api tests will fail." >&2 + fi + unset _sa_ready _i + else + echo "WARNING: ${_submit_api_script} not found; submit_api tests will fail." >&2 + fi + unset _submit_api_script + + # ------------------------------------------------------------------------- + # 3c. Start a TCP proxy so ping tests using --host localhost can reach the + # cardano-node P2P port in the node container. + # + # cardano-cli ping hardcodes --host localhost --port <port>. 
In + # multi-container mode the node's TCP P2P port lives in the node + # container and is unreachable on the driver's localhost. We forward + # localhost: โ†’ NODE_HOST:. + # + # pool1 port = PORTS_BASE + 5 (see cardonnay local_scripts.py). + # ------------------------------------------------------------------------- + _pool1_port=$(( ${PORTS_BASE:-23000} + 5 )) + echo "Starting TCP proxy localhost:${_pool1_port} โ†’ ${NODE_HOST}:${_pool1_port}..." + "${_VENV_DIR}/bin/python3" -c " +import socket, threading +def relay(a, b): + try: + while True: + d = a.recv(4096) + if not d: break + b.sendall(d) + except OSError: pass + finally: + for s in (a, b): + try: s.close() + except OSError: pass +def fwd(src, host, port): + try: dst = socket.create_connection((host, port), timeout=10) + except OSError: src.close(); return + for args in ((src, dst), (dst, src)): + threading.Thread(target=relay, args=args, daemon=True).start() +srv = socket.socket() +srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +srv.bind(('127.0.0.1', ${_pool1_port})) +srv.listen(32) +while True: + conn, _ = srv.accept() + threading.Thread(target=fwd, args=(conn, '${NODE_HOST}', ${_pool1_port}), daemon=True).start() +" & + _proxy_pid=$! + unset _pool1_port + + # ------------------------------------------------------------------------- + # 3d. Start a local HTTP file server for anchor URLs. + # + # cardano-cli transaction build fetches and verifies anchor hashes at + # http://localhost:/p/. The cluster's webserver + # runs in the node container but binds to 127.0.0.1 there (Python 3.11+ + # http.server default), making it unreachable from the driver container. + # Since the webserver directory lives on the shared cluster-state volume, + # running our own http.server in the driver container serves the same + # files without going over the network bridge. + # + # webserver port = submit_api port + 2 (last_port in cardonnay; + # submit_api = last_port - 2). 
+ # ------------------------------------------------------------------------- + # Derive webserver port from the already-parsed submit_api port. + # Fall back to PORTS_BASE + 99 (correct for a 3-pool cluster). + _webserver_port=$(( ${_submit_api_port:-$(( ${PORTS_BASE:-23000} + 97 ))} + 2 )) + unset _submit_api_port + + _webserver_dir="${CLUSTER_STATE_DIR}/state-cluster0/webserver" + mkdir -p "${_webserver_dir}" + echo "Starting local HTTP file server on port ${_webserver_port} (dir: ${_webserver_dir})..." + "${_VENV_DIR}/bin/python3" -m http.server \ + --bind 127.0.0.1 \ + --directory "${_webserver_dir}" \ + "${_webserver_port}" & + _webserver_pid=$! + unset _webserver_port _webserver_dir + + # Kill all background processes when this script exits. + trap 'kill "${_submit_api_pid:-}" "${_proxy_pid:-}" "${_webserver_pid:-}" 2>/dev/null || true' EXIT +fi +# _submit_api_pid, _proxy_pid, _webserver_pid are kept in scope for the EXIT trap above. + +# --------------------------------------------------------------------------- +# 4. Emit the Antithesis setup_complete signal. +# --------------------------------------------------------------------------- +printf '{"antithesis_setup": {"status": "complete", "details": {"info": ["cardano-node-tests driver ready, node_rev=%s"]}}}\n' \ + "${BAKED_NODE_REV:-unknown}" >> "$_output_dir/sdk.jsonl" +unset _output_dir + +# --------------------------------------------------------------------------- +# 5. Hand off to regression.sh. The shebang in that script will invoke +# `nix develop .#base` which now resolves entirely from the local nix +# store (offline = true). +# +# Do not exec directly: Antithesis treats any non-zero container exit +# code (other than 137/143) as an error property violation. Test +# failures are expected and communicated via SDK assertions, not the +# process exit code. Always exit 0 so the container is not flagged. 
+# +# Catch SIGINT with a no-op handler so that SESSION_TIMEOUT's `timeout +# --foreground --signal=INT` does not kill PID 1 before exit 0 is +# reached. Do not use `trap '' SIGINT`: an ignored signal is inherited +# by regression.sh and pytest, which could then never handle SIGINT. +# --------------------------------------------------------------------------- +trap ':' SIGINT +set +e +/work/.github/regression.sh +_rc=$? +set -e +echo "regression.sh finished with exit code ${_rc}" +exit 0 diff --git a/docker-antithesis/docker-compose.yaml b/docker-antithesis/docker-compose.yaml new file mode 100644 index 000000000..b8a8bddca --- /dev/null +++ b/docker-antithesis/docker-compose.yaml @@ -0,0 +1,87 @@ +# Docker Compose for Antithesis test submission. +# +# Two services share a cluster-state volume: +# +# node — starts the cardano-node cluster (system under test). +# Serves a health check on port 8090 so the driver can detect +# when the cluster is ready. The traffic between driver and node +# over the antithesis-net bridge satisfies the Antithesis +# "Containers joined the Antithesis network" property. +# +# driver — waits for the node health check, then runs the pytest test +# suite against the pre-running cluster via DEV_CLUSTER_RUNNING=1. +# +# Both images must be pre-built with all cardano binaries and the Python venv +# baked in (see docker-antithesis/Dockerfile). No internet access is available at +# runtime inside the Antithesis environment. 
+# +# Push images to the Antithesis registry before submitting: +# docker push us-central1-docker.pkg.dev//antithesis/cardano-node-tests:latest +# docker push us-central1-docker.pkg.dev//antithesis/config:latest +# +# Validate locally (requires internet โ€” use an isolated netns to simulate +# the Antithesis environment): +# docker compose -f docker-antithesis/docker-compose.yaml config +# docker compose -f docker-antithesis/docker-compose.yaml up --build + +networks: + antithesis-net: + driver: bridge + +volumes: + cluster-state: + +services: + node: + image: ghcr.io/saratomaz/cardano-node-tests-antithesis:latest + build: + context: .. + dockerfile: docker-antithesis/Dockerfile + command: ["/work/docker-antithesis/node_run.sh"] + networks: + - antithesis-net + volumes: + - cluster-state:/cluster-state + environment: + - CLUSTER_STATE_DIR=/cluster-state + - TESTNET_VARIANT=${TESTNET_VARIANT:-conway_fast} + healthcheck: + test: + - "CMD" + - "/opt/tests-venv/bin/python3" + - "-c" + - "import urllib.request; exit(0 if urllib.request.urlopen('http://localhost:8090/', timeout=5).read() == b'ready' else 1)" + interval: 15s + timeout: 6s + retries: 60 + start_period: 60s + + driver: + image: ghcr.io/saratomaz/cardano-node-tests-antithesis:latest + build: + context: .. + dockerfile: docker-antithesis/Dockerfile + # antithesis_run.sh sets nix offline, waits for the node health check, + # exports DEV_CLUSTER_RUNNING=1, emits setup_complete, then hands off + # to regression.sh. + command: ["/work/docker-antithesis/antithesis_run.sh"] + networks: + - antithesis-net + depends_on: + - node + volumes: + - cluster-state:/cluster-state + environment: + - CLUSTER_STATE_DIR=/cluster-state + - NODE_HOST=node + - NODE_PORT=8090 + # NODE_REV is baked into the image at build time; do not override here. 
+ - CARDANO_CLI_REV=${CARDANO_CLI_REV:-} + - DBSYNC_REV=${DBSYNC_REV:-} + - RUN_TARGET=${RUN_TARGET:-tests} + - MARKEXPR=${MARKEXPR:-smoke} + - SESSION_TIMEOUT=${SESSION_TIMEOUT:-1h} + - CLUSTERS_COUNT=${CLUSTERS_COUNT:-1} + - CLUSTER_ERA=${CLUSTER_ERA:-} + - PROTOCOL_VERSION=${PROTOCOL_VERSION:-} + - UTXO_BACKEND=${UTXO_BACKEND:-} diff --git a/docker-antithesis/node_run.sh b/docker-antithesis/node_run.sh new file mode 100755 index 000000000..343e5f21d --- /dev/null +++ b/docker-antithesis/node_run.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +# Antithesis node container entrypoint. +# +# 1. Starts the cardano-node cluster on the shared 'cluster-state' volume so +# the driver container can reach the node sockets without going over the +# network (Unix socket on a shared Docker volume). +# 2. Serves a lightweight HTTP health check on port 8090 over the Antithesis +# network bridge. Returns "ready" once the cluster socket exists. +# This cross-container HTTP traffic satisfies the Antithesis +# "Containers joined the Antithesis network" property. +# +# Environment variables: +# CLUSTER_STATE_DIR Mount point of the shared cluster-state volume +# (default: /cluster-state). +# TESTNET_VARIANT Cluster variant passed to prepare_cluster_scripts +# (default: conway_fast). + +set -Eeuo pipefail + +# --------------------------------------------------------------------------- +# 1. Force nix offline — all store paths are pre-built into the image. +# --------------------------------------------------------------------------- +echo "offline = true" >> /etc/nix/nix.conf + +# --------------------------------------------------------------------------- +# 2. Point at pre-built binaries and Python venv. +# All variables are exported so the inner nix shell inherits them.
# ---------------------------------------------------------------------------
# Image locations of the pre-built cardano binaries and of the Python venv.
export CARDANO_PREBUILT_DIR=/opt/cardano
export _VENV_DIR=/opt/tests-venv

# Directories prepended to PATH inside the nix shell. They are derived from
# CARDANO_PREBUILT_DIR so the install prefix is spelled out in exactly one
# place and the two values cannot drift apart.
_PATH_PREPEND="${CARDANO_PREBUILT_DIR}/cardano-node/bin"
_PATH_PREPEND+=":${CARDANO_PREBUILT_DIR}/cardano-submit-api/bin"
_PATH_PREPEND+=":${CARDANO_PREBUILT_DIR}/cardano-cli/bin"
_PATH_PREPEND+=":${CARDANO_PREBUILT_DIR}/bech32/bin"
export _PATH_PREPEND

# ---------------------------------------------------------------------------
# 3. Cluster state lives on the shared volume so the driver can read sockets.
# ---------------------------------------------------------------------------
CLUSTER_STATE_DIR="${CLUSTER_STATE_DIR:-/cluster-state}"
export CLUSTER_STATE_DIR
export _INSTANCE_NUM=0
export _STATE_CLUSTER="${CLUSTER_STATE_DIR}/state-cluster${_INSTANCE_NUM}"
export _SCRIPTS_DEST="${CLUSTER_STATE_DIR}/startup_scripts"

# Local clusters (conway_fast, etc.) use bft1.socket.
export CARDANO_NODE_SOCKET_PATH="${_STATE_CLUSTER}/bft1.socket"

# Directory that receives the Antithesis SDK output (sdk.jsonl).
export _output_dir="${ANTITHESIS_OUTPUT_DIR:-/tmp/antithesis}"
# `--` keeps an env-supplied path that starts with `-` from being parsed as an
# option.
mkdir -p -- "$_output_dir" "${CLUSTER_STATE_DIR}"

# ---------------------------------------------------------------------------
# Clean up any stale cluster state left by a previous container run on the
# same Docker volume. The previous node process is gone (fresh container),
# so only the socket file and lock files remain — start-cluster refuses to
# run if it finds them. Remove before starting the health check so the
# health endpoint never serves a stale socket as "ready".
# `:?` aborts instead of expanding to an empty path if a variable is unset.
# ---------------------------------------------------------------------------
rm -rf -- "${_STATE_CLUSTER:?}" "${_SCRIPTS_DEST:?}"

# ---------------------------------------------------------------------------
# 4. Health check server on port 8090 (Antithesis network bridge traffic).
#    Returns HTTP 200 "ready" once the cluster socket file exists,
#    503 "starting" while the cluster is still coming up.
#    Uses the venv Python directly — python3 is not in PATH outside a nix shell.
# ---------------------------------------------------------------------------
# The responder runs in the background for the lifetime of this script. It is
# written defensively because one unhandled socket error would terminate it and
# the container could then never report "ready":
#   * the (small) request is read, bounded by a short timeout, before the reply
#     is sent, so the connection is not closed with unread data pending — which
#     can reset the client before it has read the reply;
#   * per-connection OSErrors (resets, timeouts, broken pipes) are ignored and
#     the connection is always closed.
# The program is fed on stdin through a quoted here-document, so the shell
# performs no expansion or quote processing on the Python source.
"${_VENV_DIR}/bin/python3" - <<'PY' &
import os
import socket
import time

sock_path = os.environ.get("CARDANO_NODE_SOCKET_PATH", "")

server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(("0.0.0.0", 8090))
server.listen(64)

while True:
    try:
        conn, _ = server.accept()
    except OSError:
        # Transient accept failure (e.g. aborted handshake): back off briefly
        # instead of spinning or dying.
        time.sleep(0.1)
        continue
    try:
        # A silent or stalled client must not block the single-threaded loop.
        conn.settimeout(2)
        try:
            conn.recv(65536)
        except OSError:
            pass
        ready = os.path.exists(sock_path)
        body = b"ready" if ready else b"starting"
        status = b"200 OK" if ready else b"503 Service Unavailable"
        conn.sendall(
            b"HTTP/1.1 " + status
            + b"\r\nContent-Length: " + str(len(body)).encode()
            + b"\r\nConnection: close\r\n\r\n" + body
        )
    except OSError:
        pass
    finally:
        conn.close()
PY
_health_pid=$!
# Stop the background health server whenever this script exits.
trap 'kill "$_health_pid" 2>/dev/null || true' EXIT

# ---------------------------------------------------------------------------
# 5. Prepare cluster startup scripts and run the cluster.
#    The inner script uses single quotes so the outer shell does NOT expand
#    variables — the nix shell inherits all exported vars above and the inner
#    bash expands them from its environment. This avoids PATH corruption from
#    nested quoting (single quotes inside a double-quoted string are literal
#    and prevent $PATH from being expanded in the inner shell).
#    Consequently the inner script must not contain a single-quote character,
#    not even inside a comment.
# ---------------------------------------------------------------------------
export _testnet_variant="${TESTNET_VARIANT:-conway_fast}"

# Capture the exit status with `|| _rc=$?` so a failure of the nix shell does
# not trip errexit before the status has been logged.
_rc=0
# shellcheck disable=SC2016
nix develop --accept-flake-config .#testenv --command bash -c '
  set -euo pipefail
  . "$_VENV_DIR/bin/activate"
  export PATH="$_PATH_PREPEND:$PATH"

  # Instantiate cluster scripts for instance $_INSTANCE_NUM into the
  # shared volume. --clean removes any previous attempt.
  python3 -m cardano_node_tests.prepare_cluster_scripts \
    --dest-dir "$_SCRIPTS_DEST" \
    --testnet-variant "$_testnet_variant" \
    --instance-num "$_INSTANCE_NUM" \
    --clean

  # Patch pool1 to bind on all interfaces so the driver container can reach
  # its P2P port over the Docker bridge network. By default cardonnay
  # generates --host-addr 127.0.0.1 for all local cluster nodes; pool1 is
  # the one the ping tests connect to via TCP.
  # The dots are escaped so that only the literal loopback address matches.
  if [ -f "$_SCRIPTS_DEST/cardano-node-pool1" ]; then
    sed -i "s/--host-addr 127\.0\.0\.1/--host-addr 0.0.0.0/g" \
      "$_SCRIPTS_DEST/cardano-node-pool1"
  fi

  # start-cluster must run from the parent of the state-cluster directory.
  cd "$CLUSTER_STATE_DIR"
  "$_SCRIPTS_DEST/start-cluster"

  # Emit the Antithesis setup-complete event as one JSON line.
  printf "{\"antithesis_setup\": {\"status\": \"complete\", \"details\": {\"info\": [\"cardano-node cluster ready, socket=%s\"]}}}\n" \
    "$CARDANO_NODE_SOCKET_PATH" >> "$_output_dir/sdk.jsonl"

  # Keep the cluster alive until the container is stopped.
  tail -f /dev/null
' || _rc=$?

# The real status is only logged; the container itself always exits 0.
echo "node_run.sh exiting with code ${_rc}"
exit 0