diff --git a/.github/scripts/run_tests_with_backtrace.sh b/.github/scripts/run_tests_with_backtrace.sh new file mode 100755 index 00000000..8765c59c --- /dev/null +++ b/.github/scripts/run_tests_with_backtrace.sh @@ -0,0 +1,220 @@ +#!/usr/bin/env bash +# Run a test binary under debug CI. On fatal signals, print post-mortem +# backtraces from core dumps when available. Linux also runs under catchsegv +# so a partial backtrace appears in the log even without a core file. +# +# When LIVEKIT_TEST_STALL_SECONDS is set to a positive integer, a watchdog +# monitors test output and dumps live thread backtraces if the log goes silent +# for that many seconds (integration-test hang diagnostics on linux-x64). +set -uo pipefail + +usage() { + echo "Usage: $0 [gtest-args...]" >&2 + exit 2 +} + +[[ $# -ge 1 ]] || usage + +binary=$1 +shift + +if [[ ! -x "$binary" ]]; then + echo "Error: not executable: $binary" >&2 + exit 2 +fi + +binary_abs=$(cd "$(dirname "$binary")" && pwd)/$(basename "$binary") +core_dir="${RUNNER_TEMP:-/tmp}/livekit-test-cores" +mkdir -p "$core_dir" + +ulimit -c unlimited || true + +if [[ "$(uname -s)" == "Linux" ]]; then + echo "${core_dir}/core.%e.%p" | sudo tee /proc/sys/kernel/core_pattern >/dev/null || true +fi + +if [[ "$(uname -s)" == "Darwin" ]]; then + ulimit -c unlimited || true + sudo sysctl -w kern.coredump=1 >/dev/null 2>&1 || true + sudo mkdir -p /cores 2>/dev/null || true + sudo chmod 1777 /cores 2>/dev/null || true +fi + +dump_macos_crash_reports() { + local binary_name + binary_name=$(basename "${binary_abs}") + echo "=== macOS DiagnosticReports for ${binary_name} ===" + local found=0 + for report_dir in "${HOME}/Library/Logs/DiagnosticReports" "/Library/Logs/DiagnosticReports"; do + if [[ ! -d "${report_dir}" ]]; then + continue + fi + while IFS= read -r report; do + found=1 + echo "Crash report: ${report}" + # .ips files are JSON-ish; print the first 200 lines for the CI log. + head -n 200 "${report}" || true + done < <(find "${report_dir}" -maxdepth 1 -name "${binary_name}*.ips" -type f -print 2>/dev/null | sort -r | head -3) + done + if ((found == 0)); then + echo "No DiagnosticReports .ips found for ${binary_name}" + fi +} + +dump_live_backtraces() { + local test_pid=$1 + local reason=$2 + + echo "=== live backtrace diagnostics (${reason}, pid ${test_pid}) ===" + + if [[ "$(uname -s)" == "Linux" ]]; then + if command -v gdb >/dev/null 2>&1; then + gdb -batch \ + -ex 'set pagination off' \ + -ex 'thread apply all bt full' \ + -p "${test_pid}" || true + else + echo "gdb not available; install gdb for live backtraces" + fi + return 0 + fi + + if [[ "$(uname -s)" == "Darwin" ]]; then + if command -v sample >/dev/null 2>&1; then + sample "${test_pid}" 5 -mayDie 2>&1 || true + fi + if command -v lldb >/dev/null 2>&1; then + lldb -p "${test_pid}" --batch -o 'thread backtrace all' -o 'detach' -o 'quit' 2>&1 || true + else + echo "lldb not available" + fi + fi +} + +dump_backtraces() { + local test_pid=$1 + local status=$2 + + echo "=== crash diagnostics (exit status ${status}, pid ${test_pid}) ===" + + if [[ "$(uname -s)" == "Linux" ]]; then + local core="" + core=$(find "$core_dir" -maxdepth 1 -name 'core.*' -type f 2>/dev/null | sort -r | head -1) + if [[ -z "$core" ]]; then + core=$(find /tmp -maxdepth 1 -name 'core.*' -type f 2>/dev/null | sort -r | head -1) + fi + if [[ -n "$core" && -f "$core" ]]; then + echo "Core file: ${core}" + if command -v gdb >/dev/null 2>&1; then + gdb -batch \ + -ex 'set pagination off' \ + -ex 'thread apply all bt full' \ + "${binary_abs}" "${core}" || true + else + echo "gdb not available; install gdb for post-mortem backtraces" + fi + cp -a "${core}" "${core_dir}/" 2>/dev/null || true + basename "${core}" >"${core_dir}/last-core.name" + else + echo "No core file found under ${core_dir} or /tmp" + fi + return 0 + fi + + if [[ "$(uname -s)" == "Darwin" ]]; then + local core="" + for candidate in "/cores/core.${test_pid}" "/cores/core.${test_pid}.dump"; do + if [[ -f "${candidate}" ]]; then + core=${candidate} + break + fi + done + if [[ -z "$core" ]]; then + core=$(find /cores -maxdepth 1 -name "core.*" -type f 2>/dev/null | sort -r | head -1) + fi + if [[ -n "$core" && -f "$core" ]]; then + echo "Core file: ${core}" + if command -v lldb >/dev/null 2>&1; then + lldb -b -c "${core}" -o 'thread backtrace all' -o 'quit' -- "${binary_abs}" || true + else + echo "lldb not available" + fi + cp -a "${core}" "${core_dir}/" 2>/dev/null || true + basename "${core}" >"${core_dir}/last-core.name" + else + echo "No core file found under /cores for pid ${test_pid}" + fi + dump_macos_crash_reports + fi +} + +run_test() { + if [[ "$(uname -s)" == "Linux" ]] && command -v catchsegv >/dev/null 2>&1; then + catchsegv "${binary_abs}" "$@" + else + "${binary_abs}" "$@" + fi +} + +start_stall_watchdog() { + local test_pid=$1 + local log_file=$2 + local stall_limit=$3 + + ( + local last_size=-1 + local stall=0 + while kill -0 "${test_pid}" 2>/dev/null; do + local size + size=$(wc -c <"${log_file}" 2>/dev/null || echo 0) + if [[ "${size}" == "${last_size}" ]]; then + stall=$((stall + 5)) + else + stall=0 + last_size=${size} + fi + if ((stall >= stall_limit)); then + echo "=== TEST HANG DETECTED: no output for ${stall}s (pid ${test_pid}) ===" + echo "--- last log lines ---" + tail -n 40 "${log_file}" || true + dump_live_backtraces "${test_pid}" "stall ${stall}s" + kill -ABRT "${test_pid}" 2>/dev/null || kill -TERM "${test_pid}" 2>/dev/null || true + break + fi + sleep 5 + done + ) & + echo $! +} + +stall_limit=${LIVEKIT_TEST_STALL_SECONDS:-0} +log_file="${RUNNER_TEMP:-/tmp}/livekit-test-output.log" + +set +e +if ((stall_limit > 0)); then + : >"${log_file}" + run_test "$@" >"${log_file}" 2>&1 & + test_pid=$! + watchdog_pid=$(start_stall_watchdog "${test_pid}" "${log_file}" "${stall_limit}") + wait "${test_pid}" + status=$? + kill "${watchdog_pid}" 2>/dev/null || true + wait "${watchdog_pid}" 2>/dev/null || true + cat "${log_file}" +else + run_test "$@" & + test_pid=$! + wait "${test_pid}" + status=$? +fi +set -e + +if ((status > 128)); then + signal=$((status - 128)) + echo "Test process ${test_pid} terminated by signal ${signal}" + dump_backtraces "${test_pid}" "${status}" +elif ((status != 0)); then + echo "Test process exited with status ${status}" +fi + +exit "${status}" diff --git a/.github/scripts/stage_crash_diagnostics.sh b/.github/scripts/stage_crash_diagnostics.sh new file mode 100755 index 00000000..47cb1835 --- /dev/null +++ b/.github/scripts/stage_crash_diagnostics.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# Collect unstripped test binaries, shared libraries, and core dumps for upload. +set -euo pipefail + +build_dir=${1:?usage: stage_crash_diagnostics.sh } +staging="${RUNNER_TEMP}/crash-diagnostics" + +rm -rf "${staging}" +mkdir -p "${staging}/bin" "${staging}/lib" "${staging}/cores" + +shopt -s nullglob +for bin in "${build_dir}"/bin/livekit_*; do + if [[ -f "${bin}" && -x "${bin}" ]]; then + cp -a "${bin}" "${staging}/bin/" + fi +done + +for lib in "${build_dir}"/lib/liblivekit.*; do + if [[ -f "${lib}" ]]; then + cp -a "${lib}" "${staging}/lib/" + fi +done + +while IFS= read -r -d '' ffi_lib; do + cp -a "${ffi_lib}" "${staging}/lib/" +done < <(find client-sdk-rust/target/debug -name 'liblivekit_ffi.*' -print0 2>/dev/null) + +core_dir="${RUNNER_TEMP}/livekit-test-cores" +if [[ -d "${core_dir}" ]]; then + find "${core_dir}" -maxdepth 1 -name 'core.*' -type f -exec cp -a {} "${staging}/cores/" \; 2>/dev/null || true +fi + +if [[ "$(uname -s)" == "Darwin" && -d /cores ]]; then + find /cores -maxdepth 1 -name 'core.*' -type f -exec cp -a {} "${staging}/cores/" \; 2>/dev/null || true +fi + +if [[ "$(uname -s)" == "Darwin" ]]; then + mkdir -p "${staging}/crash-reports" + for report_dir in "${HOME}/Library/Logs/DiagnosticReports" "/Library/Logs/DiagnosticReports"; do + if [[ -d "${report_dir}" ]]; then + find "${report_dir}" -maxdepth 1 -name '*.ips' -type f -exec cp -a {} "${staging}/crash-reports/" \; 2>/dev/null || true + fi + done +fi + +echo "Staged crash diagnostics under ${staging}:" +find "${staging}" -type f -print diff --git a/.github/workflows/builds.yml b/.github/workflows/builds.yml index 6a218178..aade2a9f 100644 --- a/.github/workflows/builds.yml +++ b/.github/workflows/builds.yml @@ -94,7 +94,7 @@ jobs: - name: Setup vcpkg (Windows only) if: runner.os == 'Windows' - uses: lukka/run-vcpkg@b1a0dd252f06b9e25b3c022a9a03bd7a427fb6a2 # v11.5 + uses: lukka/run-vcpkg@6fe69898af670ac05f4a8427cc5cff4fb361cee5 # v11.5 with: vcpkgGitCommitId: ${{ env.VCPKG_GIT_COMMIT }} @@ -125,7 +125,7 @@ jobs: # ---------- Rust toolchain ---------- - name: Install Rust (stable) - uses: dtolnay/rust-toolchain@67ef31d5b988238dd797d409d6f9574278e20537 + uses: dtolnay/rust-toolchain@3c5f7ea28cd621ae0bf5283f0e981fb97b8a7af9 with: toolchain: stable diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8f64c130..b483a003 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,6 +62,7 @@ jobs: - vcpkg.json - .github/workflows/ci.yml - .github/workflows/tests.yml + - .github/workflows/nightly.yml docs: - README.md - include/** diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml new file mode 100644 index 00000000..739381c2 --- /dev/null +++ b/.github/workflows/nightly.yml @@ -0,0 +1,221 @@ +name: Nightly + +on: + schedule: + - cron: "23 7 * * *" + workflow_dispatch: + # TEMPORARY: enables validating this new workflow from the PR before it exists + # on the default branch. Remove this pull_request trigger before merging. + pull_request: + types: [opened, reopened, synchronize, ready_for_review] + branches: ["main"] + +permissions: + contents: read + actions: read + packages: write + +# concurrency: +# group: nightly-${{ github.ref }} +# cancel-in-progress: false + +jobs: + debug-tests: + name: Debug Tests + uses: ./.github/workflows/tests.yml + with: + build_type: debug + unit_repeat: ${{ github.event_name == 'pull_request' && 1 || 100 }} + integration_repeat: 10 + integration_rust_log: info,livekit_ffi::server=debug + run_stress_tests: true + stress_repeat: 1 + unit_timeout_minutes: 60 + integration_timeout_minutes: 120 + stress_timeout_minutes: 120 + job_timeout_minutes: 180 + artifact_retention_days: 14 + run_coverage: false + secrets: inherit + + cpp-checks: + name: C++ Checks + uses: ./.github/workflows/cpp-checks.yml + + generate-docs: + name: Generate Docs + uses: ./.github/workflows/generate-docs.yml + with: + upload_artifact: false + + docker-images: + name: Docker Images + uses: ./.github/workflows/docker-images.yml + with: + push_images: ${{ github.event_name != 'pull_request' }} + push_tag: nightly + secrets: inherit + + sanitizer: + name: Sanitizer Checks + runs-on: ubuntu-latest + timeout-minutes: 90 + env: + CARGO_TERM_COLOR: always + CARGO_INCREMENTAL: "0" + RUST_BACKTRACE: full + ASAN_OPTIONS: detect_leaks=0:halt_on_error=1:symbolize=1:print_stacktrace=1 + UBSAN_OPTIONS: halt_on_error=1:print_stacktrace=1 + + steps: + - name: Checkout (with submodules) + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + submodules: recursive + fetch-depth: 1 + + - name: Pull LFS files + run: git lfs pull + + - name: Prepare CI test scripts + run: | + chmod +x .github/scripts/run_tests_with_backtrace.sh + chmod +x .github/scripts/stage_crash_diagnostics.sh + + - name: Install deps + run: | + set -eux + sudo apt-get update + sudo apt-get install -y \ + build-essential cmake ninja-build pkg-config \ + llvm-dev libclang-dev clang \ + libva-dev libdrm-dev libgbm-dev libx11-dev libgl1-mesa-dev \ + libxext-dev libxcomposite-dev libxdamage-dev libxfixes-dev \ + libxrandr-dev libxi-dev libxkbcommon-dev \ + libasound2-dev libpulse-dev \ + libssl-dev \ + libprotobuf-dev protobuf-compiler \ + libabsl-dev \ + libwayland-dev libdecor-0-dev \ + jq + + - name: Install Rust (stable) + uses: dtolnay/rust-toolchain@3c5f7ea28cd621ae0bf5283f0e981fb97b8a7af9 + with: + toolchain: stable + + - name: Set build environment + run: | + LLVM_VERSION=$(llvm-config --version | cut -d. -f1) + { + echo "LIBCLANG_PATH=/usr/lib/llvm-${LLVM_VERSION}/lib" + echo "CXXFLAGS=-Wno-deprecated-declarations -fno-omit-frame-pointer" + echo "CFLAGS=-Wno-deprecated-declarations -fno-omit-frame-pointer" + } >> "$GITHUB_ENV" + + - name: Configure sanitizer build + run: | + cmake --preset linux-debug-tests \ + -DCMAKE_C_FLAGS="-Wno-deprecated-declarations -fsanitize=address,undefined -fno-omit-frame-pointer" \ + -DCMAKE_CXX_FLAGS="-Wno-deprecated-declarations -fsanitize=address,undefined -fno-omit-frame-pointer" \ + -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=address,undefined" \ + -DCMAKE_SHARED_LINKER_FLAGS="-fsanitize=address,undefined" + + - name: Build sanitizer tests + run: cmake --build build-debug --target livekit_unit_tests livekit_integration_tests --parallel 2 + + - name: Run sanitizer unit tests + timeout-minutes: 20 + run: | + .github/scripts/run_tests_with_backtrace.sh \ + build-debug/bin/livekit_unit_tests \ + --gtest_output=xml:build-debug/sanitizer-unit-test-results.xml + + - name: Start livekit-server + id: livekit_server + uses: livekit/dev-server-action@61e2b4dcb170dd3591e0c9b0db3c3fe5db93b500 + continue-on-error: true + with: + github-token: ${{ github.token }} + + - name: Start livekit-server fallback + if: steps.livekit_server.outcome == 'failure' + id: livekit_server_fallback + shell: bash + env: + GH_TOKEN: ${{ github.token }} + run: | + set -euxo pipefail + tag="$( + gh api repos/livekit/livekit/releases \ + --jq 'limit(1; .[] | select([.assets[].name] | any(endswith("_linux_amd64.tar.gz"))) | .tag_name)' + )" + gh release download "${tag}" \ + --repo livekit/livekit \ + --pattern "*_linux_amd64.tar.gz" \ + --output "$RUNNER_TEMP/livekit-server-archive" + tar -xzf "$RUNNER_TEMP/livekit-server-archive" -C "$RUNNER_TEMP" + chmod +x "$RUNNER_TEMP/livekit-server" + cat > "$RUNNER_TEMP/livekit.yaml" <<'EOF' + logging: { json: true } + EOF + "$RUNNER_TEMP/livekit-server" --config "$RUNNER_TEMP/livekit.yaml" --dev > "$RUNNER_TEMP/livekit.jsonl" 2>&1 & + echo "log-path=$RUNNER_TEMP/livekit.jsonl" >> "$GITHUB_OUTPUT" + for _ in $(seq 1 30); do + if [[ "$(curl -fsS http://localhost:7880/ || true)" == "OK" ]]; then + exit 0 + fi + sleep 1 + done + exit 1 + + - name: Install livekit-cli + shell: bash + run: curl -sSL https://get.livekit.io/cli | bash + + - name: Run sanitizer integration tests (lifecycle subset) + timeout-minutes: 30 + shell: bash + run: | + set -euo pipefail + source .token_helpers/set_data_track_test_tokens.bash + .github/scripts/run_tests_with_backtrace.sh \ + build-debug/bin/livekit_integration_tests \ + --gtest_filter='PlatformAudioIntegrationTest.*:DataTrackE2ETest.UnpublishUpdatesPublishedStateEndToEnd:DataTrackPayloads/DataTrackTransportTest.PublishesAndReceivesFramesEndToEnd/MultiPacket' \ + --gtest_repeat=10 \ + --gtest_recreate_environments_when_repeating=1 \ + --gtest_output=xml:build-debug/sanitizer-integration-test-results.xml + + - name: Dump livekit-server log on failure + if: failure() + shell: bash + run: | + log_path="${{ steps.livekit_server.outputs.log-path }}" + if [[ -z "$log_path" ]]; then + log_path="${{ steps.livekit_server_fallback.outputs.log-path }}" + fi + tail -n 500 "$log_path" || true + + - name: Stage crash diagnostics + if: failure() + run: .github/scripts/stage_crash_diagnostics.sh build-debug + + - name: Upload sanitizer test results + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: sanitizer-test-results + path: | + build-debug/sanitizer-unit-test-results.xml + build-debug/sanitizer-integration-test-results.xml + if-no-files-found: ignore + retention-days: 14 + + - name: Upload sanitizer crash diagnostics + if: failure() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: sanitizer-crash-diagnostics + path: ${{ runner.temp }}/crash-diagnostics/ + if-no-files-found: ignore + retention-days: 14 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 02139930..1d70a431 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,8 +2,133 @@ name: Tests # Called by top-level ci.yml on: - workflow_call: {} - workflow_dispatch: {} + workflow_call: + inputs: + build_type: + description: Debug or release test build. + required: false + type: string + default: release + unit_repeat: + description: Number of times to repeat unit tests. + required: false + type: number + default: 1 + integration_repeat: + description: Number of times to repeat integration tests. + required: false + type: number + default: 1 + integration_rust_log: + description: RUST_LOG value for the integration test step. + required: false + type: string + default: metrics=debug + run_stress_tests: + description: Run stress tests that require LiveKit server setup. + required: false + type: boolean + default: false + stress_repeat: + description: Number of times to repeat stress tests. + required: false + type: number + default: 1 + unit_timeout_minutes: + description: Unit test step timeout in minutes. + required: false + type: number + default: 10 + integration_timeout_minutes: + description: Integration test step timeout in minutes. + required: false + type: number + default: 5 + stress_timeout_minutes: + description: Stress test step timeout in minutes. + required: false + type: number + default: 20 + job_timeout_minutes: + description: Matrix test job timeout in minutes. + required: false + type: number + default: 60 + artifact_retention_days: + description: Test artifact retention in days. + required: false + type: number + default: 7 + run_coverage: + description: Run the Linux coverage job. + required: false + type: boolean + default: true + workflow_dispatch: + inputs: + build_type: + description: Debug or release test build. + required: false + type: choice + options: + - release + - debug + default: release + unit_repeat: + description: Number of times to repeat unit tests. + required: false + type: number + default: 1 + integration_repeat: + description: Number of times to repeat integration tests. + required: false + type: number + default: 1 + integration_rust_log: + description: RUST_LOG value for the integration test step. + required: false + type: string + default: metrics=debug + run_stress_tests: + description: Run stress tests that require LiveKit server setup. + required: false + type: boolean + default: false + stress_repeat: + description: Number of times to repeat stress tests. + required: false + type: number + default: 1 + unit_timeout_minutes: + description: Unit test step timeout in minutes. + required: false + type: number + default: 10 + integration_timeout_minutes: + description: Integration test step timeout in minutes. + required: false + type: number + default: 5 + stress_timeout_minutes: + description: Stress test step timeout in minutes. + required: false + type: number + default: 20 + job_timeout_minutes: + description: Matrix test job timeout in minutes. + required: false + type: number + default: 60 + artifact_retention_days: + description: Test artifact retention in days. + required: false + type: number + default: 7 + run_coverage: + description: Run the Linux coverage job. + required: false + type: boolean + default: true permissions: contents: read @@ -35,19 +160,18 @@ jobs: include: - os: ubuntu-latest name: linux-x64 - build_cmd: ./build.sh release-tests e2e-testing: true + # Extra FFI logging on linux-x64 to diagnose timing-sensitive integration hangs. + integration_rust_log: info,livekit_ffi::server=debug - os: ubuntu-24.04-arm name: linux-arm64 - build_cmd: ./build.sh release-tests e2e-testing: true - os: macos-26-xlarge name: macos-arm64 - build_cmd: ./build.sh release-tests e2e-testing: true - os: macos-26-large name: macos-x64 - build_cmd: ./build.sh release-tests --macos-arch x86_64 + macos_arch: x86_64 e2e-testing: true # Pinned to Windows 2022 for current VS 17 implementation - os: windows-2022 @@ -57,6 +181,9 @@ jobs: name: Test (${{ matrix.name }}) runs-on: ${{ matrix.os }} + timeout-minutes: ${{ inputs.job_timeout_minutes }} + env: + BUILD_DIR: ${{ inputs.build_type == 'debug' && 'build-debug' || 'build-release' }} steps: - name: Checkout (with submodules) @@ -68,6 +195,13 @@ jobs: - name: Pull LFS files run: git lfs pull + - name: Prepare CI test scripts + if: runner.os != 'Windows' && inputs.build_type == 'debug' + shell: bash + run: | + chmod +x .github/scripts/run_tests_with_backtrace.sh + chmod +x .github/scripts/stage_crash_diagnostics.sh + # Cargo's freshness check is mtime-based. A fresh `actions/checkout` # stamps every submodule source file with the checkout time, which is # newer than the cached target/ artifacts — so cargo rebuilds the whole @@ -169,56 +303,164 @@ jobs: - name: Set Linux build environment if: runner.os == 'Linux' run: | - echo "CXXFLAGS=-Wno-deprecated-declarations" >> "$GITHUB_ENV" - echo "CFLAGS=-Wno-deprecated-declarations" >> "$GITHUB_ENV" + flags="-Wno-deprecated-declarations" + if [[ "${{ inputs.build_type }}" == "debug" ]]; then + flags="${flags} -fno-omit-frame-pointer" + fi + echo "CXXFLAGS=${flags}" >> "$GITHUB_ENV" + echo "CFLAGS=${flags}" >> "$GITHUB_ENV" LLVM_VERSION=$(llvm-config --version | cut -d. -f1) echo "LIBCLANG_PATH=/usr/lib/llvm-${LLVM_VERSION}/lib" >> "$GITHUB_ENV" + - name: Set macOS debug backtrace flags + if: runner.os == 'macOS' && inputs.build_type == 'debug' + run: | + echo "CXXFLAGS=-fno-omit-frame-pointer" >> "$GITHUB_ENV" + echo "CFLAGS=-fno-omit-frame-pointer" >> "$GITHUB_ENV" + # ---------- Build (release-tests: tests on, examples off) ---------- - name: Build tests (Unix) if: runner.os != 'Windows' shell: bash run: | + set -euo pipefail chmod +x build.sh - ${{ matrix.build_cmd }} + build_cmd="./build.sh ${{ inputs.build_type }}-tests" + if [[ -n "${{ matrix.macos_arch || '' }}" ]]; then + build_cmd="${build_cmd} --macos-arch ${{ matrix.macos_arch }}" + fi + ${build_cmd} - name: Build tests (Windows) if: runner.os == 'Windows' shell: pwsh - run: ${{ matrix.build_cmd }} + run: .\build.cmd ${{ inputs.build_type }}-tests # ---------- Run unit tests ---------- - name: Run unit tests (Unix) if: runner.os != 'Windows' - timeout-minutes: 10 + timeout-minutes: ${{ inputs.unit_timeout_minutes }} shell: bash + env: + RUST_BACKTRACE: full run: | - build-release/bin/livekit_unit_tests \ - --gtest_repeat=100 \ - --gtest_brief=1 \ - --gtest_output=xml:build-release/unit-test-results.xml + set -euo pipefail + bin="${{ env.BUILD_DIR }}/bin/livekit_unit_tests" + args=( + --gtest_repeat=${{ inputs.unit_repeat }} + --gtest_output=xml:${{ env.BUILD_DIR }}/unit-test-results.xml + ) + if [[ "${{ inputs.build_type }}" == "debug" ]]; then + .github/scripts/run_tests_with_backtrace.sh "$bin" "${args[@]}" + else + "$bin" "${args[@]}" + fi - name: Run unit tests (Windows) if: runner.os == 'Windows' - timeout-minutes: 10 + timeout-minutes: ${{ inputs.unit_timeout_minutes }} shell: pwsh run: | - build-release\bin\livekit_unit_tests.exe ` - --gtest_repeat=100 ` - --gtest_brief=1 ` - --gtest_output="xml:build-release\unit-test-results.xml" + $unitArgs = @( + "--gtest_repeat=${{ inputs.unit_repeat }}" + "--gtest_output=xml:${{ env.BUILD_DIR }}\unit-test-results.xml" + ) + & "${{ env.BUILD_DIR }}\bin\livekit_unit_tests.exe" @unitArgs # ---------- Start livekit-server for integration tests ---------- - name: Start livekit-server - if: matrix.e2e-testing + if: matrix.e2e-testing && (inputs.integration_repeat > 0 || inputs.run_stress_tests) id: livekit_server uses: livekit/dev-server-action@61e2b4dcb170dd3591e0c9b0db3c3fe5db93b500 + continue-on-error: true with: github-token: ${{ github.token }} + - name: Start livekit-server fallback + if: matrix.e2e-testing && (inputs.integration_repeat > 0 || inputs.run_stress_tests) && steps.livekit_server.outcome == 'failure' + id: livekit_server_fallback + shell: bash + env: + GH_TOKEN: ${{ github.token }} + run: | + set -euxo pipefail + + if [[ "$RUNNER_OS" == "macOS" ]]; then + brew install livekit + livekit_cmd="livekit-server" + else + case "${RUNNER_OS}-${RUNNER_ARCH}" in + Linux-X64) suffix='linux_amd64.tar.gz' ;; + Linux-ARM64) suffix='linux_arm64.tar.gz' ;; + Windows-X64) suffix='windows_amd64.zip' ;; + Windows-ARM64) suffix='windows_arm64.zip' ;; + *) echo "Unsupported platform: ${RUNNER_OS}-${RUNNER_ARCH}"; exit 1 ;; + esac + + tag="$( + gh api repos/livekit/livekit/releases \ + --jq "limit(1; .[] | select([.assets[].name] | any(endswith(\"_${suffix}\"))) | .tag_name)" + )" + if [[ -z "$tag" ]]; then + echo "::error::Could not find a LiveKit release with artifact suffix ${suffix}" + exit 1 + fi + echo "Using LiveKit server ${tag} (${suffix})" + + gh release download "${tag}" \ + --repo livekit/livekit \ + --pattern "*_${suffix}" \ + --output "$RUNNER_TEMP/livekit-server-archive" + + case "${RUNNER_OS}" in + Linux) + tar -xzf "$RUNNER_TEMP/livekit-server-archive" -C "$RUNNER_TEMP" + chmod +x "$RUNNER_TEMP/livekit-server" + livekit_cmd="$RUNNER_TEMP/livekit-server" + ;; + Windows) + unzip -o "$RUNNER_TEMP/livekit-server-archive" -d "$RUNNER_TEMP" + livekit_cmd="$RUNNER_TEMP/livekit-server.exe" + ;; + esac + fi + + "$livekit_cmd" --version + cat > "$RUNNER_TEMP/livekit.yaml" <<'EOF' + logging: { json: true } + EOF + "$livekit_cmd" --config "$RUNNER_TEMP/livekit.yaml" --dev > "$RUNNER_TEMP/livekit.jsonl" 2>&1 & + pid=$! + echo "Running server in the background: pid=$pid" + echo "pid=$pid" >> "$GITHUB_OUTPUT" + echo "log-path=$RUNNER_TEMP/livekit.jsonl" >> "$GITHUB_OUTPUT" + + for i in $(seq 1 30); do + if [[ "$(curl -fsS http://localhost:7880/ || true)" == "OK" ]]; then + echo "Server passed health check" + exit 0 + fi + echo "Waiting for server... (retry $i/30)" + sleep 1 + done + echo "::error::livekit-server fallback did not pass health check" + tail -n 500 "$RUNNER_TEMP/livekit.jsonl" || true + exit 1 + + - name: Resolve livekit-server log path + if: always() && matrix.e2e-testing && (inputs.integration_repeat > 0 || inputs.run_stress_tests) + id: livekit_server_log + shell: bash + run: | + log_path="${{ steps.livekit_server.outputs.log-path }}" + if [[ -z "$log_path" ]]; then + log_path="${{ steps.livekit_server_fallback.outputs.log-path }}" + fi + echo "log-path=${log_path}" >> "$GITHUB_OUTPUT" + # Needed by token helper script - name: Install livekit-cli - if: matrix.e2e-testing + if: matrix.e2e-testing && (inputs.integration_repeat > 0 || inputs.run_stress_tests) shell: bash env: # Windows installs lk via `gh api` / `gh release download`, which need this env var @@ -249,21 +491,70 @@ jobs: lk --version - name: Run integration tests - if: matrix.e2e-testing - timeout-minutes: 10 + if: matrix.e2e-testing && inputs.integration_repeat > 0 + timeout-minutes: ${{ inputs.integration_timeout_minutes }} + shell: bash + env: + RUST_LOG: ${{ matrix.integration_rust_log || inputs.integration_rust_log }} + RUST_BACKTRACE: full + # linux-x64: verbose C++ logs + stall watchdog (see run_tests_with_backtrace.sh). + LIVEKIT_TEST_LOG_LEVEL: ${{ matrix.name == 'linux-x64' && 'debug' || '' }} + LIVEKIT_TEST_STALL_SECONDS: ${{ matrix.name == 'linux-x64' && '90' || '0' }} + run: | + set -euo pipefail + source .token_helpers/set_data_track_test_tokens.bash + bin="${{ env.BUILD_DIR }}/bin/livekit_integration_tests" + args=( + --gtest_repeat=${{ inputs.integration_repeat }} + --gtest_recreate_environments_when_repeating=1 + --gtest_output=xml:${{ env.BUILD_DIR }}/integration-test-results.xml + ) + if [[ "${{ inputs.build_type }}" == "debug" ]]; then + .github/scripts/run_tests_with_backtrace.sh "$bin" "${args[@]}" + else + "$bin" "${args[@]}" + fi + + - name: Run stress tests + if: matrix.e2e-testing && inputs.run_stress_tests + timeout-minutes: ${{ inputs.stress_timeout_minutes }} shell: bash env: RUST_LOG: "metrics=debug" + RUST_BACKTRACE: full run: | set -euo pipefail source .token_helpers/set_data_track_test_tokens.bash - build-release/bin/livekit_integration_tests \ - --gtest_output=xml:build-release/integration-test-results.xml + bin="${{ env.BUILD_DIR }}/bin/livekit_stress_tests" + args=( + --gtest_repeat=${{ inputs.stress_repeat }} + --gtest_recreate_environments_when_repeating=1 + --gtest_output=xml:${{ env.BUILD_DIR }}/stress-test-results.xml + ) + if [[ "${{ inputs.build_type }}" == "debug" ]]; then + .github/scripts/run_tests_with_backtrace.sh "$bin" "${args[@]}" + else + "$bin" "${args[@]}" + fi - name: Dump livekit-server log on failure - if: failure() && matrix.e2e-testing + if: failure() && matrix.e2e-testing && (inputs.integration_repeat > 0 || inputs.run_stress_tests) + shell: bash + run: tail -n 500 "${{ steps.livekit_server_log.outputs.log-path }}" || true + + - name: Stage crash diagnostics + if: failure() && inputs.build_type == 'debug' && runner.os != 'Windows' shell: bash - run: tail -n 500 "${{ steps.livekit_server.outputs.log-path }}" || true + run: .github/scripts/stage_crash_diagnostics.sh "${{ env.BUILD_DIR }}" + + - name: Upload crash diagnostics + if: failure() && inputs.build_type == 'debug' && runner.os != 'Windows' + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: crash-diagnostics-${{ matrix.name }} + path: ${{ runner.temp }}/crash-diagnostics/ + if-no-files-found: ignore + retention-days: ${{ inputs.artifact_retention_days }} # ---------- Upload results ---------- - name: Upload test results @@ -272,11 +563,12 @@ jobs: with: name: test-results-${{ matrix.name }} path: | - build-release/unit-test-results.xml - build-release/integration-test-results.xml - ${{ steps.livekit_server.outputs.log-path }} + ${{ env.BUILD_DIR }}/unit-test-results.xml + ${{ env.BUILD_DIR }}/integration-test-results.xml + ${{ env.BUILD_DIR }}/stress-test-results.xml + ${{ steps.livekit_server_log.outputs.log-path }} if-no-files-found: ignore - retention-days: 7 + retention-days: ${{ inputs.artifact_retention_days }} # ============================================================================ # Code Coverage (Linux only) @@ -285,6 +577,7 @@ jobs: # ============================================================================ coverage: name: Code Coverage + if: inputs.run_coverage runs-on: ubuntu-latest # A debug build instrumented with --coverage is far heavier (RAM + disk) # than the release builds. Cap the wall-clock so a stuck/OOM build fails diff --git a/AGENTS.md b/AGENTS.md index 39da5348..37a8c8df 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -399,15 +399,17 @@ all filtered stages; normal pull requests and pushes use the path filters. - `.github/workflows/license_check.yml` — Cheap license check, run on every CI invocation. - `.github/workflows/docker-images.yml` — Reusable Docker image smoke-test and - publish workflow (optional push via input), called by CI and release workflows. + publish workflow (optional push via input), called by CI, release, and + nightly workflows. When adding or renaming files that affect a CI stage, update the matching `ci.yml` `changes` filter in the same PR. For example, new build scripts, CMake files, package manifests, or reusable build workflows should be added to -the `builds` filter; test-only helpers to `tests`; formatting/static-analysis -configuration to `cpp_checks`; and docs generation inputs to `docs`. +the `builds` filter; Docker packaging inputs to `docker`; test-only helpers to +`tests`; formatting/static-analysis configuration to `cpp_checks`; and docs +generation inputs to `docs`. Keep broad agent guidance files such as `AGENTS.md` out of the expensive -`builds`, `tests`, `cpp_checks`, and `docs` filters unless they start affecting -generated docs or build artifacts. An `AGENTS.md`-only change should not trigger -those stages; only the always-on cheap checks should run. +`builds`, `docker`, `tests`, `cpp_checks`, and `docs` filters unless they start +affecting generated docs or build artifacts. An `AGENTS.md`-only change should +not trigger those stages; only the always-on cheap checks should run. diff --git a/client-sdk-rust b/client-sdk-rust index 8e551062..d3bb1453 160000 --- a/client-sdk-rust +++ b/client-sdk-rust @@ -1 +1 @@ -Subproject commit 8e551062c59f912159b8cebac44b2cdcce0024ef +Subproject commit d3bb14531f88bfc7f4c43cdcd9a6f68911bfd9f3 diff --git a/docker/Dockerfile.sdk b/docker/Dockerfile.sdk index 6d31a878..2a263ee7 100644 --- a/docker/Dockerfile.sdk +++ b/docker/Dockerfile.sdk @@ -1,3 +1,5 @@ +# syntax=docker/dockerfile:1.7 +# # Copyright 2026 LiveKit # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -45,7 +47,11 @@ RUN mkdir -p /client-sdk-cpp/client-sdk-rust/.cargo \ # Build and install the SDK into a fixed prefix so downstream projects can # consume the image as a prebuilt LiveKit SDK environment. -RUN LLVM_VERSION="$(llvm-config --version | cut -d. -f1)" \ +RUN --mount=type=cache,target=/root/.cargo/registry,sharing=locked \ + --mount=type=cache,target=/root/.cargo/git,sharing=locked \ + --mount=type=cache,target=/client-sdk-cpp/client-sdk-rust/target,sharing=locked \ + --mount=type=cache,target=/client-sdk-cpp/build-release,sharing=locked \ + LLVM_VERSION="$(llvm-config --version | cut -d. -f1)" \ && export LIBCLANG_PATH="/usr/lib/llvm-${LLVM_VERSION}/lib" \ && export CXXFLAGS="-Wno-deprecated-declarations" \ && export CFLAGS="-Wno-deprecated-declarations" \ diff --git a/include/livekit/local_participant.h b/include/livekit/local_participant.h index 9369a914..2c1d5cf5 100644 --- a/include/livekit/local_participant.h +++ b/include/livekit/local_participant.h @@ -246,6 +246,14 @@ class LIVEKIT_API LocalParticipant : public Participant { /// prune expired @c weak_ptr entries. mutable TrackMap published_tracks_by_sid_; + /// Guards @ref published_tracks_by_sid_. The map is written from the + /// application thread (@ref publishTrack / @ref unpublishTrack) and both read + /// and pruned from the FFI callback thread (@ref trackPublications / + /// @ref findTrackPublication, reached via Room::onEvent). Without this lock + /// those concurrent accesses race and free map nodes out from under each + /// other (heap-use-after-free). Leaf lock: no other lock is taken while held. + mutable std::mutex published_tracks_mutex_; + std::unordered_map rpc_handlers_; // Shared state for RPC invocation tracking. Using shared_ptr so the state diff --git a/include/livekit/platform_audio.h b/include/livekit/platform_audio.h index 4e12343e..66755d96 100644 --- a/include/livekit/platform_audio.h +++ b/include/livekit/platform_audio.h @@ -114,8 +114,8 @@ class LIVEKIT_API PlatformAudioSource { PlatformAudioSource(FfiHandle handle, std::shared_ptr platform_audio) noexcept; - FfiHandle handle_; std::shared_ptr platform_audio_; + FfiHandle handle_; }; /// Platform audio device manager backed by WebRTC's Audio Device Module. diff --git a/src/data_track_stream.cpp b/src/data_track_stream.cpp index 155f4788..6a777377 100644 --- a/src/data_track_stream.cpp +++ b/src/data_track_stream.cpp @@ -52,22 +52,28 @@ void DataTrackStream::init(FfiHandle subscription_handle) { bool DataTrackStream::read(DataTrackFrame& out) { proto::DataTrackStreamReadResponse read_response; bool missing_read_response = false; + std::uint64_t subscription_handle = 0; { const std::scoped_lock lock(mutex_); if (closed_ || eof_) { return false; } + subscription_handle = static_cast(subscription_handle_.get()); + } - const auto subscription_handle = static_cast(subscription_handle_.get()); + // Do not hold mutex_ across sendRequest: readFrameWithTimeout may call close() + // from another thread on timeout, and close() also needs mutex_. + proto::FfiRequest req; + auto* msg = req.mutable_data_track_stream_read(); + msg->set_stream_handle(subscription_handle); + const proto::FfiResponse resp = FfiClient::instance().sendRequest(req); - // Signal the Rust side that we're ready to receive the next frame. - // The Rust SubscriptionTask uses a demand-driven protocol: it won't pull - // from the underlying stream until notified via this request. - proto::FfiRequest req; - auto* msg = req.mutable_data_track_stream_read(); - msg->set_stream_handle(subscription_handle); - const proto::FfiResponse resp = FfiClient::instance().sendRequest(req); + { + const std::scoped_lock lock(mutex_); + if (closed_ || eof_) { + return false; + } if (!resp.has_data_track_stream_read()) { missing_read_response = true; } else { diff --git a/src/local_participant.cpp b/src/local_participant.cpp index 7fda68ac..1e8d9a0f 100644 --- a/src/local_participant.cpp +++ b/src/local_participant.cpp @@ -197,7 +197,10 @@ void LocalParticipant::publishTrack(const std::shared_ptr& track, const T auto publication = std::make_shared(owned_pub); const std::string sid = publication->sid(); - published_tracks_by_sid_[sid] = std::weak_ptr(track); + { + const std::scoped_lock lock(published_tracks_mutex_); + published_tracks_by_sid_[sid] = std::weak_ptr(track); + } track->setPublication(publication); } @@ -237,6 +240,7 @@ void LocalParticipant::unpublishTrack(const std::string& track_sid) { fut.get(); + const std::scoped_lock lock(published_tracks_mutex_); if (auto it = published_tracks_by_sid_.find(track_sid); it != published_tracks_by_sid_.end()) { if (auto t = it->second.lock()) { t->setPublication(nullptr); @@ -247,6 +251,7 @@ void LocalParticipant::unpublishTrack(const std::string& track_sid) { LocalParticipant::PublicationMap LocalParticipant::trackPublications() const { PublicationMap out; + const std::scoped_lock lock(published_tracks_mutex_); for (auto it = published_tracks_by_sid_.begin(); it != published_tracks_by_sid_.end();) { auto t = it->second.lock(); if (!t) { @@ -443,6 +448,7 @@ void LocalParticipant::handleRpcMethodInvocation(uint64_t invocation_id, const s } std::shared_ptr LocalParticipant::findTrackPublication(const std::string& sid) const { + const std::scoped_lock lock(published_tracks_mutex_); auto it = published_tracks_by_sid_.find(sid); if (it == published_tracks_by_sid_.end()) { return nullptr; diff --git a/src/room.cpp b/src/room.cpp index 3ad58938..71680389 100644 --- a/src/room.cpp +++ b/src/room.cpp @@ -436,7 +436,7 @@ void Room::onEvent(const FfiEvent& event) { if (event.message_case() == FfiEvent::kRpcMethodInvocation) { const auto& rpc = event.rpc_method_invocation(); - LocalParticipant* lp = nullptr; + std::shared_ptr lp; { const std::scoped_lock guard(lock_); if (!local_participant_) { @@ -448,7 +448,7 @@ void Room::onEvent(const FfiEvent& event) { // RPC is not targeted at this room's local participant; ignore. return; } - lp = local_participant_.get(); + lp = local_participant_; } // Call outside the lock to avoid deadlocks / re-entrancy issues. diff --git a/src/room_proto_converter.cpp b/src/room_proto_converter.cpp index 53b49c59..d3c6f43c 100644 --- a/src/room_proto_converter.cpp +++ b/src/room_proto_converter.cpp @@ -28,14 +28,14 @@ std::string bytesToString(const std::vector& bytes) { return std::string(reinterpret_cast(bytes.data()), bytes.size()); } -std::vector toProto(const PacketTrailerFeatures& features) { - std::vector out; +std::vector toProto(const PacketTrailerFeatures& features) { + std::vector out; out.reserve(2); if (features.user_timestamp) { - out.push_back(proto::PacketTrailerFeature::PTF_USER_TIMESTAMP); + out.push_back(proto::FrameMetadataFeature::FMF_USER_TIMESTAMP); } if (features.frame_id) { - out.push_back(proto::PacketTrailerFeature::PTF_FRAME_ID); + out.push_back(proto::FrameMetadataFeature::FMF_FRAME_ID); } return out; } @@ -44,10 +44,10 @@ PacketTrailerFeatures fromProto(const google::protobuf::RepeatedField& feat PacketTrailerFeatures out{}; for (const int feature : features) { switch (feature) { - case proto::PacketTrailerFeature::PTF_USER_TIMESTAMP: + case proto::FrameMetadataFeature::FMF_USER_TIMESTAMP: out.user_timestamp = true; break; - case proto::PacketTrailerFeature::PTF_FRAME_ID: + case proto::FrameMetadataFeature::FMF_FRAME_ID: out.frame_id = true; break; default: @@ -502,8 +502,8 @@ proto::TrackPublishOptions toProto(const TrackPublishOptions& in) { if (in.preconnect_buffer) { msg.set_preconnect_buffer(*in.preconnect_buffer); } - for (const proto::PacketTrailerFeature feature : toProto(in.packet_trailer_features)) { - msg.add_packet_trailer_features(feature); + for (const proto::FrameMetadataFeature feature : toProto(in.packet_trailer_features)) { + msg.add_frame_metadata_features(feature); } return msg; } @@ -537,7 +537,7 @@ TrackPublishOptions fromProto(const proto::TrackPublishOptions& in) { if (in.has_preconnect_buffer()) { out.preconnect_buffer = in.preconnect_buffer(); } - out.packet_trailer_features = fromProto(in.packet_trailer_features()); + out.packet_trailer_features = fromProto(in.frame_metadata_features()); return out; } diff --git a/src/tests/common/test_common.h b/src/tests/common/test_common.h index 427cd4ca..25887437 100644 --- a/src/tests/common/test_common.h +++ b/src/tests/common/test_common.h @@ -144,6 +144,37 @@ inline bool waitForParticipant(Room* room, const std::string& identity, std::chr /// once the room is torn down (or before connect). Dereferencing the result of /// lock() blindly is undefined behavior, so tests must go through this helper, /// which throws instead of crashing when the handle is expired. +/// Resolve integration-test log verbosity from LIVEKIT_TEST_LOG_LEVEL (trace/debug/info/...). +inline livekit::LogLevel testLogLevelFromEnv() { + const char* level = std::getenv("LIVEKIT_TEST_LOG_LEVEL"); + if (level == nullptr || std::string(level).empty()) { + return livekit::LogLevel::Info; + } + const std::string value(level); + if (value == "trace") { + return livekit::LogLevel::Trace; + } + if (value == "debug") { + return livekit::LogLevel::Debug; + } + if (value == "info") { + return livekit::LogLevel::Info; + } + if (value == "warn") { + return livekit::LogLevel::Warn; + } + if (value == "error") { + return livekit::LogLevel::Error; + } + if (value == "critical") { + return livekit::LogLevel::Critical; + } + if (value == "off") { + return livekit::LogLevel::Off; + } + return livekit::LogLevel::Info; +} + inline std::shared_ptr lockLocalParticipant(const Room& room) { if (auto participant = room.localParticipant().lock()) { return participant; @@ -497,7 +528,7 @@ class StressTestStats { class LiveKitTestBase : public ::testing::Test { protected: void SetUp() override { - livekit::initialize(livekit::LogLevel::Info); + livekit::initialize(testLogLevelFromEnv()); config_ = TestConfig::fromEnv(); // Tracing is controlled by compile-time macro LIVEKIT_TEST_ENABLE_TRACING diff --git a/src/tests/integration/test_data_track.cpp b/src/tests/integration/test_data_track.cpp index b575c410..74a6b974 100644 --- a/src/tests/integration/test_data_track.cpp +++ b/src/tests/integration/test_data_track.cpp @@ -547,6 +547,14 @@ TEST_F(DataTrackE2ETest, CanResubscribeToRemoteDataTrack) { } }); + // RAII wrapper to ensure publisher thread is joined on scope exit + const auto stop_publisher = std::shared_ptr(nullptr, [&](void*) { + keep_publishing.store(false); // stops thread while loop + if (publisher.joinable()) { + publisher.join(); + } + }); + auto remote_track = subscriber_delegate.waitForTrack(kTrackWaitTimeout); ASSERT_NE(remote_track, nullptr) << "Timed out waiting for remote data track"; @@ -564,9 +572,6 @@ TEST_F(DataTrackE2ETest, CanResubscribeToRemoteDataTrack) { std::this_thread::sleep_for(50ms); } - keep_publishing.store(false); - publisher.join(); - if (publish_error) { std::rethrow_exception(publish_error); } diff --git a/src/tests/integration/test_platform_audio.cpp b/src/tests/integration/test_platform_audio.cpp index c6e86596..d27ac041 100644 --- a/src/tests/integration/test_platform_audio.cpp +++ b/src/tests/integration/test_platform_audio.cpp @@ -14,9 +14,7 @@ * limitations under the License. */ -#include #include -#include #include #include #include diff --git a/src/tests/integration/test_rpc.cpp b/src/tests/integration/test_rpc.cpp index 1abac063..3ee46fce 100644 --- a/src/tests/integration/test_rpc.cpp +++ b/src/tests/integration/test_rpc.cpp @@ -460,96 +460,4 @@ TEST_F(RpcIntegrationTest, ConcurrentRpcCalls) { receiver_room.reset(); } -// Integration test: Run for approximately 1 minute -TEST_F(RpcIntegrationTest, OneMinuteIntegration) { - EXPECT_TRUE(config_.available) << "Missing integration configuration"; - - auto receiver_room = std::make_unique(); - RoomOptions options; - options.auto_subscribe = true; - - ASSERT_TRUE(receiver_room->connect(config_.url, config_.token_b, options)) << "Receiver failed to connect"; - - std::string receiver_identity = lockLocalParticipant(*receiver_room)->identity(); - - std::atomic total_received{0}; - std::atomic total_bytes_received{0}; - - lockLocalParticipant(*receiver_room) - ->registerRpcMethod("integration-test", [&](const RpcInvocationData& data) -> std::optional { - total_received++; - total_bytes_received += data.payload.size(); - return "ack:" + std::to_string(data.payload.size()); - }); - - auto caller_room = std::make_unique(); - ASSERT_TRUE(caller_room->connect(config_.url, config_.token_a, options)) << "Caller failed to connect"; - - bool receiver_visible = waitForParticipant(caller_room.get(), receiver_identity, 10s); - ASSERT_TRUE(receiver_visible) << "Receiver not visible to caller"; - - // Hold the local participant alive for the duration of the sender thread so - // it cannot expire mid-call while RPCs are in flight. - ASSERT_NO_THROW(lockLocalParticipant(*caller_room)); - - // Run for 1 minute - const auto test_duration = 60s; - const auto start_time = std::chrono::steady_clock::now(); - - std::atomic total_sent{0}; - std::atomic successful_calls{0}; - std::atomic failed_calls{0}; - std::atomic running{true}; - - // Sender thread - std::thread sender([&]() { - std::vector payload_sizes = {100, 1024, 5 * 1024, 10 * 1024, kRpcV1PayloadLimit}; - int size_index = 0; - - while (running.load()) { - size_t payload_size = payload_sizes[size_index % payload_sizes.size()]; - std::string payload = generateRandomPayload(payload_size); - - try { - auto caller_lp = lockLocalParticipant(*caller_room); - ASSERT_NE(caller_lp, nullptr); - std::string response = caller_lp->performRpc(receiver_identity, "integration-test", payload, 30.0); - if (response == "ack:" + std::to_string(payload_size)) { - successful_calls++; - } - } catch (const std::exception& e) { - failed_calls++; - } - - total_sent++; - size_index++; - std::this_thread::sleep_for(100ms); // Rate limit - } - }); - - // Wait for test duration - while (std::chrono::steady_clock::now() - start_time < test_duration) { - std::this_thread::sleep_for(1s); - std::cout << "Progress: sent=" << total_sent.load() << " successful=" << successful_calls.load() - << " failed=" << failed_calls.load() << " received=" << total_received.load() << std::endl; - } - - running.store(false); - sender.join(); - - std::cout << "\n=== Integration Test Results (1 minute) ===" << std::endl; - std::cout << "Total sent: " << total_sent.load() << std::endl; - std::cout << "Successful: " << successful_calls.load() << std::endl; - std::cout << "Failed: " << failed_calls.load() << std::endl; - std::cout << "Total received: " << total_received.load() << std::endl; - std::cout << "Total bytes received: " << total_bytes_received.load() << std::endl; - - EXPECT_GT(successful_calls.load(), 0); - EXPECT_EQ(total_sent.load(), total_received.load()); - - lockLocalParticipant(*receiver_room)->unregisterRpcMethod("integration-test"); - caller_room.reset(); - receiver_room.reset(); -} - } // namespace livekit::test diff --git a/src/tests/stress/test_platform_audio_stress.cpp b/src/tests/stress/test_platform_audio_stress.cpp new file mode 100644 index 00000000..f38042ac --- /dev/null +++ b/src/tests/stress/test_platform_audio_stress.cpp @@ -0,0 +1,132 @@ +/* + * Copyright 2026 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#include "../common/test_common.h" + +namespace livekit::test { + +using namespace std::chrono_literals; + +namespace { + +constexpr auto kSubscriptionTimeout = 20s; + +struct PlatformTrackState { + std::mutex mutex; + std::condition_variable cv; + std::set subscribed_audio_names; +}; + +class PlatformTrackCollectorDelegate : public RoomDelegate { +public: + explicit PlatformTrackCollectorDelegate(PlatformTrackState& state) : state_(state) {} + + void onTrackSubscribed(Room&, const TrackSubscribedEvent& event) override { + std::lock_guard lock(state_.mutex); + if (event.track && event.track->kind() == TrackKind::KIND_AUDIO && event.publication) { + state_.subscribed_audio_names.insert(event.publication->name()); + } + state_.cv.notify_all(); + } + +private: + PlatformTrackState& state_; +}; + +bool runPlatformAudioCycle(PlatformAudio& platform_audio, const TestConfig& config, const std::string& track_name) { + RoomOptions options; + options.auto_subscribe = true; + + PlatformTrackState receiver_state; + PlatformTrackCollectorDelegate receiver_delegate(receiver_state); + + auto receiver_room = std::make_unique(); + receiver_room->setDelegate(&receiver_delegate); + if (!receiver_room->connect(config.url, config.token_b, options)) { + return false; + } + + auto sender_room = std::make_unique(); + if (!sender_room->connect(config.url, config.token_a, options)) { + return false; + } + + const auto source = platform_audio.createAudioSource(); + if (source == nullptr) { + return false; + } + + const auto track = LocalAudioTrack::createLocalAudioTrack(track_name, source); + if (track == nullptr) { + return false; + } + + TrackPublishOptions publish_options; + publish_options.source = TrackSource::SOURCE_MICROPHONE; + lockLocalParticipant(*sender_room)->publishTrack(track, publish_options); + + std::unique_lock lock(receiver_state.mutex); + return receiver_state.cv.wait_for(lock, kSubscriptionTimeout, + [&]() { return receiver_state.subscribed_audio_names.count(track_name) > 0; }); +} + +} // namespace + +class PlatformAudioStressTest : public LiveKitTestBase {}; + +// Control arm for the macOS PlatformAudio instability investigation. +// +// The standard PlatformAudioIntegrationTest cases each call livekit::shutdown() +// in TearDown(), which disposes the FFI server, drops the last Arc, +// and runs AdmProxy::~AdmProxy() -> platform_adm_->Terminate(). Under +// --gtest_repeat that means the native CoreAudio ADM is fully terminated and +// recreated on every iteration -- the suspected crash path. +// +// This test instead holds a single PlatformAudio alive for the whole test, so +// the runtime and ADM are created once and never terminated between cycles. +TEST_F(PlatformAudioStressTest, PinnedRuntimeRepeatedPublishStress) { + EXPECT_TRUE(config_.available) << "Missing integration configuration"; + + std::unique_ptr platform_audio; + try { + platform_audio = std::make_unique(); + } catch (const PlatformAudioError& error) { + GTEST_SKIP() << "PlatformAudio unavailable: " << error.what(); + } + + int iterations = 20; + if (const char* env = std::getenv("PLATFORM_AUDIO_PIN_ITERATIONS")) { + const int parsed = std::atoi(env); + if (parsed > 0) { + iterations = parsed; + } + } + + for (int i = 0; i < iterations; ++i) { + const std::string track_name = "platform-mic-pinned-" + std::to_string(i); + const bool subscribed = runPlatformAudioCycle(*platform_audio, config_, track_name); + ASSERT_TRUE(subscribed) << "Receiver never subscribed on pinned iteration " << i; + } +} + +} // namespace livekit::test diff --git a/src/tests/stress/test_rpc_stress.cpp b/src/tests/stress/test_rpc_stress.cpp index 6b081618..d48f4c07 100644 --- a/src/tests/stress/test_rpc_stress.cpp +++ b/src/tests/stress/test_rpc_stress.cpp @@ -848,4 +848,96 @@ TEST_F(RpcStressTest, HighThroughputBurst) { receiver_room.reset(); } +// Integration test: Run for approximately 1 minute +TEST_F(RpcStressTest, OneMinuteIntegration) { + EXPECT_TRUE(config_.available) << "Missing integration configuration"; + + auto receiver_room = std::make_unique(); + RoomOptions options; + options.auto_subscribe = true; + + ASSERT_TRUE(receiver_room->connect(config_.url, config_.token_b, options)) << "Receiver failed to connect"; + + std::string receiver_identity = lockLocalParticipant(*receiver_room)->identity(); + + std::atomic total_received{0}; + std::atomic total_bytes_received{0}; + + lockLocalParticipant(*receiver_room) + ->registerRpcMethod("integration-test", [&](const RpcInvocationData& data) -> std::optional { + total_received++; + total_bytes_received += data.payload.size(); + return "ack:" + std::to_string(data.payload.size()); + }); + + auto caller_room = std::make_unique(); + ASSERT_TRUE(caller_room->connect(config_.url, config_.token_a, options)) << "Caller failed to connect"; + + bool receiver_visible = waitForParticipant(caller_room.get(), receiver_identity, 10s); + ASSERT_TRUE(receiver_visible) << "Receiver not visible to caller"; + + // Hold the local participant alive for the duration of the sender thread so + // it cannot expire mid-call while RPCs are in flight. + ASSERT_NO_THROW(lockLocalParticipant(*caller_room)); + + // Run for 1 minute + const auto test_duration = 60s; + const auto start_time = std::chrono::steady_clock::now(); + + std::atomic total_sent{0}; + std::atomic successful_calls{0}; + std::atomic failed_calls{0}; + std::atomic running{true}; + + // Sender thread + std::thread sender([&]() { + std::vector payload_sizes = {100, 1024, 5 * 1024, 10 * 1024, kMaxRpcPayloadSize}; + int size_index = 0; + + while (running.load()) { + size_t payload_size = payload_sizes[size_index % payload_sizes.size()]; + std::string payload = generateRandomPayload(payload_size); + + try { + auto caller_lp = lockLocalParticipant(*caller_room); + ASSERT_NE(caller_lp, nullptr); + std::string response = caller_lp->performRpc(receiver_identity, "integration-test", payload, 30.0); + if (response == "ack:" + std::to_string(payload_size)) { + successful_calls++; + } + } catch (const std::exception& e) { + failed_calls++; + } + + total_sent++; + size_index++; + std::this_thread::sleep_for(100ms); // Rate limit + } + }); + + // Wait for test duration + while (std::chrono::steady_clock::now() - start_time < test_duration) { + std::this_thread::sleep_for(1s); + std::cout << "Progress: sent=" << total_sent.load() << " successful=" << successful_calls.load() + << " failed=" << failed_calls.load() << " received=" << total_received.load() << std::endl; + } + + running.store(false); + sender.join(); + + std::cout << "\n=== Integration Test Results (1 minute) ===" << std::endl; + std::cout << "Total sent: " << total_sent.load() << std::endl; + std::cout << "Successful: " << successful_calls.load() << std::endl; + std::cout << "Failed: " << failed_calls.load() << std::endl; + std::cout << "Total received: " << total_received.load() << std::endl; + std::cout << "Total bytes received: " << total_bytes_received.load() << std::endl; + + EXPECT_GT(successful_calls.load(), 0); + EXPECT_EQ(total_sent.load(), total_received.load()); + + lockLocalParticipant(*receiver_room)->unregisterRpcMethod("integration-test"); + caller_room.reset(); + receiver_room.reset(); +} + } // namespace livekit::test diff --git a/src/tests/unit/test_video_frame_metadata.cpp b/src/tests/unit/test_video_frame_metadata.cpp index 7f08ff6b..ab6df956 100644 --- a/src/tests/unit/test_video_frame_metadata.cpp +++ b/src/tests/unit/test_video_frame_metadata.cpp @@ -85,9 +85,9 @@ TEST(TrackPublishOptionsTest, PacketTrailerFeaturesRoundTrip) { options.packet_trailer_features.frame_id = true; proto::TrackPublishOptions proto_options = toProto(options); - ASSERT_EQ(proto_options.packet_trailer_features_size(), 2); - EXPECT_EQ(proto_options.packet_trailer_features(0), proto::PacketTrailerFeature::PTF_USER_TIMESTAMP); - EXPECT_EQ(proto_options.packet_trailer_features(1), proto::PacketTrailerFeature::PTF_FRAME_ID); + ASSERT_EQ(proto_options.frame_metadata_features_size(), 2); + EXPECT_EQ(proto_options.frame_metadata_features(0), proto::FrameMetadataFeature::FMF_USER_TIMESTAMP); + EXPECT_EQ(proto_options.frame_metadata_features(1), proto::FrameMetadataFeature::FMF_FRAME_ID); TrackPublishOptions round_trip = fromProto(proto_options); EXPECT_TRUE(round_trip.packet_trailer_features.user_timestamp);