Skip to content

Add integration test for Firecracker with clippy-tracing instrumentation #5256

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
250 changes: 250 additions & 0 deletions tests/integration_tests/functional/test_firecracker_tracing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

"""Test that Firecracker works correctly when instrumented with tracing and trace level logs are enabled.

This test verifies that the log-instrument crate can successfully instrument the actual Firecracker
binary and that the instrumented Firecracker works correctly with trace-level logging.

Test Coverage:
1. Uses clippy-tracing tool to add instrumentation to Firecracker source files
2. Builds Firecracker with tracing features enabled
3. Verifies basic functionality (API calls, VM lifecycle) works with instrumentation
4. Confirms trace-level logs are generated and contain meaningful information
5. Checks performance impact is within acceptable bounds
"""

import re
import tempfile
import time
from pathlib import Path

import pytest

from framework import utils
from framework.microvm import MicroVMFactory
from host_tools.cargo_build import cargo, get_binary


def build_instrumented_firecracker():
"""Build Firecracker with tracing instrumentation enabled."""
# First, add instrumentation using clippy-tracing
clippy_tracing = get_binary("clippy-tracing")

# Add instrumentation to a subset of files to avoid performance issues
# We'll instrument just the API server and main entry points for meaningful traces
cargo_args = [
"--action",
"fix",
"--path",
"./src/firecracker/src/main.rs",
"--path",
"./src/firecracker/src/api_server",
"--path",
"./src/vmm/src/lib.rs",
"--path",
"./src/vmm/src/builder.rs",
]

utils.check_output(f"{clippy_tracing} {' '.join(cargo_args)}")

# Build Firecracker with tracing feature enabled
cargo("build", "--features tracing --bin firecracker")

return get_binary("firecracker")


def cleanup_instrumentation():
"""Remove instrumentation from source files."""
clippy_tracing = get_binary("clippy-tracing")

# Strip instrumentation from the files we modified
strip_args = [
"--action",
"strip",
"--path",
"./src/firecracker/src/main.rs",
"--path",
"./src/firecracker/src/api_server",
"--path",
"./src/vmm/src/lib.rs",
"--path",
"./src/vmm/src/builder.rs",
]

utils.check_output(f"{clippy_tracing} {' '.join(strip_args)}")


@pytest.fixture(scope="module")
def instrumented_firecracker_binary():
"""Build instrumented Firecracker binary for testing."""
binary = build_instrumented_firecracker()
yield binary
cleanup_instrumentation()


def test_firecracker_tracing_basic_functionality(instrumented_firecracker_binary):
"""Test that instrumented Firecracker can start and handle basic API calls with trace logging."""
# Create a temporary directory for this test
with tempfile.TemporaryDirectory() as temp_dir:
# Create a MicroVM factory with the instrumented binary
factory = MicroVMFactory(instrumented_firecracker_binary)

# Build a microVM
vm = factory.build()

# Configure basic VM settings
vm.basic_config(vcpu_count=1, mem_size_mib=128)

# Spawn the VM with trace level logging
vm.spawn(log_level="Trace", log_show_level=True, log_show_origin=True)

try:
# Wait for the API socket to be available
vm._wait_for_api_socket() # pylint: disable=protected-access

# Make some basic API calls to generate trace logs
# Get instance info
response = vm.api.describe_instance.get()
assert response.status_code == 200

# Get machine config
response = vm.api.machine_config.get()
assert response.status_code == 200

# Set logger to trace level to ensure we capture instrumentation logs
logger_config = {"level": "Trace"}
response = vm.api.logger.put(**logger_config)
assert response.status_code == 204

# Make another API call after setting trace level
response = vm.api.describe_instance.get()
assert response.status_code == 200

# Verify that the VM is working correctly
assert vm.state == "Not started"

finally:
vm.kill()

# Check the logs for instrumentation traces
log_data = vm.log_data

# Verify that trace level logs are present
assert "TRACE" in log_data, "Expected TRACE level logs in output"

# Look for log-instrument traces (function entry/exit)
# These should have the format: ThreadId(X)>>function_name or ThreadId(X)<<function_name
trace_pattern = r"ThreadId\(\d+\)(?:::[^>]*)?(?:>>|<<)\w+"
trace_matches = re.findall(trace_pattern, log_data)

assert (
len(trace_matches) > 0
), f"Expected to find log-instrument traces in logs, but found none. Log data: {log_data[:1000]}..."

# Verify we see function entry and exit traces
entry_traces = [match for match in trace_matches if ">>" in match]
exit_traces = [match for match in trace_matches if "<<" in match]

assert len(entry_traces) > 0, "Expected to find function entry traces (>>)"
assert len(exit_traces) > 0, "Expected to find function exit traces (<<)"

# Verify that meaningful functions are being traced
# Look for traces from main, API handling, or VM management functions
meaningful_functions = ["main", "api", "vmm", "request", "response"]
found_meaningful = False

for trace in trace_matches:
for func in meaningful_functions:
if func.lower() in trace.lower():
found_meaningful = True
break
if found_meaningful:
break

assert (
found_meaningful
), f"Expected to find traces from meaningful functions, but traces were: {trace_matches[:10]}"


def test_firecracker_tracing_performance_impact():
"""Test that instrumented Firecracker still performs reasonably (basic smoke test)."""
# This is a basic performance smoke test to ensure tracing doesn't break functionality
# We're not doing detailed performance analysis, just ensuring it doesn't hang or crash

# Build instrumented binary
instrumented_binary = build_instrumented_firecracker()

try:
factory = MicroVMFactory(instrumented_binary)
vm = factory.build()

# Time the basic configuration and startup
start_time = time.time()

vm.basic_config(vcpu_count=1, mem_size_mib=128, add_root_device=False)
vm.spawn(log_level="Trace")

# Make several API calls
for _ in range(5):
response = vm.api.describe_instance.get()
assert response.status_code == 200

elapsed = time.time() - start_time

# Should complete within reasonable time (30 seconds is very generous)
# This is just to catch major performance regressions or hangs
assert (
elapsed < 30
), f"Instrumented Firecracker took too long to start and handle API calls: {elapsed}s"

vm.kill()

finally:
cleanup_instrumentation()


def test_trace_log_filtering():
"""Test that trace log filtering works correctly with instrumented Firecracker."""
instrumented_binary = build_instrumented_firecracker()

try:
factory = MicroVMFactory(instrumented_binary)
vm = factory.build()

vm.basic_config(vcpu_count=1, mem_size_mib=128, add_root_device=False)
vm.spawn(log_level="Info") # Start with Info level

try:
# Initially should not have trace logs
initial_log_data = vm.log_data

# Set logger to trace level
logger_config = {"level": "Trace"}
response = vm.api.logger.put(**logger_config)
assert response.status_code == 204

# Make API calls to generate traces
for _ in range(3):
response = vm.api.describe_instance.get()
assert response.status_code == 200

# Now should have trace logs
final_log_data = vm.log_data

# Verify no TRACE logs were present initially
assert (
"TRACE" not in initial_log_data
), "Expected no TRACE logs before setting log level to Trace"

# The new log data should contain trace information
new_log_data = final_log_data[len(initial_log_data) :]
assert (
"TRACE" in new_log_data
), "Expected TRACE logs after setting log level to Trace"

finally:
vm.kill()

finally:
cleanup_instrumentation()
Loading