@@ -78,6 +78,8 @@ use zip::ZipArchive;
7878use zip:: ZipWriter ;
7979use zip:: write:: FullFileOptions ;
8080
81+ use super :: support_bundle:: perfetto;
82+
8183// We use "/var/tmp" to use Nexus' filesystem for temporary storage,
8284// rather than "/tmp", which would keep this collected data in-memory.
8385const TEMPDIR : & str = "/var/tmp" ;
@@ -1037,11 +1039,94 @@ impl BundleCollection {
10371039 //
10381040 // Only finish if we've exhausted all possible steps and joined all spawned work.
10391041 if steps. is_empty ( ) {
1042+ // Write trace file before returning
1043+ if let Err ( err) = self . write_trace_file ( output, & report) . await {
1044+ warn ! (
1045+ self . log,
1046+ "Failed to write trace file" ;
1047+ "error" => ?err
1048+ ) ;
1049+ }
10401050 return report;
10411051 }
10421052 }
10431053 }
10441054
1055+ // Write a Perfetto Event format JSON file for visualization
1056+ async fn write_trace_file (
1057+ & self ,
1058+ output : & Utf8TempDir ,
1059+ report : & SupportBundleCollectionReport ,
1060+ ) -> anyhow:: Result < ( ) > {
1061+ let meta_dir = output. path ( ) . join ( "meta" ) ;
1062+ tokio:: fs:: create_dir_all ( & meta_dir) . await . with_context ( || {
1063+ format ! ( "Failed to create meta directory {meta_dir}" )
1064+ } ) ?;
1065+
1066+ let trace_path = meta_dir. join ( "trace.json" ) ;
1067+
1068+ // Convert steps to Perfetto Trace Event format.
1069+ // Sort steps by start time and assign each a unique sequential ID.
1070+ //
1071+ // This is necessary because the trace event format does not like
1072+ // multiple slices to overlap - so we make each slice distinct.
1073+ //
1074+ // Ideally we'd be able to correlate these with actual tokio tasks,
1075+ // but it's hard to convert tokio::task::Id to a u64 because
1076+ // of https://github.com/tokio-rs/tokio/issues/7430
1077+ let mut sorted_steps: Vec < _ > = report. steps . iter ( ) . collect ( ) ;
1078+ sorted_steps. sort_by_key ( |s| s. start ) ;
1079+
1080+ // Generate trace events - each step gets a unique ID (1, 2, 3, ...)
1081+ // based on its start time order
1082+ let trace_events: Vec < _ > = sorted_steps
1083+ . iter ( )
1084+ . enumerate ( )
1085+ . map ( |( i, step) | {
1086+ let start_us = step. start . timestamp_micros ( ) ;
1087+ let duration_us = ( step. end - step. start )
1088+ . num_microseconds ( )
1089+ . unwrap_or ( 0 )
1090+ . max ( 0 ) ;
1091+ let step_id = i + 1 ;
1092+
1093+ perfetto:: TraceEvent {
1094+ name : step. name . clone ( ) ,
1095+ cat : "bundle_collection" . to_string ( ) ,
1096+ ph : "X" . to_string ( ) ,
1097+ ts : start_us,
1098+ dur : duration_us,
1099+ pid : 1 ,
1100+ tid : step_id,
1101+ args : json ! ( {
1102+ "status" : step. status. to_string( ) ,
1103+ } ) ,
1104+ }
1105+ } )
1106+ . collect ( ) ;
1107+
1108+ let trace = perfetto:: Trace {
1109+ trace_events,
1110+ display_time_unit : "ms" . to_string ( ) ,
1111+ } ;
1112+
1113+ let trace_content = serde_json:: to_string_pretty ( & trace)
1114+ . context ( "Failed to serialize trace JSON" ) ?;
1115+
1116+ tokio:: fs:: write ( & trace_path, trace_content) . await . with_context (
1117+ || format ! ( "Failed to write trace file to {trace_path}" ) ,
1118+ ) ?;
1119+
1120+ info ! (
1121+ self . log,
1122+ "Wrote trace file" ;
1123+ "path" => %trace_path,
1124+ "num_events" => trace. trace_events. len( )
1125+ ) ;
1126+
1127+ Ok ( ( ) )
1128+ }
1129+
10451130 async fn collect_bundle_id (
10461131 & self ,
10471132 dir : & Utf8Path ,
@@ -2528,6 +2613,117 @@ mod test {
25282613 assert ! ( report. is_none( ) ) ;
25292614 }
25302615
2616+ #[ nexus_test( server = crate :: Server ) ]
2617+ async fn test_trace_file_generated ( cptestctx : & ControlPlaneTestContext ) {
2618+ let nexus = & cptestctx. server . server_context ( ) . nexus ;
2619+ let datastore = nexus. datastore ( ) ;
2620+ let resolver = nexus. resolver ( ) ;
2621+ let opctx = OpContext :: for_tests (
2622+ cptestctx. logctx . log . clone ( ) ,
2623+ datastore. clone ( ) ,
2624+ ) ;
2625+
2626+ // Before we can create any bundles, we need to create the
2627+ // space for them to be provisioned.
2628+ let _datasets =
2629+ TestDataset :: setup ( cptestctx, & datastore, & opctx, 1 ) . await ;
2630+
2631+ // Create a bundle to collect
2632+ let bundle = datastore
2633+ . support_bundle_create (
2634+ & opctx,
2635+ "For trace file testing" ,
2636+ nexus. id ( ) ,
2637+ None ,
2638+ )
2639+ . await
2640+ . expect ( "Couldn't allocate a support bundle" ) ;
2641+
2642+ let collector = SupportBundleCollector :: new (
2643+ datastore. clone ( ) ,
2644+ resolver. clone ( ) ,
2645+ false ,
2646+ nexus. id ( ) ,
2647+ ) ;
2648+
2649+ // Collect the bundle
2650+ let mut request = BundleRequest :: default ( ) ;
2651+ request. data_selection . insert ( BundleData :: HostInfo ( HashSet :: new ( ) ) ) ;
2652+ let report = collector
2653+ . collect_bundle ( & opctx, & request)
2654+ . await
2655+ . expect ( "Collection should have succeeded" )
2656+ . expect ( "Should have generated a report" ) ;
2657+
2658+ // Download the trace file from the bundle
2659+ let head = false ;
2660+ let range = None ;
2661+ let response = nexus
2662+ . support_bundle_download (
2663+ & opctx,
2664+ bundle. id . into ( ) ,
2665+ SupportBundleQueryType :: Path {
2666+ file_path : "meta/trace.json" . to_string ( ) ,
2667+ } ,
2668+ head,
2669+ range,
2670+ )
2671+ . await
2672+ . expect ( "Should be able to download trace file" ) ;
2673+
2674+ // Parse the trace file using our Perfetto structs
2675+ let body_bytes =
2676+ response. into_body ( ) . collect ( ) . await . unwrap ( ) . to_bytes ( ) ;
2677+ let trace: perfetto:: Trace = serde_json:: from_slice ( & body_bytes)
2678+ . expect ( "Trace file should be valid Perfetto JSON" ) ;
2679+
2680+ // Verify display time unit
2681+ assert_eq ! (
2682+ trace. display_time_unit, "ms" ,
2683+ "Display time unit should be milliseconds"
2684+ ) ;
2685+
2686+ // We should have at least the main collection steps
2687+ assert ! (
2688+ !trace. trace_events. is_empty( ) ,
2689+ "Should have at least one trace event"
2690+ ) ;
2691+
2692+ // Verify each event has the expected structure
2693+ for event in & trace. trace_events {
2694+ // Verify category
2695+ assert_eq ! (
2696+ event. cat, "bundle_collection" ,
2697+ "Event should have category 'bundle_collection'"
2698+ ) ;
2699+ // Verify phase type
2700+ assert_eq ! ( event. ph, "X" , "Event should be Complete event type" ) ;
2701+ // Verify timestamps are positive
2702+ assert ! ( event. ts >= 0 , "Event timestamp should be non-negative" ) ;
2703+ assert ! ( event. dur >= 0 , "Event duration should be non-negative" ) ;
2704+ // Verify process and thread IDs are set
2705+ assert_eq ! ( event. pid, 1 , "All events should have pid=1" ) ;
2706+ assert ! ( event. tid > 0 , "Event thread ID should be positive" ) ;
2707+ }
2708+
2709+ // Verify we have the same number of events as steps in the report
2710+ assert_eq ! (
2711+ trace. trace_events. len( ) ,
2712+ report. steps. len( ) ,
2713+ "Number of events should match number of steps"
2714+ ) ;
2715+
2716+ // Verify step names match between report and trace
2717+ let trace_names: std:: collections:: HashSet < _ > =
2718+ trace. trace_events . iter ( ) . map ( |e| e. name . as_str ( ) ) . collect ( ) ;
2719+ let report_names: std:: collections:: HashSet < _ > =
2720+ report. steps . iter ( ) . map ( |s| s. name . as_str ( ) ) . collect ( ) ;
2721+ assert_eq ! (
2722+ trace_names, report_names,
2723+ "Trace event names should match report step names"
2724+ ) ;
2725+ }
2726+
25312727 #[ nexus_test( server = crate :: Server ) ]
25322728 async fn test_collect_chunked ( cptestctx : & ControlPlaneTestContext ) {
25332729 let nexus = & cptestctx. server . server_context ( ) . nexus ;
0 commit comments