@@ -1037,11 +1037,94 @@ impl BundleCollection {
10371037 //
10381038 // Only finish if we've exhausted all possible steps and joined all spawned work.
10391039 if steps. is_empty ( ) {
1040+ // Write trace file before returning
1041+ if let Err ( err) = self . write_trace_file ( output, & report) . await {
1042+ warn ! (
1043+ self . log,
1044+ "Failed to write trace file" ;
1045+ "error" => ?err
1046+ ) ;
1047+ }
10401048 return report;
10411049 }
10421050 }
10431051 }
10441052
1053+ // Write a Perfetto Event format JSON file for visualization
1054+ async fn write_trace_file (
1055+ & self ,
1056+ output : & Utf8TempDir ,
1057+ report : & SupportBundleCollectionReport ,
1058+ ) -> anyhow:: Result < ( ) > {
1059+ let meta_dir = output. path ( ) . join ( "meta" ) ;
1060+ tokio:: fs:: create_dir_all ( & meta_dir) . await . with_context ( || {
1061+ format ! ( "Failed to create meta directory {meta_dir}" )
1062+ } ) ?;
1063+
1064+ let trace_path = meta_dir. join ( "trace.json" ) ;
1065+
1066+ // Convert steps to Perfetto Trace Event format.
1067+ // Sort steps by start time and assign each a unique sequential ID.
1068+ //
1069+ // This is necessary because the trace event format does not like
1070+ // multiple slices to overlap - so we make each slice distinct.
1071+ //
1072+ // Ideally we'd be able to correlate these with actual tokio tasks,
1073+ // but it's hard to convert tokio::task::Id to a u64 because
1074+ // of https://github.com/tokio-rs/tokio/issues/7430
1075+ let mut sorted_steps: Vec < _ > = report. steps . iter ( ) . collect ( ) ;
1076+ sorted_steps. sort_by_key ( |s| s. start ) ;
1077+
1078+ // Generate trace events - each step gets a unique ID (1, 2, 3, ...)
1079+ // based on its start time order
1080+ let trace_events: Vec < _ > = sorted_steps
1081+ . iter ( )
1082+ . enumerate ( )
1083+ . map ( |( i, step) | {
1084+ let start_us = step. start . timestamp_micros ( ) ;
1085+ let duration_us = ( step. end - step. start )
1086+ . num_microseconds ( )
1087+ . unwrap_or ( 0 )
1088+ . max ( 0 ) ;
1089+ let step_id = i + 1 ;
1090+
1091+ json ! ( {
1092+ "name" : step. name,
1093+ "cat" : "bundle_collection" ,
1094+ "ph" : "X" , // Complete event (has duration)
1095+ "ts" : start_us,
1096+ "dur" : duration_us,
1097+ "pid" : 1 ,
1098+ "tid" : step_id,
1099+ "args" : {
1100+ "status" : step. status. to_string( ) ,
1101+ }
1102+ } )
1103+ } )
1104+ . collect ( ) ;
1105+
1106+ let trace_json = json ! ( {
1107+ "traceEvents" : trace_events,
1108+ "displayTimeUnit" : "ms" ,
1109+ } ) ;
1110+
1111+ let trace_content = serde_json:: to_string_pretty ( & trace_json)
1112+ . context ( "Failed to serialize trace JSON" ) ?;
1113+
1114+ tokio:: fs:: write ( & trace_path, trace_content) . await . with_context (
1115+ || format ! ( "Failed to write trace file to {trace_path}" ) ,
1116+ ) ?;
1117+
1118+ info ! (
1119+ self . log,
1120+ "Wrote trace file" ;
1121+ "path" => %trace_path,
1122+ "num_events" => trace_events. len( )
1123+ ) ;
1124+
1125+ Ok ( ( ) )
1126+ }
1127+
10451128 async fn collect_bundle_id (
10461129 & self ,
10471130 dir : & Utf8Path ,
@@ -2528,6 +2611,130 @@ mod test {
25282611 assert ! ( report. is_none( ) ) ;
25292612 }
25302613
2614+ #[ nexus_test( server = crate :: Server ) ]
2615+ async fn test_trace_file_generated ( cptestctx : & ControlPlaneTestContext ) {
2616+ let nexus = & cptestctx. server . server_context ( ) . nexus ;
2617+ let datastore = nexus. datastore ( ) ;
2618+ let resolver = nexus. resolver ( ) ;
2619+ let opctx = OpContext :: for_tests (
2620+ cptestctx. logctx . log . clone ( ) ,
2621+ datastore. clone ( ) ,
2622+ ) ;
2623+
2624+ // Before we can create any bundles, we need to create the
2625+ // space for them to be provisioned.
2626+ let _datasets =
2627+ TestDataset :: setup ( cptestctx, & datastore, & opctx, 1 ) . await ;
2628+
2629+ // Create a bundle to collect
2630+ let bundle = datastore
2631+ . support_bundle_create (
2632+ & opctx,
2633+ "For trace file testing" ,
2634+ nexus. id ( ) ,
2635+ None ,
2636+ )
2637+ . await
2638+ . expect ( "Couldn't allocate a support bundle" ) ;
2639+
2640+ let collector = SupportBundleCollector :: new (
2641+ datastore. clone ( ) ,
2642+ resolver. clone ( ) ,
2643+ false ,
2644+ nexus. id ( ) ,
2645+ ) ;
2646+
2647+ // Collect the bundle
2648+ let mut request = BundleRequest :: default ( ) ;
2649+ request. data_selection . insert ( BundleData :: HostInfo ( HashSet :: new ( ) ) ) ;
2650+ let report = collector
2651+ . collect_bundle ( & opctx, & request)
2652+ . await
2653+ . expect ( "Collection should have succeeded" )
2654+ . expect ( "Should have generated a report" ) ;
2655+
2656+ // Download the trace file from the bundle
2657+ let head = false ;
2658+ let range = None ;
2659+ let response = nexus
2660+ . support_bundle_download (
2661+ & opctx,
2662+ bundle. id . into ( ) ,
2663+ SupportBundleQueryType :: Path {
2664+ file_path : "meta/trace.json" . to_string ( ) ,
2665+ } ,
2666+ head,
2667+ range,
2668+ )
2669+ . await
2670+ . expect ( "Should be able to download trace file" ) ;
2671+
2672+ // Parse the trace file as JSON
2673+ let body_bytes =
2674+ response. into_body ( ) . collect ( ) . await . unwrap ( ) . to_bytes ( ) ;
2675+ let trace_json: serde_json:: Value = serde_json:: from_slice ( & body_bytes)
2676+ . expect ( "Trace file should be valid JSON" ) ;
2677+
2678+ // Verify the structure matches Perfetto Trace Event format
2679+ let trace_events = trace_json
2680+ . get ( "traceEvents" )
2681+ . expect ( "Should have traceEvents field" )
2682+ . as_array ( )
2683+ . expect ( "traceEvents should be an array" ) ;
2684+
2685+ // We should have at least the main collection steps
2686+ assert ! (
2687+ !trace_events. is_empty( ) ,
2688+ "Should have at least one trace event"
2689+ ) ;
2690+
2691+ // Verify each event has the expected fields
2692+ for event in trace_events {
2693+ assert ! ( event. get( "name" ) . is_some( ) , "Event should have name" ) ;
2694+ assert_eq ! (
2695+ event. get( "cat" ) . and_then( |v| v. as_str( ) ) ,
2696+ Some ( "bundle_collection" ) ,
2697+ "Event should have category 'bundle_collection'"
2698+ ) ;
2699+ assert_eq ! (
2700+ event. get( "ph" ) . and_then( |v| v. as_str( ) ) ,
2701+ Some ( "X" ) ,
2702+ "Event should be Complete event type"
2703+ ) ;
2704+ assert ! (
2705+ event. get( "ts" ) . and_then( |v| v. as_i64( ) ) . is_some( ) ,
2706+ "Event should have timestamp"
2707+ ) ;
2708+ assert ! (
2709+ event. get( "dur" ) . and_then( |v| v. as_i64( ) ) . is_some( ) ,
2710+ "Event should have duration"
2711+ ) ;
2712+ assert ! (
2713+ event. get( "args" ) . is_some( ) ,
2714+ "Event should have args field"
2715+ ) ;
2716+ }
2717+
2718+ // Verify we have the same number of events as steps in the report
2719+ assert_eq ! (
2720+ trace_events. len( ) ,
2721+ report. steps. len( ) ,
2722+ "Number of events should match number of steps"
2723+ ) ;
2724+
2725+ // Verify step names match between report and trace
2726+ let trace_names: std:: collections:: HashSet < _ > = trace_events
2727+ . iter ( )
2728+ . filter_map ( |e| e. get ( "name" ) . and_then ( |v| v. as_str ( ) ) )
2729+ . collect ( ) ;
2730+ let report_names: std:: collections:: HashSet < _ > =
2731+ report. steps . iter ( ) . map ( |s| s. name . as_str ( ) ) . collect ( ) ;
2732+ assert_eq ! (
2733+ trace_names, report_names,
2734+ "Trace event names should match report step names"
2735+ ) ;
2736+ }
2737+
25312738 #[ nexus_test( server = crate :: Server ) ]
25322739 async fn test_collect_chunked ( cptestctx : & ControlPlaneTestContext ) {
25332740 let nexus = & cptestctx. server . server_context ( ) . nexus ;
0 commit comments