Skip to content

Commit 9f840a6

Browse files
Adding logs for step runner and task runner in agent lifecycle (#5287)
* Adding logs for step runner and task runner in agent lifecycle * refining logs in RunAsyncInternal in TaskRunner to get away from low maintanability issue for this function. * Fixing CS1505 issue for low maintainability in task runner runInternalAsync function and minor log fixes * removing duplicate log * Minor fix in format
1 parent 83e32cc commit 9f840a6

File tree

2 files changed

+100
-28
lines changed

2 files changed

+100
-28
lines changed

src/Agent.Worker/StepsRunner.cs

Lines changed: 58 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ public async Task RunAsync(IExecutionContext jobContext, IList<IStep> steps)
4545
{
4646
ArgUtil.NotNull(jobContext, nameof(jobContext));
4747
ArgUtil.NotNull(steps, nameof(steps));
48+
Trace.Entering();
4849

4950
// TaskResult:
5051
// Abandoned (Server set this.)
@@ -56,35 +57,48 @@ public async Task RunAsync(IExecutionContext jobContext, IList<IStep> steps)
5657
CancellationTokenRegistration? jobCancelRegister = null;
5758
int stepIndex = 0;
5859
jobContext.Variables.Agent_JobStatus = jobContext.Result ?? TaskResult.Succeeded;
60+
Trace.Info($"Async command completion wait initiated - processing {jobContext.AsyncCommands?.Count ?? 0} pending commands");
5961
// Wait till all async commands finish.
62+
int successfulCommandCount = 0;
6063
foreach (var command in jobContext.AsyncCommands ?? new List<IAsyncCommandContext>())
6164
{
6265
try
6366
{
6467
// wait async command to finish.
68+
Trace.Info($"Async command initiated [Command:{command.Name}, CommandType:{command.GetType().Name}]");
6569
await command.WaitAsync();
70+
successfulCommandCount++;
71+
Trace.Info($"Async command completed successfully: {command.Name}");
6672
}
6773

6874
catch (Exception ex)
6975
{
7076
// Log the error
71-
Trace.Info($"Caught exception from async command {command.Name}: {ex}");
77+
Trace.Info($"Async command failed during job initialization [Command:{command.Name}, JobId:{jobContext.Variables.System_JobId}, Error:{ex.Message}]");
7278
}
7379
}
80+
Trace.Info($"Async command completion wait finished - {successfulCommandCount} commands processed");
81+
Trace.Info("Step iteration loop initiated - beginning sequential step processing");
7482
foreach (IStep step in steps)
7583
{
76-
Trace.Info($"Processing step: DisplayName='{step.DisplayName}', ContinueOnError={step.ContinueOnError}, Enabled={step.Enabled}");
84+
Trace.Info($"Processing step {stepIndex + 1}/{steps.Count}: DisplayName='{step.DisplayName}', ContinueOnError={step.ContinueOnError}, Enabled={step.Enabled}");
7785
ArgUtil.Equal(true, step.Enabled, nameof(step.Enabled));
7886
ArgUtil.NotNull(step.ExecutionContext, nameof(step.ExecutionContext));
7987
ArgUtil.NotNull(step.ExecutionContext.Variables, nameof(step.ExecutionContext.Variables));
8088
stepIndex++;
8189

90+
Trace.Info($"ExecutionContext startup initiated for step: '{step.DisplayName}'");
8291
// Start.
8392
step.ExecutionContext.Start();
8493
var taskStep = step as ITaskRunner;
8594
if (taskStep != null)
8695
{
8796
HostContext.WritePerfCounter($"TaskStart_{taskStep.Task.Reference.Name}_{stepIndex}");
97+
Trace.Info($"Task step initiated [TaskName:{taskStep.Task.Reference.Name}, TaskId:{taskStep.Task.Reference.Id}, Version:{taskStep.Task.Reference.Version}, Stage:{taskStep.Stage}]");
98+
}
99+
else
100+
{
101+
Trace.Info($"Non-task step {step.DisplayName} started [StepType:{step.GetType().Name}, Timeout:{step.Timeout?.TotalMinutes ?? 0}min]");
88102
}
89103

90104
// Change the current job context to the step context.
@@ -96,6 +110,7 @@ public async Task RunAsync(IExecutionContext jobContext, IList<IStep> steps)
96110
List<string> expansionWarnings;
97111
step.ExecutionContext.Variables.RecalculateExpanded(out expansionWarnings);
98112
expansionWarnings?.ForEach(x => step.ExecutionContext.Warning(x));
113+
Trace.Info($"Variable expansion completed [Step:'{step.DisplayName}', Warnings:{expansionWarnings?.Count ?? 0}, Target:{step.Target?.GetType()?.Name ?? "None"}]");
99114

100115
var expressionManager = HostContext.GetService<IExpressionManager>();
101116
try
@@ -104,9 +119,11 @@ public async Task RunAsync(IExecutionContext jobContext, IList<IStep> steps)
104119
// Register job cancellation call back only if job cancellation token not been fire before each step run
105120
if (!jobContext.CancellationToken.IsCancellationRequested)
106121
{
122+
Trace.Info($"Job cancellation registration setup [Step:'{step.DisplayName}', JobCancellationRequested:False, RegistrationActive:True]");
107123
// Test the condition again. The job was canceled after the condition was originally evaluated.
108124
jobCancelRegister = jobContext.CancellationToken.Register(() =>
109125
{
126+
Trace.Info($"Job cancellation callback triggered [Step:'{step.DisplayName}', AgentShutdown:{HostContext.AgentShutdownToken.IsCancellationRequested}]");
110127
// mark job as cancelled
111128
jobContext.Result = TaskResult.Canceled;
112129
jobContext.Variables.Agent_JobStatus = jobContext.Result;
@@ -120,15 +137,19 @@ public async Task RunAsync(IExecutionContext jobContext, IList<IStep> steps)
120137
PublishTelemetry(jobContext, TaskResult.Failed.ToString(), "120");
121138
jobContext.Result = TaskResult.Failed;
122139
jobContext.Variables.Agent_JobStatus = jobContext.Result;
140+
Trace.Info($"Agent shutdown failure applied [Step:'{step.DisplayName}', FailJobEnabled:True, JobResult:Failed]");
123141
}
124142
step.ExecutionContext.Debug($"Skip Re-evaluate condition on agent shutdown.");
125143
conditionReTestResult = false;
144+
Trace.Info($"Condition re-evaluation skipped [Step:'{step.DisplayName}', Reason:AgentShutdown]");
126145
}
127146
else
128147
{
129148
try
130149
{
150+
Trace.Info($"Condition re-evaluation initiated [Step:'{step.DisplayName}', Expression:'{step.Condition}', HostTracingOnly:True]");
131151
conditionReTestResult = expressionManager.Evaluate(step.ExecutionContext, step.Condition, hostTracingOnly: true);
152+
Trace.Info($"Condition re-evaluation completed [Step:'{step.DisplayName}', Result:{conditionReTestResult.Value}]");
132153
}
133154
catch (Exception ex)
134155
{
@@ -142,7 +163,7 @@ public async Task RunAsync(IExecutionContext jobContext, IList<IStep> steps)
142163
if (!conditionReTestResult.Value)
143164
{
144165
// Cancel the step.
145-
Trace.Info("Cancel current running step.");
166+
Trace.Info($"Cancel current running step: {step.DisplayName}");
146167
step.ExecutionContext.Error(StringUtil.Loc("StepCancelled"));
147168
step.ExecutionContext.CancelToken();
148169
}
@@ -177,12 +198,14 @@ public async Task RunAsync(IExecutionContext jobContext, IList<IStep> steps)
177198
{
178199
step.ExecutionContext.Debug($"Skip evaluate condition on agent shutdown.");
179200
conditionResult = false;
201+
Trace.Info($"Condition evaluation skipped due to agent shutdown: '{step.DisplayName}'");
180202
}
181203
else
182204
{
183205
try
184206
{
185207
conditionResult = expressionManager.Evaluate(step.ExecutionContext, step.Condition);
208+
Trace.Info($"Condition evaluation completed - Result: {conditionResult.Value}, Step: '{step.DisplayName}'");
186209
}
187210
catch (Exception ex)
188211
{
@@ -198,7 +221,7 @@ public async Task RunAsync(IExecutionContext jobContext, IList<IStep> steps)
198221
{
199222
// Condition == false
200223
string skipStepMessage = "Skipping step due to condition evaluation.";
201-
Trace.Info(skipStepMessage);
224+
Trace.Info(skipStepMessage + $"[Step: '{step.DisplayName}', Reason:ConditionFalse, Expression:'{step.Condition}', StepIndex:{stepIndex}/{steps.Count}]");
202225
step.ExecutionContext.Output($"{skipStepMessage}\n{conditionResult.Trace}");
203226
step.ExecutionContext.Complete(TaskResult.Skipped, resultCode: skipStepMessage);
204227
continue;
@@ -207,17 +230,21 @@ public async Task RunAsync(IExecutionContext jobContext, IList<IStep> steps)
207230
if (conditionEvaluateError != null)
208231
{
209232
// fail the step since there is an evaluate error.
233+
Trace.Error($"Condition evaluation failure context [Step:'{step.DisplayName}', Expression:'{step.Condition}', StepIndex:{stepIndex}/{steps.Count}]");
210234
step.ExecutionContext.Error(conditionEvaluateError);
211235
step.ExecutionContext.Complete(TaskResult.Failed);
212236
}
213237
else
214238
{
239+
Trace.Info($"RunStepAsync execution initiated for step: '{step.DisplayName}'");
215240
// Run the step.
216241
await RunStepAsync(step, jobContext.CancellationToken);
242+
Trace.Info($"RunStepAsync execution completed for step: '{step.DisplayName}' - Result: {step.ExecutionContext.Result}");
217243
}
218244
}
219245
finally
220246
{
247+
Trace.Info($"Step cancellation registration cleanup [Step:'{step.DisplayName}', RegistrationActive:{jobCancelRegister != null}]");
221248
if (jobCancelRegister != null)
222249
{
223250
jobCancelRegister?.Dispose();
@@ -229,80 +256,93 @@ public async Task RunAsync(IExecutionContext jobContext, IList<IStep> steps)
229256
if (step.ExecutionContext.Result == TaskResult.SucceededWithIssues ||
230257
step.ExecutionContext.Result == TaskResult.Failed)
231258
{
232-
Trace.Info($"Update job result with current step result '{step.ExecutionContext.Result}'.");
259+
Trace.Info($"Update job result with current step result - Step: '{step.DisplayName}', StepResult: {step.ExecutionContext.Result}, PreviousJobResult: {jobContext.Result}");
233260
jobContext.Result = TaskResultUtil.MergeTaskResults(jobContext.Result, step.ExecutionContext.Result.Value);
234261
jobContext.Variables.Agent_JobStatus = jobContext.Result;
262+
Trace.Info($"Job result after merge: {jobContext.Result}");
235263
}
236264
else
237265
{
238-
Trace.Info($"No need for updating job result with current step result '{step.ExecutionContext.Result}'.");
266+
Trace.Info($"Job result unchanged - Step: '{step.DisplayName}', StepResult: {step.ExecutionContext.Result}, JobResultKept:{jobContext.Result}");
239267
}
240268

241269
if (taskStep != null)
242270
{
243271
HostContext.WritePerfCounter($"TaskCompleted_{taskStep.Task.Reference.Name}_{stepIndex}");
272+
Trace.Info($"Task step completion - TaskName:{taskStep.Task.Reference.Name}, StepIndex:{stepIndex}/{steps.Count}, Result: {step.ExecutionContext.Result}, TaskStage:{taskStep.Stage}");
244273
}
245274

246-
Trace.Info($"Current state: job state = '{jobContext.Result}'");
247275
}
276+
Trace.Info($"Step iteration loop completed - All {steps.Count} steps processed, Final job result: {jobContext.Result}");
248277
}
249278

250279
private async Task RunStepAsync(IStep step, CancellationToken jobCancellationToken)
251280
{
281+
Trace.Info($"Individual step execution initiated: '{step.DisplayName}'");
252282
// Start the step.
253-
Trace.Info("Starting the step.");
283+
254284
step.ExecutionContext.Section(StringUtil.Loc("StepStarting", step.DisplayName));
255285
step.ExecutionContext.SetTimeout(timeout: step.Timeout);
256286

257287
step.ExecutionContext.Variables.Set(Constants.Variables.Task.SkipTranslatorForCheckout, Boolean.FalseString);
258288

289+
Trace.Info($"UTF-8 codepage switching initiated for step: '{step.DisplayName}'");
259290
// Windows may not be on the UTF8 codepage; try to fix that
260291
await SwitchToUtf8Codepage(step);
292+
Trace.Info($"UTF-8 codepage switching completed for step: '{step.DisplayName}'");
293+
// updated code log - Add codepage switching context and platform info
294+
Trace.Info($"Codepage configuration [Platform:{(PlatformUtil.RunningOnWindows ? "Windows" : "Unix")}, RetainEncoding:{step.ExecutionContext.Variables.Retain_Default_Encoding}, CurrentCodepage:{Console.InputEncoding?.CodePage}]");
261295

262296
try
263297
{
298+
Trace.Info($"Step main execution initiated: '{step.DisplayName}'");
264299
await step.RunAsync();
300+
Trace.Info($"Step main execution completed successfully: '{step.DisplayName}'");
265301
}
266302
catch (OperationCanceledException ex)
267303
{
268304
if (step.ExecutionContext.CancellationToken.IsCancellationRequested &&
269305
!jobCancellationToken.IsCancellationRequested)
270306
{
271-
Trace.Error($"Caught timeout exception from step: {ex.Message}");
307+
Trace.Error($"Caught timeout exception from step: Step: {step.DisplayName}, Exception: {ex.Message}, ConfiguredTimeout:{step.Timeout?.TotalMinutes ?? 0}min");
272308
step.ExecutionContext.Error(StringUtil.Loc("StepTimedOut"));
273309
step.ExecutionContext.Result = TaskResult.Failed;
274310
}
275311
else if (AgentKnobs.FailJobWhenAgentDies.GetValue(step.ExecutionContext).AsBoolean() &&
276312
HostContext.AgentShutdownToken.IsCancellationRequested)
277313
{
278314
PublishTelemetry(step.ExecutionContext, TaskResult.Failed.ToString(), "122");
279-
Trace.Error($"Caught Agent Shutdown exception from step: {ex.Message}");
315+
Trace.Error($"Caught Agent Shutdown exception from step: Step:'{step.DisplayName}', ShutdownReason:{HostContext.AgentShutdownReason}, Exception: {ex.Message}");
280316
step.ExecutionContext.Error(ex);
281317
step.ExecutionContext.Result = TaskResult.Failed;
282318
}
283319
else
284320
{
285321
// Log the exception and cancel the step.
286-
Trace.Error($"Caught cancellation exception from step: {ex}");
322+
Trace.Error($"Caught cancellation exception from step: Step:{step.DisplayName}, CancellationSource:JobLevel, JobCancelled:{jobCancellationToken.IsCancellationRequested}");
287323
step.ExecutionContext.Error(ex);
288324
step.ExecutionContext.Result = TaskResult.Canceled;
289325
}
290326
}
291327
catch (Exception ex)
292328
{
329+
Trace.Error($"Caught exception from step: - Step: '{step.DisplayName}', Exception: {ex}");
293330
// Log the error and fail the step.
294-
Trace.Error($"Caught exception from step: {ex}");
295331
step.ExecutionContext.Error(ex);
296332
step.ExecutionContext.Result = TaskResult.Failed;
297333
}
298334

335+
Trace.Info($"Async command completion wait initiated for step: '{step.DisplayName}' - Commands: {step.ExecutionContext.AsyncCommands?.Count ?? 0}");
299336
// Wait till all async commands finish.
300337
foreach (var command in step.ExecutionContext.AsyncCommands ?? new List<IAsyncCommandContext>())
301338
{
302339
try
303340
{
304341
// wait async command to finish.
342+
// check this - add log to mark start of this call as well, also add required meatadata to log for it
343+
Trace.Info($"Step async command initiated [Command:{command.Name}, Step:'{step.DisplayName}', CommandType:{command.GetType().Name}]");
305344
await command.WaitAsync();
345+
Trace.Info($"Step async command completion [Command:{command.Name}]");
306346
}
307347
catch (OperationCanceledException ex)
308348
{
@@ -346,27 +386,30 @@ private async Task RunStepAsync(IStep step, CancellationToken jobCancellationTok
346386
step.ExecutionContext.CommandResult = TaskResultUtil.MergeTaskResults(step.ExecutionContext.CommandResult, TaskResult.Failed);
347387
}
348388
}
389+
Trace.Info($"Step async command summary [Step:'{step.DisplayName}', TotalCommands:{step.ExecutionContext.AsyncCommands?.Count ?? 0}, CommandResult:{step.ExecutionContext.CommandResult}]");
349390

350391
// Merge executioncontext result with command result
351392
if (step.ExecutionContext.CommandResult != null)
352393
{
353394
step.ExecutionContext.Result = TaskResultUtil.MergeTaskResults(step.ExecutionContext.Result, step.ExecutionContext.CommandResult.Value);
395+
Trace.Info($"Step result merged with command result - Step: {step.DisplayName}, CommandResult:{step.ExecutionContext.CommandResult} FinalResult: {step.ExecutionContext.Result}");
354396
}
355397

356-
// Fixup the step result if ContinueOnError.
398+
// Fixup the step result if ContinueOnError.
357399
if (step.ExecutionContext.Result == TaskResult.Failed && step.ContinueOnError)
358400
{
359401
step.ExecutionContext.Result = TaskResult.SucceededWithIssues;
360-
Trace.Info($"Updated step result: {step.ExecutionContext.Result}");
402+
Trace.Info($"Step result updated due to ContinueOnError: '{step.DisplayName}', Result: Failed -> SucceededWithIssues");
361403
}
362404
else
363405
{
364-
Trace.Info($"Step result: {step.ExecutionContext.Result}");
406+
Trace.Info($"Step result: '{step.DisplayName}', Result: {step.ExecutionContext.Result}");
365407
}
366408

367409
// Complete the step context.
368410
step.ExecutionContext.Section(StringUtil.Loc("StepFinishing", step.DisplayName));
369411
step.ExecutionContext.Complete();
412+
Trace.Info($"Step execution summary - Step: '{step.DisplayName}', FinalResult: {step.ExecutionContext.Result}");
370413
}
371414

372415
private async Task SwitchToUtf8Codepage(IStep step)

0 commit comments

Comments
 (0)