Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
223 changes: 218 additions & 5 deletions PolyPilot.Tests/ChatExperienceSafetyTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,14 @@ private static void InvokeFlushCurrentResponse(CopilotService svc, object sessio
method.Invoke(svc, new object?[] { sessionState });
}

/// <summary>
/// Invokes the private static <c>ClearFlushedReplayDedup</c> helper on <see cref="CopilotService"/>
/// via reflection to simulate a tool/sub-turn boundary.
/// </summary>
/// <param name="sessionState">The session state object passed as the helper's only argument.</param>
private static void InvokeClearFlushedReplayDedup(object sessionState)
{
    var method = typeof(CopilotService).GetMethod("ClearFlushedReplayDedup",
        BindingFlags.NonPublic | BindingFlags.Static);

    // GetMethod returns null when no matching member exists. Fail with a descriptive
    // message instead of a bare NullReferenceException if the private helper is ever
    // renamed, removed, or stops being static.
    Assert.True(method is not null,
        "CopilotService.ClearFlushedReplayDedup (non-public, static) was not found via reflection.");

    method!.Invoke(null, new object?[] { sessionState });
}

/// <summary>Gets a field from SessionState by name.</summary>
private static T GetField<T>(object state, string fieldName)
{
Expand Down Expand Up @@ -318,6 +326,38 @@ public async Task CompleteResponse_FlushesContentToHistory()
Assert.Contains("model's response text", lastMessage.Content);
}

/// <summary>
/// Identical assistant text across DIFFERENT turns must still be persisted.
/// The replay dedup guard should only suppress content already flushed in the
/// current turn, not a legitimate repeated reply like "Done." in a later turn.
/// </summary>
/// <remarks>
/// History is seeded directly with an assistant "Done." message, the same text is then
/// appended to the current response buffer, and CompleteResponse is invoked. The test
/// expects exactly one additional History entry and a completion task yielding "Done.".
/// </remarks>
[Fact]
public async Task CompleteResponse_IdenticalCrossTurnReply_IsStillPersisted()
{
    // Arrange
    var svc = CreateService();
    await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
    var session = await svc.CreateSessionAsync("cross-turn-complete-test");

    var state = GetSessionState(svc, "cross-turn-complete-test");
    session.IsProcessing = true;
    SetField(state, "SendingFlag", 1);

    // This entry is added to History directly, not through a flush of the response buffer.
    session.History.Add(ChatMessage.AssistantMessage("Done."));
    var historyBefore = session.History.Count;

    GetCurrentResponse(state).Append("Done.");

    var tcs = new TaskCompletionSource<string>(TaskCreationOptions.RunContinuationsAsynchronously);
    SetResponseCompletion(state, tcs);

    // Act
    InvokeCompleteResponse(svc, state, null);

    // Assert
    Assert.Equal(historyBefore + 1, session.History.Count);
    Assert.Equal("Done.", session.History.Last().Content);
    Assert.True(tcs.Task.IsCompleted);

    // Await instead of reading .Result: the task is already complete so this does not
    // wait, and a faulted task surfaces its original exception rather than an
    // AggregateException wrapper.
    Assert.Equal("Done.", await tcs.Task);
}

/// <summary>
/// CompleteResponse must include FlushedResponse (from mid-turn flushes on TurnEnd)
/// in the TCS result. Without this, orchestrator dispatch gets empty string.
Expand Down Expand Up @@ -355,6 +395,69 @@ public async Task CompleteResponse_IncludesFlushedResponseInTcsResult()
Assert.Contains("Second sub-turn continuation", result);
}

/// <summary>
/// If the SDK replays the exact text that was already flushed earlier in the SAME turn,
/// CompleteResponse must not duplicate it in either History or the TCS result.
/// </summary>
/// <remarks>
/// The text is appended and flushed once, then appended again before CompleteResponse is
/// invoked. The test expects the History count to be unchanged after the flush and the
/// completion task to yield the text exactly once.
/// </remarks>
[Fact]
public async Task CompleteResponse_SameTurnReplay_DoesNotDuplicateHistoryOrTcs()
{
    // Arrange
    var svc = CreateService();
    await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
    var session = await svc.CreateSessionAsync("same-turn-replay-test");

    var state = GetSessionState(svc, "same-turn-replay-test");
    session.IsProcessing = true;
    SetField(state, "SendingFlag", 1);

    // First occurrence: flushed through the normal flush path.
    GetCurrentResponse(state).Append("Already flushed content");
    InvokeFlushCurrentResponse(svc, state);
    var historyBefore = session.History.Count;

    // Second occurrence: the same text shows up in the buffer again (the replay).
    GetCurrentResponse(state).Append("Already flushed content");

    var tcs = new TaskCompletionSource<string>(TaskCreationOptions.RunContinuationsAsynchronously);
    SetResponseCompletion(state, tcs);

    // Act
    InvokeCompleteResponse(svc, state, null);

    // Assert
    Assert.Equal(historyBefore, session.History.Count);
    Assert.True(tcs.Task.IsCompleted);

    // Await instead of reading .Result: the task is already complete so this does not
    // wait, and a faulted task surfaces its original exception rather than an
    // AggregateException wrapper.
    Assert.Equal("Already flushed content", await tcs.Task);
}

/// <summary>
/// Same-turn replay dedup must still work for ordinary multi-paragraph/model-formatted
/// responses that contain "\n\n" inside the content body.
/// </summary>
/// <remarks>
/// The multi-paragraph text is appended and flushed once, then appended again before
/// CompleteResponse is invoked. The test expects the History count to be unchanged after
/// the flush and the completion task to yield the text exactly once.
/// </remarks>
[Fact]
public async Task CompleteResponse_SameTurnReplay_MultiParagraphContent_DoesNotDuplicate()
{
    // Arrange
    var svc = CreateService();
    await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
    var session = await svc.CreateSessionAsync("same-turn-replay-multipara");

    var state = GetSessionState(svc, "same-turn-replay-multipara");
    session.IsProcessing = true;
    SetField(state, "SendingFlag", 1);

    // Contains blank-line paragraph breaks and a fenced code block inside a single segment.
    const string content = "First paragraph.\n\n```csharp\nConsole.WriteLine(\"hi\");\n```\n\nFinal paragraph.";
    GetCurrentResponse(state).Append(content);
    InvokeFlushCurrentResponse(svc, state);
    var historyBefore = session.History.Count;

    // The same text shows up in the buffer again (the replay).
    GetCurrentResponse(state).Append(content);

    var tcs = new TaskCompletionSource<string>(TaskCreationOptions.RunContinuationsAsynchronously);
    SetResponseCompletion(state, tcs);

    // Act
    InvokeCompleteResponse(svc, state, null);

    // Assert
    Assert.Equal(historyBefore, session.History.Count);
    Assert.True(tcs.Task.IsCompleted);

    // Await instead of reading .Result: the task is already complete so this does not
    // wait, and a faulted task surfaces its original exception rather than an
    // AggregateException wrapper.
    Assert.Equal(content, await tcs.Task);
}

/// <summary>
/// CompleteResponse fires OnSessionComplete so orchestrator loops can unblock.
/// Without this (INV-O4), multi-agent workers hang forever waiting for completion.
Expand Down Expand Up @@ -712,8 +815,8 @@ public async Task FlushCurrentResponse_AddsToHistory()
}

/// <summary>
/// FlushCurrentResponse dedup guard: if the last assistant message has identical content,
/// the flush is skipped to prevent duplicates on session resume.
/// FlushCurrentResponse dedup guard: if the exact same segment was already flushed in
/// the CURRENT turn, the replay is skipped to prevent duplicates on resume/IDLE-DEFER.
/// </summary>
[Fact]
public async Task FlushCurrentResponse_DedupGuard_SkipsDuplicate()
Expand All @@ -724,11 +827,12 @@ public async Task FlushCurrentResponse_DedupGuard_SkipsDuplicate()

var state = GetSessionState(svc, "dedup-test");

// Add a message that looks like it was already flushed
session.History.Add(ChatMessage.AssistantMessage("Already flushed content"));
// Simulate the current turn already flushing this exact segment once.
GetCurrentResponse(state).Append("Already flushed content");
InvokeFlushCurrentResponse(svc, state);
var historyCountAfterFirst = session.History.Count;

// Simulate the same content appearing in CurrentResponse (SDK replay on resume)
// Simulate the same content appearing in CurrentResponse again (SDK replay)
GetCurrentResponse(state).Append("Already flushed content");

// Act
Expand All @@ -738,6 +842,80 @@ public async Task FlushCurrentResponse_DedupGuard_SkipsDuplicate()
Assert.Equal(historyCountAfterFirst, session.History.Count);
}

/// <summary>
/// Flushing the same multi-paragraph segment twice must add it to History only once:
/// blank-line paragraph breaks inside the text are ordinary content, not boundaries
/// between separately flushed segments.
/// </summary>
[Fact]
public async Task FlushCurrentResponse_DedupGuard_MultiParagraphContent_SkipsDuplicate()
{
    const string sessionName = "dedup-multipara-test";
    const string content = "Overview:\n\n- first item\n- second item\n\nDone.";

    // Arrange: a demo-mode service with one session.
    var service = CreateService();
    await service.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
    var chatSession = await service.CreateSessionAsync(sessionName);
    var sessionState = GetSessionState(service, sessionName);

    // First flush of the segment.
    GetCurrentResponse(sessionState).Append(content);
    InvokeFlushCurrentResponse(service, sessionState);
    var countAfterInitialFlush = chatSession.History.Count;

    // Act: the identical segment arrives again and is flushed a second time.
    GetCurrentResponse(sessionState).Append(content);
    InvokeFlushCurrentResponse(service, sessionState);

    // Assert: the second flush added nothing.
    Assert.Equal(countAfterInitialFlush, chatSession.History.Count);
}

/// <summary>
/// Dedup is same-turn only: when History already ends with an assistant message that was
/// added outside of any flush, flushing the same text must still append a new entry.
/// </summary>
[Fact]
public async Task FlushCurrentResponse_IdenticalCrossTurnReply_IsStillPersisted()
{
    const string sessionName = "cross-turn-flush-test";
    const string repeatedReply = "Done.";

    // Arrange: a demo-mode session whose History already holds the reply text.
    var service = CreateService();
    await service.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
    var chatSession = await service.CreateSessionAsync(sessionName);
    var sessionState = GetSessionState(service, sessionName);

    chatSession.History.Add(ChatMessage.AssistantMessage(repeatedReply));
    var countBeforeFlush = chatSession.History.Count;

    // Act: the same text is buffered and flushed.
    GetCurrentResponse(sessionState).Append(repeatedReply);
    InvokeFlushCurrentResponse(service, sessionState);

    // Assert: one new entry, carrying the repeated text.
    Assert.Equal(countBeforeFlush + 1, chatSession.History.Count);
    Assert.Equal(repeatedReply, chatSession.History.Last().Content);
}

/// <summary>
/// After ClearFlushedReplayDedup has been invoked (simulating a tool/sub-turn boundary),
/// flushing text identical to an earlier flush must append a new History entry rather
/// than being treated as an SDK replay.
/// </summary>
[Fact]
public async Task FlushCurrentResponse_IdenticalSameTurnAfterBoundary_IsStillPersisted()
{
    const string sessionName = "same-turn-after-boundary";
    const string shortReply = "Done.";

    // Arrange: a demo-mode session with the reply flushed once.
    var service = CreateService();
    await service.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
    var chatSession = await service.CreateSessionAsync(sessionName);
    var sessionState = GetSessionState(service, sessionName);

    GetCurrentResponse(sessionState).Append(shortReply);
    InvokeFlushCurrentResponse(service, sessionState);
    var countAfterInitialFlush = chatSession.History.Count;

    // The boundary between the two identical replies.
    InvokeClearFlushedReplayDedup(sessionState);

    // Act: the assistant emits the same text again and it is flushed.
    GetCurrentResponse(sessionState).Append(shortReply);
    InvokeFlushCurrentResponse(service, sessionState);

    // Assert: the follow-up reply was persisted as a new entry.
    Assert.Equal(countAfterInitialFlush + 1, chatSession.History.Count);
    Assert.Equal(shortReply, chatSession.History.Last().Content);
}

/// <summary>
/// FlushCurrentResponse accumulates text in FlushedResponse so CompleteResponse
/// can include it in the TCS result for orchestrator dispatch.
Expand Down Expand Up @@ -846,6 +1024,41 @@ public void CompleteResponse_Source_ClearsSendingFlag()
Assert.Contains("SendingFlag", afterCR);
}

/// <summary>
/// Source-text guard for ChatMessageList.razor: the component must keep the
/// ShouldShowStreamingContent check and the normalized comparison between the last
/// assistant message and the streaming content. Without that suppression, IDLE-DEFER
/// sessions render the same answer twice until the next prompt clears the streaming cache.
/// </summary>
[Fact]
public void ChatMessageList_Source_SuppressesStreamingDuplicateAfterFlush()
{
    var componentPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Components", "ChatMessageList.razor");
    var componentSource = File.ReadAllText(componentPath);

    // Checked in order; the first missing snippet fails the test.
    var requiredSnippets = new[]
    {
        "private bool ShouldShowStreamingContent()",
        "NormalizeStreamingText(lastAssistant?.Content)",
        "NormalizeStreamingText(StreamingContent)",
    };

    foreach (var snippet in requiredSnippets)
    {
        Assert.Contains(snippet, componentSource);
    }
}

/// <summary>
/// Source-text guard for draft restore: Dashboard.razor must reference the live draft map
/// and index.html must keep the divergence check and the live-draft cleanup, so a stale
/// cached draft does not overwrite newer user typing during normal render cycles.
/// </summary>
[Fact]
public void DraftRestore_Source_PreservesLiveTyping()
{
    var repoRoot = GetRepoRoot();
    var indexHtmlSource = File.ReadAllText(Path.Combine(repoRoot, "PolyPilot", "wwwroot", "index.html"));

    var dashboardPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Components", "Pages", "Dashboard.razor");
    var dashboardSource = File.ReadAllText(dashboardPath);

    // Dashboard side: the live draft map is referenced.
    Assert.Contains("window.__liveDrafts", dashboardSource);

    // Browser side: divergence detection and cleanup of the live draft entry.
    var requiredIndexSnippets = new[]
    {
        "hasDivergedUserText",
        "current !== desired && current !== lastRestored",
        "delete window.__liveDrafts[elementId]",
    };

    foreach (var snippet in requiredIndexSnippets)
    {
        Assert.Contains(snippet, indexHtmlSource);
    }
}

/// <summary>
/// The "Session not found" reconnect path must include McpServers and SkillDirectories
/// in the fresh session config (PR #330 regression guard).
Expand Down
93 changes: 93 additions & 0 deletions PolyPilot.Tests/SessionPersistenceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1671,4 +1671,97 @@ public void Merge_NameCollision_MissingDirStillExcluded()
Assert.Single(result);
Assert.Equal("new-id", result[0].SessionId);
}

/// <summary>
/// When a session is moved to a new group during scattered team reconstruction and then
/// recreated with a new SessionId, the persisted entry from the old group should be
/// silently dropped only if the replacement explicitly records (via RecoveredFromSessionId)
/// that it recovered history from the old session.
/// </summary>
[Fact]
public void Merge_NameCollision_DifferentGroupId_WithExplicitRecovery_DropsPersistedEntry()
{
    // Arrange: same display name, different groups, and an explicit recovery marker.
    var replacement = new ActiveSessionEntry
    {
        SessionId = "new-id",
        DisplayName = "Copilot Cli-worker-1",
        Model = "m",
        WorkingDirectory = "/w",
        GroupId = "new-group-id",
        RecoveredFromSessionId = "old-id",
    };
    var original = new ActiveSessionEntry
    {
        SessionId = "old-id",
        DisplayName = "Copilot Cli-worker-1",
        Model = "m",
        WorkingDirectory = "/w",
        GroupId = "old-group-id",
    };
    var activeEntries = new List<ActiveSessionEntry> { replacement };
    var persistedEntries = new List<ActiveSessionEntry> { original };

    // Act
    var merged = CopilotService.MergeSessionEntries(
        activeEntries, persistedEntries, new HashSet<string>(), new HashSet<string>(), _ => true);

    // Assert: only the active entry remains, with no "(previous)" duplicate.
    Assert.Single(merged);
    var survivor = merged[0];
    Assert.Equal("new-id", survivor.SessionId);
    Assert.Equal("Copilot Cli-worker-1", survivor.DisplayName);
}

/// <summary>
/// A name collision across different groups without a RecoveredFromSessionId marker on the
/// active entry must keep the persisted entry, renamed with a " (previous)" suffix.
/// </summary>
[Fact]
public void Merge_NameCollision_DifferentGroupId_WithoutRecoveryMarker_KeepsPrevious()
{
    // Arrange: same display name, different groups, no recovery marker.
    var replacement = new ActiveSessionEntry
    {
        SessionId = "new-id",
        DisplayName = "Copilot Cli-worker-1",
        Model = "m",
        WorkingDirectory = "/w",
        GroupId = "new-group-id",
    };
    var original = new ActiveSessionEntry
    {
        SessionId = "old-id",
        DisplayName = "Copilot Cli-worker-1",
        Model = "m",
        WorkingDirectory = "/w",
        GroupId = "old-group-id",
    };
    var activeEntries = new List<ActiveSessionEntry> { replacement };
    var persistedEntries = new List<ActiveSessionEntry> { original };

    // Act
    var merged = CopilotService.MergeSessionEntries(
        activeEntries, persistedEntries, new HashSet<string>(), new HashSet<string>(), _ => true);

    // Assert: active entry first, persisted entry second under the "(previous)" name.
    Assert.Equal(2, merged.Count);
    Assert.Equal("Copilot Cli-worker-1", merged[0].DisplayName);

    var previous = merged[1];
    Assert.Equal("Copilot Cli-worker-1 (previous)", previous.DisplayName);
    Assert.Equal("old-id", previous.SessionId);
}

/// <summary>
/// When the name collision happens within the same group (e.g., reconnect replaced the
/// session), the old entry should still be preserved as "(previous)".
/// </summary>
[Fact]
public void Merge_NameCollision_SameGroupId_StillCreatesPrevious()
{
    // Arrange: both entries share the display name and the group.
    var replacement = new ActiveSessionEntry
    {
        SessionId = "new-id",
        DisplayName = "MyWorker",
        Model = "m",
        WorkingDirectory = "/w",
        GroupId = "same-group",
    };
    var original = new ActiveSessionEntry
    {
        SessionId = "old-id",
        DisplayName = "MyWorker",
        Model = "m",
        WorkingDirectory = "/w",
        GroupId = "same-group",
    };
    var activeEntries = new List<ActiveSessionEntry> { replacement };
    var persistedEntries = new List<ActiveSessionEntry> { original };

    // Act
    var merged = CopilotService.MergeSessionEntries(
        activeEntries, persistedEntries, new HashSet<string>(), new HashSet<string>(), _ => true);

    // Assert: both entries are kept; the second carries the "(previous)" suffix.
    Assert.Equal(2, merged.Count);
    Assert.Equal("MyWorker", merged[0].DisplayName);
    Assert.Equal("MyWorker (previous)", merged[1].DisplayName);
}

/// <summary>
/// When GroupId is null (legacy entries; this test sets it to null on both sides), the
/// merge falls back to the existing "(previous)" behavior instead of silently dropping
/// the persisted entry.
/// </summary>
[Fact]
public void Merge_NameCollision_NullGroupIds_StillCreatesPrevious()
{
    // Arrange: both entries share the display name and have no group.
    var replacement = new ActiveSessionEntry
    {
        SessionId = "new-id",
        DisplayName = "MyWorker",
        Model = "m",
        WorkingDirectory = "/w",
        GroupId = null,
    };
    var original = new ActiveSessionEntry
    {
        SessionId = "old-id",
        DisplayName = "MyWorker",
        Model = "m",
        WorkingDirectory = "/w",
        GroupId = null,
    };
    var activeEntries = new List<ActiveSessionEntry> { replacement };
    var persistedEntries = new List<ActiveSessionEntry> { original };

    // Act
    var merged = CopilotService.MergeSessionEntries(
        activeEntries, persistedEntries, new HashSet<string>(), new HashSet<string>(), _ => true);

    // Assert: both entries are kept; the second carries the "(previous)" suffix.
    Assert.Equal(2, merged.Count);
    Assert.Equal("MyWorker (previous)", merged[1].DisplayName);
}
}
21 changes: 21 additions & 0 deletions PolyPilot.Tests/SessionStabilityTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,27 @@ public void ForceCompleteProcessing_SkipsIfNotProcessing()
Assert.Contains("!state.Info.IsProcessing", method);
}

/// <summary>
/// Source-text guard: the file must mention ForceCompleteAbortTimeoutSeconds, and the
/// extracted ForceCompleteProcessingAsync method must create a timeout-bound
/// CancellationTokenSource, pass its token to AbortAsync, and mention
/// OperationCanceledException.
/// </summary>
[Fact]
public void ForceCompleteProcessing_BoundsAbortAsyncTimeout()
{
    var organizationSource = File.ReadAllText(TestPaths.OrganizationCs);
    var forceCompleteBody = ExtractMethod(organizationSource, "Task ForceCompleteProcessingAsync");

    // The timeout constant only needs to appear somewhere in the file.
    Assert.Contains("ForceCompleteAbortTimeoutSeconds", organizationSource);

    // These snippets must appear inside the extracted method text; checked in order.
    var requiredInMethod = new[]
    {
        "new CancellationTokenSource(TimeSpan.FromSeconds(ForceCompleteAbortTimeoutSeconds))",
        "await session.AbortAsync(abortCts.Token);",
        "OperationCanceledException",
    };

    foreach (var snippet in requiredInMethod)
    {
        Assert.Contains(snippet, forceCompleteBody);
    }
}

/// <summary>
/// Source-text guard: result collection must look up the worker name from the assignments
/// list (falling back to "unknown" only when the index is out of range), and the file must
/// not construct a WorkerResult with a hard-coded "unknown" name.
/// </summary>
[Fact]
public void OrchestratorTimeout_ResultCollection_PreservesWorkerNames()
{
    var organizationSource = File.ReadAllText(TestPaths.OrganizationCs);

    // Required: the index-guarded worker-name lookup.
    Assert.Contains(
        "var workerName = i < assignments.Count ? assignments[i].WorkerName : \"unknown\";",
        organizationSource);

    // Forbidden: a WorkerResult built with the literal "unknown" name.
    Assert.DoesNotContain(
        "new WorkerResult(\"unknown\", null, false",
        organizationSource);
}

// ─── Mixed Worker Success/Failure Synthesis Tests ───

[Fact]
Expand Down
Loading