diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/Checkpointing/FileSystemJsonCheckpointStore.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/Checkpointing/FileSystemJsonCheckpointStore.cs index 543fdeb530..c47298f112 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows/Checkpointing/FileSystemJsonCheckpointStore.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows/Checkpointing/FileSystemJsonCheckpointStore.cs @@ -5,11 +5,14 @@ using System.IO; using System.Text; using System.Text.Json; +using System.Text.Json.Serialization.Metadata; using System.Threading; using System.Threading.Tasks; namespace Microsoft.Agents.AI.Workflows.Checkpointing; +internal record CheckpointFileIndexEntry(CheckpointInfo CheckpointInfo, string FileName); + /// /// Provides a file system-based implementation of a JSON checkpoint store that persists checkpoint data and index /// information to disk using JSON files. @@ -28,6 +31,8 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos internal DirectoryInfo Directory { get; } internal HashSet CheckpointIndex { get; } + private static JsonTypeInfo EntryTypeInfo => WorkflowsJsonUtilities.JsonContext.Default.CheckpointFileIndexEntry; + /// /// Initializes a new instance of the class that uses the specified directory /// @@ -64,9 +69,11 @@ public FileSystemJsonCheckpointStore(DirectoryInfo directory) using StreamReader reader = new(this._indexFile, encoding: Encoding.UTF8, detectEncodingFromByteOrderMarks: false, BufferSize, leaveOpen: true); while (reader.ReadLine() is string line) { - if (JsonSerializer.Deserialize(line, KeyTypeInfo) is { } info) + if (JsonSerializer.Deserialize(line, EntryTypeInfo) is { } entry) { - this.CheckpointIndex.Add(info); + // We never actually use the file names from the index entries since they can be derived from the CheckpointInfo, but it is useful to + // have the UrlEncoded file names in the index file for human readability + this.CheckpointIndex.Add(entry.CheckpointInfo); } } } @@ -93,8 +100,14 @@ private void CheckDisposed() } } - private string GetFileNameForCheckpoint(string sessionId, CheckpointInfo key) - => Path.Combine(this.Directory.FullName, $"{sessionId}_{key.CheckpointId}.json"); + internal string GetFileNameForCheckpoint(string sessionId, CheckpointInfo key) + { + string protoPath = $"{sessionId}_{key.CheckpointId}.json"; + + // Escape the protoPath to ensure it is a valid file name, especially if sessionId or CheckpointId contain path separators, etc. + return Uri.EscapeDataString(protoPath) // This takes care of most of the invalid path characters + .Replace(".", "%2E"); // This takes care of escaping the root folder, since EscapeDataString does not escape dots + } private CheckpointInfo GetUnusedCheckpointInfo(string sessionId) { @@ -116,13 +129,16 @@ public override async ValueTask CreateCheckpointAsync(string ses CheckpointInfo key = this.GetUnusedCheckpointInfo(sessionId); string fileName = this.GetFileNameForCheckpoint(sessionId, key); + string filePath = Path.Combine(this.Directory.FullName, fileName); + try { - using Stream checkpointStream = File.Open(fileName, FileMode.Create, FileAccess.Write, FileShare.None); + using Stream checkpointStream = File.Open(filePath, FileMode.Create, FileAccess.Write, FileShare.None); using Utf8JsonWriter jsonWriter = new(checkpointStream, new JsonWriterOptions() { Indented = false }); value.WriteTo(jsonWriter); - JsonSerializer.Serialize(this._indexFile!, key, KeyTypeInfo); + CheckpointFileIndexEntry entry = new(key, fileName); + JsonSerializer.Serialize(this._indexFile!, entry, EntryTypeInfo); byte[] bytes = Encoding.UTF8.GetBytes(Environment.NewLine); await this._indexFile!.WriteAsync(bytes, 0, bytes.Length, CancellationToken.None).ConfigureAwait(false); await this._indexFile!.FlushAsync(CancellationToken.None).ConfigureAwait(false); @@ -136,7 +152,7 @@ public override async ValueTask CreateCheckpointAsync(string ses try { // try to clean up after ourselves - File.Delete(fileName); + File.Delete(filePath); } catch { } @@ -149,6 +165,7 @@ public override async ValueTask RetrieveCheckpointAsync(string sess { this.CheckDisposed(); string fileName = this.GetFileNameForCheckpoint(sessionId, key); + string filePath = Path.Combine(this.Directory.FullName, fileName); if (!this.CheckpointIndex.Contains(key) || !File.Exists(fileName)) @@ -156,7 +173,7 @@ public override async ValueTask RetrieveCheckpointAsync(string sess throw new KeyNotFoundException($"Checkpoint '{key.CheckpointId}' not found in store at '{this.Directory.FullName}'."); } - using FileStream checkpointFileStream = File.Open(fileName, FileMode.Open, FileAccess.Read, FileShare.Read); + using FileStream checkpointFileStream = File.Open(filePath, FileMode.Open, FileAccess.Read, FileShare.Read); using JsonDocument document = await JsonDocument.ParseAsync(checkpointFileStream).ConfigureAwait(false); return document.RootElement.Clone(); diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowsJsonUtilities.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowsJsonUtilities.cs index 08d1dcbcbb..4a94961522 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowsJsonUtilities.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowsJsonUtilities.cs @@ -71,6 +71,7 @@ private static JsonSerializerOptions CreateDefaultOptions() [JsonSerializable(typeof(PortableValue))] [JsonSerializable(typeof(PortableMessageEnvelope))] [JsonSerializable(typeof(InMemoryCheckpointManager))] + [JsonSerializable(typeof(CheckpointFileIndexEntry))] // Runtime State Types [JsonSerializable(typeof(ScopeKey))] diff --git a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/FileSystemJsonCheckpointStoreTests.cs b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/FileSystemJsonCheckpointStoreTests.cs index d21f9af035..10fbfddd64 100644 --- a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/FileSystemJsonCheckpointStoreTests.cs +++ b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/FileSystemJsonCheckpointStoreTests.cs @@ -9,49 +9,173 @@ namespace Microsoft.Agents.AI.Workflows.UnitTests; -public sealed class FileSystemJsonCheckpointStoreTests +internal sealed class TempDirectory : IDisposable { - [Fact] - public async Task CreateCheckpointAsync_ShouldPersistIndexToDiskBeforeDisposeAsync() + public DirectoryInfo DirectoryInfo { get; } + + public TempDirectory() { - // Arrange - DirectoryInfo tempDir = new(Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString())); - FileSystemJsonCheckpointStore? store = null; + string tempDirPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + this.DirectoryInfo = Directory.CreateDirectory(tempDirPath); + } + + public void Dispose() + { + this.DisposeInternal(); + GC.SuppressFinalize(this); + } + + private void DisposeInternal() + { + if (this.DirectoryInfo.Exists) + { + try + { + // Best efforts + this.DirectoryInfo.Delete(recursive: true); + } + catch { } + } + } + + ~TempDirectory() + { + // Best efforts + this.DisposeInternal(); + } + + public static implicit operator DirectoryInfo(TempDirectory tempDirectory) => tempDirectory.DirectoryInfo; + + public string FullName => this.DirectoryInfo.FullName; - try + public bool IsParentOf(FileInfo candidate) + { + if (candidate.Directory is null) { - store = new(tempDir); - string runId = Guid.NewGuid().ToString("N"); - JsonElement testData = JsonSerializer.SerializeToElement(new { test = "data" }); - - // Act - CheckpointInfo checkpoint = await store.CreateCheckpointAsync(runId, testData); - - // Assert - Check the file size before disposing to verify data was flushed to disk - // The index.jsonl file is held exclusively by the store, so we check via FileInfo - string indexPath = Path.Combine(tempDir.FullName, "index.jsonl"); - FileInfo indexFile = new(indexPath); - indexFile.Refresh(); - long fileSizeBeforeDispose = indexFile.Length; - - // Data should already be on disk (file size > 0) before we dispose - fileSizeBeforeDispose.Should().BeGreaterThan(0, "index.jsonl should be flushed to disk after CreateCheckpointAsync"); - - // Dispose to release file lock before final verification - store.Dispose(); - store = null; - - string[] lines = File.ReadAllLines(indexPath); - lines.Should().HaveCount(1); - lines[0].Should().Contain(checkpoint.CheckpointId); + return false; } - finally + + if (candidate.Directory.FullName == this.DirectoryInfo.FullName) + { + return true; + } + + return this.IsParentOf(candidate.Directory); + } + + public bool IsParentOf(DirectoryInfo candidate) + { + while (candidate.Parent is not null) { - store?.Dispose(); - if (tempDir.Exists) + if (candidate.Parent.FullName == this.DirectoryInfo.FullName) { - tempDir.Delete(recursive: true); + return true; } + + candidate = candidate.Parent; } + + return false; + } +} +public sealed class FileSystemJsonCheckpointStoreTests +{ + public static JsonElement TestData => JsonSerializer.SerializeToElement(new { test = "data" }); + + [Fact] + public async Task CreateCheckpointAsync_ShouldPersistIndexToDiskBeforeDisposeAsync() + { + // Arrange + using TempDirectory tempDirectory = new(); + using FileSystemJsonCheckpointStore? store = new(tempDirectory); + + string runId = Guid.NewGuid().ToString("N"); + + // Act + CheckpointInfo checkpoint = await store.CreateCheckpointAsync(runId, TestData); + + // Assert - Check the file size before disposing to verify data was flushed to disk + // The index.jsonl file is held exclusively by the store, so we check via FileInfo + string indexPath = Path.Combine(tempDirectory.FullName, "index.jsonl"); + FileInfo indexFile = new(indexPath); + indexFile.Refresh(); + long fileSizeBeforeDispose = indexFile.Length; + + // Data should already be on disk (file size > 0) before we dispose + fileSizeBeforeDispose.Should().BeGreaterThan(0, "index.jsonl should be flushed to disk after CreateCheckpointAsync"); + + // Dispose to release file lock before final verification + store.Dispose(); + + string[] lines = File.ReadAllLines(indexPath); + lines.Should().HaveCount(1); + lines[0].Should().Contain(checkpoint.CheckpointId); + } + + private async ValueTask Run_EscapeRootFolderTestAsync(string escapingPath) + { + // Arrange + using TempDirectory tempDirectory = new(); + using FileSystemJsonCheckpointStore store = new(tempDirectory); + + string naivePath = Path.Combine(tempDirectory.DirectoryInfo.FullName, escapingPath); + + // Check that the naive path is actually outside the temp directory to validate the test is meaningful + FileInfo naiveCheckpointFile = new(naivePath); + tempDirectory.IsParentOf(naiveCheckpointFile).Should().BeFalse("The naive path should be outside the root folder to validate that escaping is necessary."); + + // Act + CheckpointInfo checkpointInfo = await store.CreateCheckpointAsync(escapingPath, TestData); + + // Assert + string naivePathWithCheckpointId = Path.Combine(tempDirectory.DirectoryInfo.FullName, $"{escapingPath}_{checkpointInfo.CheckpointId}.json"); + new FileInfo(naivePathWithCheckpointId).Exists.Should().BeFalse("The naive path should not be used to save a checkpoint file."); + + string actualFileName = store.GetFileNameForCheckpoint(escapingPath, checkpointInfo); + string actualFilePath = Path.Combine(tempDirectory.DirectoryInfo.FullName, actualFileName); + FileInfo actualFile = new(actualFilePath); + + tempDirectory.IsParentOf(actualFile).Should().BeTrue("The actual checkpoint should be saved inside the root folder."); + actualFile.Exists.Should().BeTrue("The actual path should be used to save a checkpoint file."); + } + + [Fact] + public async Task CreateCheckpointAsync_ShouldNotEscapeRootFolderAsync() + { + // The SessionId is used as part of the file name, but if it contains path characters such as /.. it can escape the root folder. + // Testing that such characters are escaped properly to prevent directory traversal attacks, etc. + + await this.Run_EscapeRootFolderTestAsync("../valid_suffix"); + +#if !NETFRAMEWORK + if (OperatingSystem.IsWindows()) + { + // Windows allows both \ and / as path separators, so we test both + await this.Run_EscapeRootFolderTestAsync("..\\valid_suffix"); + } +#else + // .NET Framework is always on Windows + await this.Run_EscapeRootFolderTestAsync("..\\valid_suffix"); +#endif + } + + private const string InvalidPathCharsWin32 = "\\/:*?\"<>|"; + private const string InvalidPathCharsUnix = "/"; + private const string InvalidPathCharsMacOS = "/:"; + + [Theory] + [InlineData(InvalidPathCharsWin32)] + [InlineData(InvalidPathCharsUnix)] + [InlineData(InvalidPathCharsMacOS)] + public async Task CreateCheckpointAsync_EscapesInvalidCharsAsync(string invalidChars) + { + // Arrange + using TempDirectory tempDirectory = new(); + using FileSystemJsonCheckpointStore store = new(tempDirectory); + + string runId = $"prefix_{invalidChars}_suffix"; + + Func createCheckpointAction = async () => await store.CreateCheckpointAsync(runId, TestData); + await createCheckpointAction.Should().NotThrowAsync(); } }