Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
using System.IO;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization.Metadata;
using System.Threading;
using System.Threading.Tasks;

namespace Microsoft.Agents.AI.Workflows.Checkpointing;

internal record CheckpointFileIndexEntry(CheckpointInfo CheckpointInfo, string FileName);

/// <summary>
/// Provides a file system-based implementation of a JSON checkpoint store that persists checkpoint data and index
/// information to disk using JSON files.
Expand All @@ -28,6 +31,8 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos
internal DirectoryInfo Directory { get; }
internal HashSet<CheckpointInfo> CheckpointIndex { get; }

private static JsonTypeInfo<CheckpointFileIndexEntry> EntryTypeInfo => WorkflowsJsonUtilities.JsonContext.Default.CheckpointFileIndexEntry;

/// <summary>
/// Initializes a new instance of the <see cref="FileSystemJsonCheckpointStore"/> class that uses the specified directory
/// </summary>
Expand Down Expand Up @@ -64,9 +69,11 @@ public FileSystemJsonCheckpointStore(DirectoryInfo directory)
using StreamReader reader = new(this._indexFile, encoding: Encoding.UTF8, detectEncodingFromByteOrderMarks: false, BufferSize, leaveOpen: true);
while (reader.ReadLine() is string line)
{
if (JsonSerializer.Deserialize(line, KeyTypeInfo) is { } info)
if (JsonSerializer.Deserialize(line, EntryTypeInfo) is { } entry)
{
this.CheckpointIndex.Add(info);
// We never actually use the file names from the index entries since they can be derived from the CheckpointInfo, but it is useful to
// have the UrlEncoded file names in the index file for human readability
this.CheckpointIndex.Add(entry.CheckpointInfo);
}
}
}
Expand All @@ -93,8 +100,14 @@ private void CheckDisposed()
}
}

private string GetFileNameForCheckpoint(string sessionId, CheckpointInfo key)
=> Path.Combine(this.Directory.FullName, $"{sessionId}_{key.CheckpointId}.json");
internal string GetFileNameForCheckpoint(string sessionId, CheckpointInfo key)
{
string protoPath = $"{sessionId}_{key.CheckpointId}.json";

// Escape the protoPath to ensure it is a valid file name, especially if sessionId or CheckpointId contain path separators, etc.
return Uri.EscapeDataString(protoPath) // This takes care of most of the invalid path characters
.Replace(".", "%2E"); // This takes care of escaping the root folder, since EscapeDataString does not escape dots
}

private CheckpointInfo GetUnusedCheckpointInfo(string sessionId)
{
Expand All @@ -116,13 +129,16 @@ public override async ValueTask<CheckpointInfo> CreateCheckpointAsync(string ses

CheckpointInfo key = this.GetUnusedCheckpointInfo(sessionId);
string fileName = this.GetFileNameForCheckpoint(sessionId, key);
string filePath = Path.Combine(this.Directory.FullName, fileName);

try
{
using Stream checkpointStream = File.Open(fileName, FileMode.Create, FileAccess.Write, FileShare.None);
using Stream checkpointStream = File.Open(filePath, FileMode.Create, FileAccess.Write, FileShare.None);
using Utf8JsonWriter jsonWriter = new(checkpointStream, new JsonWriterOptions() { Indented = false });
value.WriteTo(jsonWriter);

JsonSerializer.Serialize(this._indexFile!, key, KeyTypeInfo);
CheckpointFileIndexEntry entry = new(key, fileName);
JsonSerializer.Serialize(this._indexFile!, entry, EntryTypeInfo);
byte[] bytes = Encoding.UTF8.GetBytes(Environment.NewLine);
await this._indexFile!.WriteAsync(bytes, 0, bytes.Length, CancellationToken.None).ConfigureAwait(false);
await this._indexFile!.FlushAsync(CancellationToken.None).ConfigureAwait(false);
Expand All @@ -136,7 +152,7 @@ public override async ValueTask<CheckpointInfo> CreateCheckpointAsync(string ses
try
{
// try to clean up after ourselves
File.Delete(fileName);
File.Delete(filePath);
}
catch { }

Expand All @@ -149,14 +165,15 @@ public override async ValueTask<JsonElement> RetrieveCheckpointAsync(string sess
{
this.CheckDisposed();
string fileName = this.GetFileNameForCheckpoint(sessionId, key);
string filePath = Path.Combine(this.Directory.FullName, fileName);

if (!this.CheckpointIndex.Contains(key) ||
!File.Exists(fileName))
{
throw new KeyNotFoundException($"Checkpoint '{key.CheckpointId}' not found in store at '{this.Directory.FullName}'.");
}

using FileStream checkpointFileStream = File.Open(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
using FileStream checkpointFileStream = File.Open(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
using JsonDocument document = await JsonDocument.ParseAsync(checkpointFileStream).ConfigureAwait(false);

return document.RootElement.Clone();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ private static JsonSerializerOptions CreateDefaultOptions()
[JsonSerializable(typeof(PortableValue))]
[JsonSerializable(typeof(PortableMessageEnvelope))]
[JsonSerializable(typeof(InMemoryCheckpointManager))]
[JsonSerializable(typeof(CheckpointFileIndexEntry))]

// Runtime State Types
[JsonSerializable(typeof(ScopeKey))]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,49 +9,173 @@

namespace Microsoft.Agents.AI.Workflows.UnitTests;

public sealed class FileSystemJsonCheckpointStoreTests
internal sealed class TempDirectory : IDisposable
{
[Fact]
public async Task CreateCheckpointAsync_ShouldPersistIndexToDiskBeforeDisposeAsync()
public DirectoryInfo DirectoryInfo { get; }

public TempDirectory()
{
// Arrange
DirectoryInfo tempDir = new(Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()));
FileSystemJsonCheckpointStore? store = null;
string tempDirPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
this.DirectoryInfo = Directory.CreateDirectory(tempDirPath);
}

public void Dispose()
{
this.DisposeInternal();
GC.SuppressFinalize(this);
}

private void DisposeInternal()
{
if (this.DirectoryInfo.Exists)
{
try
{
// Best efforts
this.DirectoryInfo.Delete(recursive: true);
}
catch { }
}
}

~TempDirectory()
{
// Best efforts
this.DisposeInternal();
}

public static implicit operator DirectoryInfo(TempDirectory tempDirectory) => tempDirectory.DirectoryInfo;

public string FullName => this.DirectoryInfo.FullName;

try
public bool IsParentOf(FileInfo candidate)
{
if (candidate.Directory is null)
{
store = new(tempDir);
string runId = Guid.NewGuid().ToString("N");
JsonElement testData = JsonSerializer.SerializeToElement(new { test = "data" });

// Act
CheckpointInfo checkpoint = await store.CreateCheckpointAsync(runId, testData);

// Assert - Check the file size before disposing to verify data was flushed to disk
// The index.jsonl file is held exclusively by the store, so we check via FileInfo
string indexPath = Path.Combine(tempDir.FullName, "index.jsonl");
FileInfo indexFile = new(indexPath);
indexFile.Refresh();
long fileSizeBeforeDispose = indexFile.Length;

// Data should already be on disk (file size > 0) before we dispose
fileSizeBeforeDispose.Should().BeGreaterThan(0, "index.jsonl should be flushed to disk after CreateCheckpointAsync");

// Dispose to release file lock before final verification
store.Dispose();
store = null;

string[] lines = File.ReadAllLines(indexPath);
lines.Should().HaveCount(1);
lines[0].Should().Contain(checkpoint.CheckpointId);
return false;
}
finally

if (candidate.Directory.FullName == this.DirectoryInfo.FullName)
{
return true;
}

return this.IsParentOf(candidate.Directory);
}

public bool IsParentOf(DirectoryInfo candidate)
{
while (candidate.Parent is not null)
{
store?.Dispose();
if (tempDir.Exists)
if (candidate.Parent.FullName == this.DirectoryInfo.FullName)
{
tempDir.Delete(recursive: true);
return true;
}

candidate = candidate.Parent;
}

return false;
}
}
public sealed class FileSystemJsonCheckpointStoreTests
{
public static JsonElement TestData => JsonSerializer.SerializeToElement(new { test = "data" });

[Fact]
public async Task CreateCheckpointAsync_ShouldPersistIndexToDiskBeforeDisposeAsync()
{
// Arrange
using TempDirectory tempDirectory = new();
using FileSystemJsonCheckpointStore? store = new(tempDirectory);

string runId = Guid.NewGuid().ToString("N");

// Act
CheckpointInfo checkpoint = await store.CreateCheckpointAsync(runId, TestData);

// Assert - Check the file size before disposing to verify data was flushed to disk
// The index.jsonl file is held exclusively by the store, so we check via FileInfo
string indexPath = Path.Combine(tempDirectory.FullName, "index.jsonl");
FileInfo indexFile = new(indexPath);
indexFile.Refresh();
long fileSizeBeforeDispose = indexFile.Length;

// Data should already be on disk (file size > 0) before we dispose
fileSizeBeforeDispose.Should().BeGreaterThan(0, "index.jsonl should be flushed to disk after CreateCheckpointAsync");

// Dispose to release file lock before final verification
store.Dispose();

string[] lines = File.ReadAllLines(indexPath);
lines.Should().HaveCount(1);
lines[0].Should().Contain(checkpoint.CheckpointId);
}

private async ValueTask Run_EscapeRootFolderTestAsync(string escapingPath)
{
// Arrange
using TempDirectory tempDirectory = new();
using FileSystemJsonCheckpointStore store = new(tempDirectory);

string naivePath = Path.Combine(tempDirectory.DirectoryInfo.FullName, escapingPath);

// Check that the naive path is actually outside the temp directory to validate the test is meaningful
FileInfo naiveCheckpointFile = new(naivePath);
tempDirectory.IsParentOf(naiveCheckpointFile).Should().BeFalse("The naive path should be outside the root folder to validate that escaping is necessary.");

// Act
CheckpointInfo checkpointInfo = await store.CreateCheckpointAsync(escapingPath, TestData);

// Assert
string naivePathWithCheckpointId = Path.Combine(tempDirectory.DirectoryInfo.FullName, $"{escapingPath}_{checkpointInfo.CheckpointId}.json");
new FileInfo(naivePathWithCheckpointId).Exists.Should().BeFalse("The naive path should not be used to save a checkpoint file.");

string actualFileName = store.GetFileNameForCheckpoint(escapingPath, checkpointInfo);
string actualFilePath = Path.Combine(tempDirectory.DirectoryInfo.FullName, actualFileName);
FileInfo actualFile = new(actualFilePath);

tempDirectory.IsParentOf(actualFile).Should().BeTrue("The actual checkpoint should be saved inside the root folder.");
actualFile.Exists.Should().BeTrue("The actual path should be used to save a checkpoint file.");
}

[Fact]
public async Task CreateCheckpointAsync_ShouldNotEscapeRootFolderAsync()
{
// The SessionId is used as part of the file name, but if it contains path characters such as /.. it can escape the root folder.
// Testing that such characters are escaped properly to prevent directory traversal attacks, etc.

await this.Run_EscapeRootFolderTestAsync("../valid_suffix");

#if !NETFRAMEWORK
if (OperatingSystem.IsWindows())
{
// Windows allows both \ and / as path separators, so we test both
await this.Run_EscapeRootFolderTestAsync("..\\valid_suffix");
}
#else
// .NET Framework is always on Windows
await this.Run_EscapeRootFolderTestAsync("..\\valid_suffix");
#endif
}

private const string InvalidPathCharsWin32 = "\\/:*?\"<>|";
private const string InvalidPathCharsUnix = "/";
private const string InvalidPathCharsMacOS = "/:";

[Theory]
[InlineData(InvalidPathCharsWin32)]
[InlineData(InvalidPathCharsUnix)]
[InlineData(InvalidPathCharsMacOS)]
public async Task CreateCheckpointAsync_EscapesInvalidCharsAsync(string invalidChars)
{
// Arrange
using TempDirectory tempDirectory = new();
using FileSystemJsonCheckpointStore store = new(tempDirectory);

string runId = $"prefix_{invalidChars}_suffix";

Func<Task> createCheckpointAction = async () => await store.CreateCheckpointAsync(runId, TestData);
await createCheckpointAction.Should().NotThrowAsync();
}
}
Loading