Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- Keep default prompts in static C# classes; do not rely on prompt files under `prompts/` for built-in templates.
- Register language models through Microsoft.Extensions.AI keyed services; avoid bespoke `LanguageModelConfig` providers.
- Always run `dotnet format GraphRag.slnx` before finishing work.
- Always run `dotnet test GraphRag.slnx` before finishing work, after building.

# Conversations
Any resulting updates to AGENTS.md should go under the section "## Rules to follow".
Expand Down
4 changes: 2 additions & 2 deletions Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
<RepositoryUrl>https://github.com/managedcode/graphrag</RepositoryUrl>
<PackageProjectUrl>https://github.com/managedcode/graphrag</PackageProjectUrl>
<Product>Managed Code GraphRag</Product>
<Version>0.0.3</Version>
<PackageVersion>0.0.3</PackageVersion>
<Version>0.0.4</Version>
<PackageVersion>0.0.4</PackageVersion>

</PropertyGroup>
<PropertyGroup Condition="'$(GITHUB_ACTIONS)' == 'true'">
Expand Down
249 changes: 151 additions & 98 deletions README.md

Large diffs are not rendered by default.

133 changes: 88 additions & 45 deletions src/ManagedCode.GraphRag/Community/CommunityBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,53 +23,12 @@ public static IReadOnlyList<CommunityRecord> Build(
return Array.Empty<CommunityRecord>();
}

var adjacency = BuildAdjacency(entities, relationships);
var titleLookup = entities.ToDictionary(entity => entity.Title, StringComparer.OrdinalIgnoreCase);
var random = new Random(config.Seed);

var orderedTitles = titleLookup.Keys
.OrderBy(_ => random.Next())
.ToList();

var visited = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var components = new List<List<string>>();

foreach (var title in orderedTitles)
var components = config.Algorithm switch
{
if (!visited.Add(title))
{
continue;
}

var component = new List<string>();
var queue = new Queue<string>();
queue.Enqueue(title);

while (queue.Count > 0)
{
var current = queue.Dequeue();
component.Add(current);

if (!adjacency.TryGetValue(current, out var neighbors) || neighbors.Count == 0)
{
continue;
}

var orderedNeighbors = neighbors
.OrderBy(_ => random.Next())
.ToList();

foreach (var neighbor in orderedNeighbors)
{
if (visited.Add(neighbor))
{
queue.Enqueue(neighbor);
}
}
}

components.Add(component);
}
CommunityDetectionAlgorithm.FastLabelPropagation => BuildUsingLabelPropagation(entities, relationships, config),
_ => BuildUsingConnectedComponents(entities, relationships, config)
};

if (config.UseLargestConnectedComponent && components.Count > 0)
{
Expand Down Expand Up @@ -183,6 +142,90 @@ public static IReadOnlyList<CommunityRecord> Build(
return communityRecords;
}

/// <summary>
/// Splits the graph into connected components with a breadth-first traversal.
/// </summary>
/// <remarks>
/// Both the order in which start nodes are tried and the order in which a node's
/// neighbors are enqueued come from a <see cref="Random"/> seeded with
/// <see cref="ClusterGraphConfig.Seed"/>.
/// </remarks>
/// <param name="entities">Entities passed through to the adjacency builder.</param>
/// <param name="relationships">Relationships passed through to the adjacency builder.</param>
/// <param name="config">Clustering configuration; only the seed is read here.</param>
/// <returns>One list of node titles per component, in discovery order.</returns>
private static List<List<string>> BuildUsingConnectedComponents(
    IReadOnlyList<EntityRecord> entities,
    IReadOnlyList<RelationshipRecord> relationships,
    ClusterGraphConfig config)
{
    var graph = BuildAdjacency(entities, relationships);
    var rng = new Random(config.Seed);
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var result = new List<List<string>>();

    // Materialize the shuffled start order up front so every key draws its
    // random sort value before any traversal consumes further values.
    var startOrder = graph.Keys
        .OrderBy(_ => rng.Next())
        .ToList();

    foreach (var start in startOrder)
    {
        // Nodes already reached from an earlier start belong to that component.
        if (!seen.Add(start))
        {
            continue;
        }

        var members = new List<string>();
        var frontier = new Queue<string>();
        frontier.Enqueue(start);

        while (frontier.TryDequeue(out var node))
        {
            members.Add(node);

            if (!graph.TryGetValue(node, out var adjacent) || adjacent.Count == 0)
            {
                continue;
            }

            var shuffledAdjacent = adjacent
                .OrderBy(_ => rng.Next())
                .ToList();

            foreach (var next in shuffledAdjacent)
            {
                // Add returns true only the first time a node is seen, so each
                // node is enqueued at most once.
                if (seen.Add(next))
                {
                    frontier.Enqueue(next);
                }
            }
        }

        result.Add(members);
    }

    return result;
}

/// <summary>
/// Groups node titles by the label that
/// <see cref="FastLabelPropagationCommunityDetector.AssignLabels"/> assigned to them.
/// </summary>
/// <param name="entities">Entities forwarded to the label propagation detector.</param>
/// <param name="relationships">Relationships forwarded to the label propagation detector.</param>
/// <param name="config">Clustering configuration forwarded to the detector.</param>
/// <returns>
/// One list per label, each holding the case-insensitively distinct titles that share
/// the label, sorted case-insensitively. Empty when the detector returns no assignments.
/// </returns>
private static List<List<string>> BuildUsingLabelPropagation(
    IReadOnlyList<EntityRecord> entities,
    IReadOnlyList<RelationshipRecord> relationships,
    ClusterGraphConfig config)
{
    var labelByTitle = FastLabelPropagationCommunityDetector.AssignLabels(entities, relationships, config);
    if (labelByTitle.Count == 0)
    {
        return new List<List<string>>();
    }

    // GroupBy yields groups in the order each label is first encountered, with
    // labels compared case-insensitively.
    return labelByTitle
        .GroupBy(pair => pair.Value, pair => pair.Key, StringComparer.OrdinalIgnoreCase)
        .Select(group => group
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .OrderBy(title => title, StringComparer.OrdinalIgnoreCase)
            .ToList())
        .Where(members => members.Count > 0)
        .ToList();
}

private static Dictionary<string, HashSet<string>> BuildAdjacency(
IReadOnlyList<EntityRecord> entities,
IReadOnlyList<RelationshipRecord> relationships)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
using GraphRag.Config;
using GraphRag.Entities;
using GraphRag.Relationships;

namespace GraphRag.Community;

/// <summary>
/// Assigns a community label to every graph node using weighted label propagation.
/// </summary>
internal static class FastLabelPropagationCommunityDetector
{
    /// <summary>
    /// Runs label propagation over the graph formed by <paramref name="entities"/> and
    /// <paramref name="relationships"/>.
    /// </summary>
    /// <remarks>
    /// Every node starts with its own title as its label. On each pass the nodes are
    /// visited in an order drawn from a <see cref="Random"/> seeded with
    /// <see cref="ClusterGraphConfig.Seed"/>, and each node adopts the label carrying the
    /// largest summed weight among its neighbors. Nodes without neighbors keep their
    /// initial label. The loop ends after <see cref="ClusterGraphConfig.MaxIterations"/>
    /// passes (at least one) or as soon as a full pass changes no label.
    /// </remarks>
    /// <param name="entities">Entities whose titles become graph nodes.</param>
    /// <param name="relationships">
    /// Edges between nodes; endpoints that are not entity titles are added as nodes too.
    /// </param>
    /// <param name="config">Supplies the random seed and the iteration cap.</param>
    /// <returns>
    /// A case-insensitive map from node title to assigned label; empty when the graph
    /// has no nodes.
    /// </returns>
    /// <exception cref="ArgumentNullException">Any argument is <see langword="null"/>.</exception>
    public static IReadOnlyDictionary<string, string> AssignLabels(
        IReadOnlyList<EntityRecord> entities,
        IReadOnlyList<RelationshipRecord> relationships,
        ClusterGraphConfig config)
    {
        ArgumentNullException.ThrowIfNull(entities);
        ArgumentNullException.ThrowIfNull(relationships);
        ArgumentNullException.ThrowIfNull(config);

        var adjacency = BuildAdjacency(entities, relationships);
        if (adjacency.Count == 0)
        {
            return new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        }

        var random = new Random(config.Seed);
        var labels = adjacency.Keys.ToDictionary(node => node, node => node, StringComparer.OrdinalIgnoreCase);
        var nodes = adjacency.Keys.ToList();

        // A non-positive MaxIterations still performs one pass.
        var maxIterations = Math.Max(1, config.MaxIterations);

        for (var iteration = 0; iteration < maxIterations; iteration++)
        {
            var shuffled = nodes.OrderBy(_ => random.Next()).ToList();
            var changed = false;

            foreach (var node in shuffled)
            {
                var neighbors = adjacency[node];
                if (neighbors.Count == 0)
                {
                    continue;
                }

                // Sum the edge weight behind each label currently held by a neighbor.
                var labelWeights = new Dictionary<string, double>(StringComparer.OrdinalIgnoreCase);
                foreach (var (neighbor, weight) in neighbors)
                {
                    if (!labels.TryGetValue(neighbor, out var neighborLabel))
                    {
                        continue;
                    }

                    // Non-positive weights count as a unit weight.
                    labelWeights[neighborLabel] = labelWeights.GetValueOrDefault(neighborLabel) + (weight > 0 ? weight : 1);
                }

                if (labelWeights.Count == 0)
                {
                    continue;
                }

                var maxWeight = labelWeights.Values.Max();
                var candidates = labelWeights
                    .Where(pair => Math.Abs(pair.Value - maxWeight) < 1e-6)
                    .Select(pair => pair.Key)
                    .ToList();

                // Only draw from the generator on a real tie; drawing for a single
                // candidate would shift the seeded sequence used by later choices.
                var chosen = candidates.Count == 1
                    ? candidates[0]
                    : candidates[random.Next(candidates.Count)];

                if (!string.Equals(labels[node], chosen, StringComparison.OrdinalIgnoreCase))
                {
                    labels[node] = chosen;
                    changed = true;
                }
            }

            if (!changed)
            {
                break;
            }
        }

        return labels;
    }

    /// <summary>
    /// Builds an undirected, weighted adjacency list keyed case-insensitively by node title.
    /// </summary>
    /// <remarks>
    /// Each relationship is recorded on both endpoints. A relationship whose source and
    /// target resolve to the same node therefore appears twice in that node's own list.
    /// </remarks>
    private static Dictionary<string, List<(string Neighbor, double Weight)>> BuildAdjacency(
        IReadOnlyList<EntityRecord> entities,
        IReadOnlyList<RelationshipRecord> relationships)
    {
        var adjacency = new Dictionary<string, List<(string Neighbor, double Weight)>>(StringComparer.OrdinalIgnoreCase);

        foreach (var entity in entities)
        {
            // TryAdd rather than ToDictionary: entities whose titles repeat or differ
            // only by case collapse into one node instead of throwing ArgumentException.
            adjacency.TryAdd(entity.Title, new List<(string Neighbor, double Weight)>());
        }

        foreach (var relationship in relationships)
        {
            var sourceNeighbors = GetOrAddNeighbors(adjacency, relationship.Source);
            var targetNeighbors = GetOrAddNeighbors(adjacency, relationship.Target);

            sourceNeighbors.Add((relationship.Target, relationship.Weight));
            targetNeighbors.Add((relationship.Source, relationship.Weight));
        }

        return adjacency;
    }

    /// <summary>
    /// Returns the neighbor list for <paramref name="node"/>, registering the node first
    /// when a relationship references a title that no entity declared.
    /// </summary>
    private static List<(string Neighbor, double Weight)> GetOrAddNeighbors(
        Dictionary<string, List<(string Neighbor, double Weight)>> adjacency,
        string node)
    {
        if (!adjacency.TryGetValue(node, out var neighbors))
        {
            neighbors = new List<(string Neighbor, double Weight)>();
            adjacency[node] = neighbors;
        }

        return neighbors;
    }
}
13 changes: 13 additions & 0 deletions src/ManagedCode.GraphRag/Config/ClusterGraphConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,17 @@ public sealed class ClusterGraphConfig
/// results deterministic across runs.
/// </summary>
public int Seed { get; set; } = unchecked((int)0xDEADBEEF);

/// <summary>
/// Gets or sets the maximum number of label propagation iterations when the
/// <see cref="CommunityDetectionAlgorithm.FastLabelPropagation"/> algorithm is used.
/// Defaults to 20. The label propagation detector raises values below 1 to a single
/// iteration and stops earlier when a full pass changes no label.
/// </summary>
public int MaxIterations { get; set; } = 20;

/// <summary>
/// Gets or sets the community detection algorithm. The fast label propagation
/// implementation mirrors the in-process routine provided by GraphRag.Net.
/// Defaults to <see cref="CommunityDetectionAlgorithm.FastLabelPropagation"/>; the
/// community builder handles every other value with connected-components detection.
/// </summary>
public CommunityDetectionAlgorithm Algorithm { get; set; }
= CommunityDetectionAlgorithm.FastLabelPropagation;
}
6 changes: 6 additions & 0 deletions src/ManagedCode.GraphRag/Config/Enums.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,9 @@ public enum ModularityMetric
Lcc,
WeightedComponents
}

/// <summary>
/// Selects how entities are grouped into communities.
/// </summary>
public enum CommunityDetectionAlgorithm
{
    /// <summary>
    /// Weighted label propagation. This is the zero value of the enum.
    /// </summary>
    FastLabelPropagation = 0,

    /// <summary>
    /// Connected-components detection.
    /// </summary>
    ConnectedComponents = 1
}
2 changes: 2 additions & 0 deletions src/ManagedCode.GraphRag/Config/GraphRagConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ public sealed class GraphRagConfig

public ClusterGraphConfig ClusterGraph { get; set; } = new();

public HeuristicMaintenanceConfig Heuristics { get; set; } = new();

public CommunityReportsConfig CommunityReports { get; set; } = new();

public PromptTuningConfig PromptTuning { get; set; } = new();
Expand Down
Loading
Loading