Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Added Elasticsearch database. #352

Merged
merged 4 commits into from
Aug 3, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions LangChain.sln
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Groq", "Groq", "{5DEC2707-D
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LangChain.Providers.Groq.Tests", "src\Providers\Groq\test\LangChain.Providers.Groq.Tests.csproj", "{CC7F58F4-C824-4BED-8C4A-760C9AB8FC6E}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Elasticsearch", "Elasticsearch", "{46A24203-9C28-4135-ACC6-484C4BE4A6F2}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LangChain.Databases.Elasticsearch", "src\Databases\Elasticsearch\src\LangChain.Databases.Elasticsearch.csproj", "{12007C28-2F0A-4D51-8DF6-5ED41F1ACF44}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -734,6 +738,10 @@ Global
{CC7F58F4-C824-4BED-8C4A-760C9AB8FC6E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{CC7F58F4-C824-4BED-8C4A-760C9AB8FC6E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{CC7F58F4-C824-4BED-8C4A-760C9AB8FC6E}.Release|Any CPU.Build.0 = Release|Any CPU
{12007C28-2F0A-4D51-8DF6-5ED41F1ACF44}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{12007C28-2F0A-4D51-8DF6-5ED41F1ACF44}.Debug|Any CPU.Build.0 = Debug|Any CPU
{12007C28-2F0A-4D51-8DF6-5ED41F1ACF44}.Release|Any CPU.ActiveCfg = Release|Any CPU
{12007C28-2F0A-4D51-8DF6-5ED41F1ACF44}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -878,6 +886,8 @@ Global
{FD0A56AD-AFB4-4A21-99C1-9BE5D074EB56} = {5DEC2707-DD62-4DCD-AC5D-6670AC2A1B01}
{5DEC2707-DD62-4DCD-AC5D-6670AC2A1B01} = {E2B9833C-0397-4FAF-A3A8-116E58749750}
{CC7F58F4-C824-4BED-8C4A-760C9AB8FC6E} = {5DEC2707-DD62-4DCD-AC5D-6670AC2A1B01}
{46A24203-9C28-4135-ACC6-484C4BE4A6F2} = {A098FF69-D8B5-4B2B-83D5-F777D3817F15}
{12007C28-2F0A-4D51-8DF6-5ED41F1ACF44} = {46A24203-9C28-4135-ACC6-484C4BE4A6F2}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {5C00D0F1-6138-4ED9-846B-97E43D6DFF1C}
Expand Down
102 changes: 102 additions & 0 deletions src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
using Elastic.Clients.Elasticsearch;

namespace LangChain.Databases.Elasticsearch;

/// <summary>
/// Elasticsearch vector collection.
/// </summary>
public class ElasticsearchVectorCollection(
ElasticsearchClient client,
string name = VectorCollection.DefaultName,
string? id = null)
: VectorCollection(name, id), IVectorCollection
{
/// <inheritdoc />
public async Task<Vector?> GetAsync(string id, CancellationToken cancellationToken = default)
{
var record = await client.GetAsync(Name, new Id(id), cancellationToken: cancellationToken).ConfigureAwait(false);
if (record == null)
{
return null;
}

return new Vector
{
Text = string.Empty,
Metadata = new Dictionary<string, object>(),
};
}

/// <inheritdoc />
public async Task<bool> DeleteAsync(
IEnumerable<string> ids,
CancellationToken cancellationToken = default)
{
ids = ids ?? throw new ArgumentNullException(nameof(ids));

foreach (var id in ids)
{
await client.DeleteAsync(Name, new Id(id), cancellationToken).ConfigureAwait(false);
}

return true;
}

/// <inheritdoc />
public Task<IReadOnlyCollection<string>> AddAsync(
IReadOnlyCollection<Vector> items,
CancellationToken cancellationToken = default)
{
items = items ?? throw new ArgumentNullException(nameof(items));

return Task.FromResult<IReadOnlyCollection<string>>([]);
}

/// <inheritdoc />
public Task<VectorSearchResponse> SearchAsync(
VectorSearchRequest request,
VectorSearchSettings? settings = default,
CancellationToken cancellationToken = default)
{
request = request ?? throw new ArgumentNullException(nameof(request));
settings ??= new VectorSearchSettings();

var response = await client.SearchAsync<MyDoc>(s => s

Check failure on line 64 in src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

The type or namespace name 'MyDoc' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 64 in src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

The 'await' operator can only be used within an async method. Consider marking this method with the 'async' modifier and changing its return type to 'Task<Task<VectorSearchResponse>>'.

Check warning on line 64 in src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

Consider calling ConfigureAwait on the awaited task (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/quality-rules/ca2007)

Check warning on line 64 in src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

Consider calling ConfigureAwait on the awaited task (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/quality-rules/ca2007)

Check warning on line 64 in src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

Consider calling ConfigureAwait on the awaited task (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/quality-rules/ca2007)
HavenDV marked this conversation as resolved.
Show resolved Hide resolved
.Index("my_index")
.From(0)
.Size(10)
.Query(q => q
.Knn()

Check failure on line 69 in src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

No overload for method 'Knn' takes 0 arguments
HavenDV marked this conversation as resolved.
Show resolved Hide resolved
.Term(t => t.User, "flobernd")

Check failure on line 70 in src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

'TermQueryDescriptor<MyDoc>' does not contain a definition for 'User' and no accessible extension method 'User' accepting a first argument of type 'TermQueryDescriptor<MyDoc>' could be found (are you missing a using directive or an assembly reference?)

Check failure on line 70 in src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

Only assignment, call, increment, decrement, await, and new object expressions can be used as a statement
HavenDV marked this conversation as resolved.
Show resolved Hide resolved
)
);

if (response.IsValidResponse)
{
var doc = response.Documents.FirstOrDefault();
}

return Task.FromResult(new VectorSearchResponse
{
Items = Array.Empty<string>()
.Select(record =>
{
return new Vector
{
Id = string.Empty,
Text = string.Empty,
Metadata = new Dictionary<string, object>(),
Embedding = [],
Distance = 0.0F,
};
})
.ToArray(),
});
}

/// <inheritdoc />
public Task<bool> IsEmptyAsync(CancellationToken cancellationToken = default)
{
throw new NotImplementedException();
}
}
72 changes: 72 additions & 0 deletions src/Databases/Elasticsearch/src/ElasticsearchVectorDatabase.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
using Elastic.Clients.Elasticsearch;

namespace LangChain.Databases.Elasticsearch;

/// <summary>
/// Elasticsearch vector store.
/// </summary>
public class ElasticsearchVectorDatabase(
ElasticsearchClient client)
: IVectorDatabase
{
/// <inheritdoc />
public async Task<IVectorCollection> GetCollectionAsync(string collectionName, CancellationToken cancellationToken = default)
{
try
{
var collection = await client.GetCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false) ?? throw new InvalidOperationException("Collection not found");

Check failure on line 17 in src/Databases/Elasticsearch/src/ElasticsearchVectorDatabase.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

'ElasticsearchClient' does not contain a definition for 'GetCollectionAsync' and no accessible extension method 'GetCollectionAsync' accepting a first argument of type 'ElasticsearchClient' could be found (are you missing a using directive or an assembly reference?)
HavenDV marked this conversation as resolved.
Show resolved Hide resolved

return new ElasticsearchVectorCollection(
client,
name: collection.Name,
id: collection.Id);
}
catch (Exception exception)
{
throw new InvalidOperationException("Collection not found", innerException: exception);
}
}

/// <inheritdoc />
public async Task<bool> IsCollectionExistsAsync(string collectionName, CancellationToken cancellationToken = default)
{
await foreach (var name in client.ListCollectionsAsync(cancellationToken).ConfigureAwait(false))

Check failure on line 33 in src/Databases/Elasticsearch/src/ElasticsearchVectorDatabase.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

'ElasticsearchClient' does not contain a definition for 'ListCollectionsAsync' and no accessible extension method 'ListCollectionsAsync' accepting a first argument of type 'ElasticsearchClient' could be found (are you missing a using directive or an assembly reference?)
HavenDV marked this conversation as resolved.
Show resolved Hide resolved
{
if (name == collectionName)
{
return true;
}
}

return false;
}

/// <inheritdoc />
public async Task CreateCollectionAsync(string collectionName, int dimensions, CancellationToken cancellationToken = default)
{
await client.CreateCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false);

Check failure on line 47 in src/Databases/Elasticsearch/src/ElasticsearchVectorDatabase.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

'ElasticsearchClient' does not contain a definition for 'CreateCollectionAsync' and no accessible extension method 'CreateCollectionAsync' accepting a first argument of type 'ElasticsearchClient' could be found (are you missing a using directive or an assembly reference?)
HavenDV marked this conversation as resolved.
Show resolved Hide resolved
}

/// <inheritdoc />
public async Task<IReadOnlyList<string>> ListCollectionsAsync(CancellationToken cancellationToken = default)
{
return await client.ListCollectionsAsync(cancellationToken).ToListAsync(cancellationToken: cancellationToken).ConfigureAwait(false);

Check failure on line 53 in src/Databases/Elasticsearch/src/ElasticsearchVectorDatabase.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

'ElasticsearchClient' does not contain a definition for 'ListCollectionsAsync' and no accessible extension method 'ListCollectionsAsync' accepting a first argument of type 'ElasticsearchClient' could be found (are you missing a using directive or an assembly reference?)
}

/// <inheritdoc />
public async Task<IVectorCollection> GetOrCreateCollectionAsync(string collectionName, int dimensions, CancellationToken cancellationToken = default)
{
if (!await IsCollectionExistsAsync(collectionName, cancellationToken).ConfigureAwait(false))
{
await client.CreateCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false);

Check failure on line 61 in src/Databases/Elasticsearch/src/ElasticsearchVectorDatabase.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

'ElasticsearchClient' does not contain a definition for 'CreateCollectionAsync' and no accessible extension method 'CreateCollectionAsync' accepting a first argument of type 'ElasticsearchClient' could be found (are you missing a using directive or an assembly reference?)
}

return await GetCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false);
}

/// <inheritdoc />
public async Task DeleteCollectionAsync(string collectionName, CancellationToken cancellationToken = default)
{
await client.DeleteCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>net4.6.2;netstandard2.0;net6.0;net8.0</TargetFrameworks>
<NoWarn>$(NoWarn)</NoWarn>
</PropertyGroup>

<PropertyGroup Label="NuGet">
<Description>Elasticsearch support for LangChain.</Description>
<PackageTags>$(PackageTags);elasticsearch</PackageTags>
</PropertyGroup>

<ItemGroup Label="Usings">
<Using Include="System.Net.Http" />
</ItemGroup>

<ItemGroup>
<PackageReference Include="Elastic.Clients.Elasticsearch" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\Abstractions\src\LangChain.Databases.Abstractions.csproj" />
</ItemGroup>

</Project>
17 changes: 17 additions & 0 deletions src/Databases/IntegrationTests/DatabaseTests.Configure.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
using DotNet.Testcontainers.Builders;
using Elastic.Clients.Elasticsearch;
using LangChain.Databases.Chroma;
using LangChain.Databases.Elasticsearch;
using LangChain.Databases.InMemory;
using LangChain.Databases.OpenSearch;
using LangChain.Databases.Postgres;
using LangChain.Databases.Sqlite;
using LangChain.Databases.Mongo;
using Testcontainers.Elasticsearch;
using Testcontainers.MongoDb;
using Testcontainers.PostgreSql;

Expand Down Expand Up @@ -121,6 +124,20 @@ private static async Task<DatabaseTestEnvironment> StartEnvironmentForAsync(Supp
Container = container,
};
}

case SupportedDatabase.Elasticsearch:
{
var container = new ElasticsearchBuilder().Build();

await container.StartAsync(cancellationToken);

var client = new ElasticsearchClient(new Uri($"http://localhost:{container.GetMappedPublicPort(9200)}"));
return new DatabaseTestEnvironment
{
VectorDatabase = new ElasticsearchVectorDatabase(client),
Container = container,
};
}
default:
throw new ArgumentOutOfRangeException(nameof(database), database, null);
}
Expand Down
7 changes: 7 additions & 0 deletions src/Databases/IntegrationTests/DatabaseTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ public partial class DatabaseTests
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.Elasticsearch)]
public async Task CreateAndDeleteCollection_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -58,6 +59,7 @@ await vectorDatabase.Invoking(y => y.GetCollectionAsync(environment.CollectionNa
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.Elasticsearch)]
public async Task AddDocuments_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -103,6 +105,7 @@ public async Task AddDocuments_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.Elasticsearch)]
public async Task AddTexts_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -152,6 +155,7 @@ public async Task AddTexts_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.Elasticsearch)]
public async Task DeleteDocuments_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -189,6 +193,7 @@ public async Task DeleteDocuments_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.OpenSearch)]
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Elasticsearch)]
public async Task SimilaritySearch_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -220,6 +225,7 @@ public async Task SimilaritySearch_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.OpenSearch)]
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Elasticsearch)]
public async Task SimilaritySearchByVector_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -247,6 +253,7 @@ public async Task SimilaritySearchByVector_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.OpenSearch)]
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Elasticsearch)]
public async Task SimilaritySearchWithScores_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<ProjectReference Include="..\AzureSearch\src\LangChain.Databases.AzureSearch.csproj" />
<ProjectReference Include="..\Chroma\src\LangChain.Databases.Chroma.csproj" />
<ProjectReference Include="..\DuckDb\src\LangChain.Databases.DuckDb.csproj" />
<ProjectReference Include="..\Elasticsearch\src\LangChain.Databases.Elasticsearch.csproj" />
<ProjectReference Include="..\InMemory\src\LangChain.Databases.InMemory.csproj" />
<ProjectReference Include="..\Kendra\src\LangChain.Databases.Kendra.csproj" />
<ProjectReference Include="..\Mongo\src\LangChain.Databases.Mongo.csproj" />
Expand All @@ -26,6 +27,7 @@

<ItemGroup>
<PackageReference Include="Testcontainers" />
<PackageReference Include="Testcontainers.Elasticsearch" />
<PackageReference Include="Testcontainers.PostgreSql" />
<PackageReference Include="Testcontainers.MongoDb" />
<PackageReference Include="Testcontainers.Redis" />
Expand Down
1 change: 1 addition & 0 deletions src/Databases/IntegrationTests/SupportedDatabase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ public enum SupportedDatabase
Postgres,
Redis,
Mongo,
Elasticsearch,
}
2 changes: 2 additions & 0 deletions src/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
<PackageVersion Include="Docker.DotNet" Version="3.125.15" />
<PackageVersion Include="DocumentFormat.OpenXml" Version="3.0.2" />
<PackageVersion Include="DotNet.ReproducibleBuilds" Version="1.1.1" />
<PackageVersion Include="Elastic.Clients.Elasticsearch" Version="8.14.2" />
<PackageVersion Include="FluentAssertions" Version="6.12.0" />
<PackageVersion Include="GitHubActionsTestLogger" Version="2.4.1" />
<PackageVersion Include="Google_GenerativeAI" Version="1.0.1" />
Expand Down Expand Up @@ -78,6 +79,7 @@
<PackageVersion Include="System.Text.Json" Version="8.0.3" />
<PackageVersion Include="System.ValueTuple" Version="4.5.0" />
<PackageVersion Include="Testcontainers" Version="3.9.0" />
<PackageVersion Include="Testcontainers.Elasticsearch" Version="3.9.0" />
<PackageVersion Include="Testcontainers.MongoDb" Version="3.9.0" />
<PackageVersion Include="Testcontainers.PostgreSql" Version="3.9.0" />
<PackageVersion Include="Testcontainers.Redis" Version="3.9.0" />
Expand Down
Loading