Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Added Elasticsearch database. #352

Merged
merged 4 commits into from
Aug 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions LangChain.sln
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LangChain.Providers.Generat
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LangChain.Providers.Anthropic", "src\Providers\Anthropic\src\LangChain.Providers.Anthropic.csproj", "{BD9AF0B6-15C5-4365-9B78-6EF0C9CC3E2F}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Elasticsearch", "Elasticsearch", "{3FCA798D-2CBE-41E7-B5F7-65B4AF6EBC8A}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LangChain.Databases.Elasticsearch", "src\Databases\Elasticsearch\src\LangChain.Databases.Elasticsearch.csproj", "{6C053245-EB8B-4C98-85AC-B00BA222ABBB}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -717,6 +721,10 @@ Global
{BD9AF0B6-15C5-4365-9B78-6EF0C9CC3E2F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{BD9AF0B6-15C5-4365-9B78-6EF0C9CC3E2F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{BD9AF0B6-15C5-4365-9B78-6EF0C9CC3E2F}.Release|Any CPU.Build.0 = Release|Any CPU
{6C053245-EB8B-4C98-85AC-B00BA222ABBB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{6C053245-EB8B-4C98-85AC-B00BA222ABBB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{6C053245-EB8B-4C98-85AC-B00BA222ABBB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{6C053245-EB8B-4C98-85AC-B00BA222ABBB}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -859,6 +867,8 @@ Global
{FCD9A4CB-C70A-499F-8179-A50836B809D6} = {FC28B45D-2604-4F4C-BC3E-F2301EDB3469}
{F78D9189-24B8-4403-89D2-7FBBB5D83556} = {FC28B45D-2604-4F4C-BC3E-F2301EDB3469}
{BD9AF0B6-15C5-4365-9B78-6EF0C9CC3E2F} = {7A2A589D-F8EF-4744-9BEE-B06A5F109851}
{3FCA798D-2CBE-41E7-B5F7-65B4AF6EBC8A} = {A098FF69-D8B5-4B2B-83D5-F777D3817F15}
{6C053245-EB8B-4C98-85AC-B00BA222ABBB} = {3FCA798D-2CBE-41E7-B5F7-65B4AF6EBC8A}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {5C00D0F1-6138-4ED9-846B-97E43D6DFF1C}
Expand Down
106 changes: 106 additions & 0 deletions src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
using Elastic.Clients.Elasticsearch;

namespace LangChain.Databases.Elasticsearch;

/// <summary>
/// Elasticsearch vector collection.
/// </summary>
/// <remarks>
/// This type is currently a placeholder: no operation is implemented yet.
/// Every member is <c>async</c>, so failures (including argument validation)
/// are reported through the returned task rather than thrown synchronously.
/// </remarks>
public class ElasticsearchVectorCollection(
    ElasticsearchClient client,
    string name = VectorCollection.DefaultName,
    string? id = null)
    : VectorCollection(name, id), IVectorCollection
{
    // NOTE: `client` is intentionally unused until the operations below are
    // implemented (the project file suppresses CS9113 for this reason).

    /// <inheritdoc />
    /// <exception cref="NotImplementedException">Always; the operation is not implemented yet.</exception>
    public async Task<Vector?> GetAsync(string id, CancellationToken cancellationToken = default)
    {
        // TODO: the earlier draft fetched the document by id from the index
        // named after this collection and returned null when it was missing.
        throw NotImplemented(nameof(GetAsync));
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="ids"/> is <see langword="null"/>.</exception>
    /// <exception cref="NotImplementedException">Always (for non-null input); the operation is not implemented yet.</exception>
    public async Task<bool> DeleteAsync(
        IEnumerable<string> ids,
        CancellationToken cancellationToken = default)
    {
        ids = ids ?? throw new ArgumentNullException(nameof(ids));

        // TODO: the earlier draft deleted each id in turn from the index named
        // after this collection and then returned true.
        throw NotImplemented(nameof(DeleteAsync));
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="items"/> is <see langword="null"/>.</exception>
    /// <exception cref="NotImplementedException">Always (for non-null input); the operation is not implemented yet.</exception>
    public async Task<IReadOnlyCollection<string>> AddAsync(
        IReadOnlyCollection<Vector> items,
        CancellationToken cancellationToken = default)
    {
        items = items ?? throw new ArgumentNullException(nameof(items));

        throw NotImplemented(nameof(AddAsync));
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <see langword="null"/>.</exception>
    /// <exception cref="NotImplementedException">Always (for non-null input); the operation is not implemented yet.</exception>
    public async Task<VectorSearchResponse> SearchAsync(
        VectorSearchRequest request,
        VectorSearchSettings? settings = default,
        CancellationToken cancellationToken = default)
    {
        request = request ?? throw new ArgumentNullException(nameof(request));
        // Kept from the original stub so the default is already in place once
        // the search itself is written; it has no effect today.
        settings ??= new VectorSearchSettings();

        // TODO: the earlier draft was a client sample (fixed index name, fixed
        // paging, an empty Knn() clause and an unrelated term filter) that did
        // not use `request` or `settings`; the real query still has to be written.
        throw NotImplemented(nameof(SearchAsync));
    }

    /// <inheritdoc />
    /// <exception cref="NotImplementedException">Always; the operation is not implemented yet.</exception>
    public async Task<bool> IsEmptyAsync(CancellationToken cancellationToken = default)
    {
        // Declared async like its siblings so the exception is delivered via
        // the returned task instead of being thrown at the call site.
        throw NotImplemented(nameof(IsEmptyAsync));
    }

    /// <summary>
    /// Builds the exception reported by members that are not implemented yet.
    /// </summary>
    /// <param name="operation">Name of the member that was called.</param>
    /// <returns>A <see cref="NotImplementedException"/> naming the missing member.</returns>
    private static NotImplementedException NotImplemented(string operation) =>
        new($"{nameof(ElasticsearchVectorCollection)}.{operation} is not implemented yet.");
}
78 changes: 78 additions & 0 deletions src/Databases/Elasticsearch/src/ElasticsearchVectorDatabase.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
using Elastic.Clients.Elasticsearch;

namespace LangChain.Databases.Elasticsearch;

/// <summary>
/// Elasticsearch vector store.
/// </summary>
/// <remarks>
/// Placeholder implementation: every member is an <c>async</c> stub whose
/// returned task faults with <see cref="NotImplementedException"/>.
/// NOTE(review): the drafts that used to sit in these bodies called
/// collection-style methods on the client (GetCollectionAsync,
/// ListCollectionsAsync, CreateCollectionAsync, DeleteCollectionAsync);
/// confirm whether <see cref="ElasticsearchClient"/> exposes such methods
/// before reviving them.
/// </remarks>
public class ElasticsearchVectorDatabase(
    ElasticsearchClient client)
    : IVectorDatabase
{
    /// <inheritdoc />
    /// <exception cref="NotImplementedException">Always; not implemented yet.</exception>
    public async Task<IVectorCollection> GetCollectionAsync(
        string collectionName,
        CancellationToken cancellationToken = default)
        => throw new NotImplementedException();

    /// <inheritdoc />
    /// <exception cref="NotImplementedException">Always; not implemented yet.</exception>
    public async Task<bool> IsCollectionExistsAsync(
        string collectionName,
        CancellationToken cancellationToken = default)
        => throw new NotImplementedException();

    /// <inheritdoc />
    /// <exception cref="NotImplementedException">Always; not implemented yet.</exception>
    public async Task CreateCollectionAsync(
        string collectionName,
        int dimensions,
        CancellationToken cancellationToken = default)
        => throw new NotImplementedException();

    /// <inheritdoc />
    /// <exception cref="NotImplementedException">Always; not implemented yet.</exception>
    public async Task<IReadOnlyList<string>> ListCollectionsAsync(
        CancellationToken cancellationToken = default)
        => throw new NotImplementedException();

    /// <inheritdoc />
    /// <exception cref="NotImplementedException">Always; not implemented yet.</exception>
    public async Task<IVectorCollection> GetOrCreateCollectionAsync(
        string collectionName,
        int dimensions,
        CancellationToken cancellationToken = default)
        => throw new NotImplementedException();

    /// <inheritdoc />
    /// <exception cref="NotImplementedException">Always; not implemented yet.</exception>
    public async Task DeleteCollectionAsync(
        string collectionName,
        CancellationToken cancellationToken = default)
        => throw new NotImplementedException();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<!-- Project file for the Elasticsearch vector-database provider of LangChain. -->
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>net4.6.2;netstandard2.0;net6.0;net8.0</TargetFrameworks>
<!--
Suppressed while the provider classes are still stubs:
CS9113 - primary-constructor parameter is unread (the `client` parameter is not used yet);
CS1998 - async method lacks 'await' (the stub methods only throw NotImplementedException).
TODO: drop both suppressions once the operations are implemented.
-->
<NoWarn>$(NoWarn);CS9113;CS1998</NoWarn>
</PropertyGroup>

<!-- NuGet package metadata; PackageTags appends to whatever value is already defined. -->
<PropertyGroup Label="NuGet">
<Description>Elasticsearch support for LangChain.</Description>
<PackageTags>$(PackageTags);elasticsearch</PackageTags>
</PropertyGroup>

<!-- No Version attribute here; presumably the version is managed centrally - verify against the repository's package-version props. -->
<ItemGroup>
<PackageReference Include="Elastic.Clients.Elasticsearch" />
</ItemGroup>

<!-- Shared database abstractions project referenced by this provider. -->
<ItemGroup>
<ProjectReference Include="..\..\Abstractions\src\LangChain.Databases.Abstractions.csproj" />
</ItemGroup>

</Project>
15 changes: 15 additions & 0 deletions src/Databases/IntegrationTests/DatabaseTests.Configure.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
using DotNet.Testcontainers.Builders;
using Elastic.Clients.Elasticsearch;
using LangChain.Databases.Chroma;
using LangChain.Databases.Elasticsearch;
using LangChain.Databases.InMemory;
using LangChain.Databases.OpenSearch;
using LangChain.Databases.Postgres;
using LangChain.Databases.Sqlite;
using LangChain.Databases.Mongo;
using Testcontainers.Elasticsearch;
using LangChain.Databases.DuckDb;
using Testcontainers.MongoDb;
using Testcontainers.PostgreSql;
Expand Down Expand Up @@ -129,7 +132,19 @@ private static async Task<DatabaseTestEnvironment> StartEnvironmentForAsync(Supp
{
VectorDatabase = new DuckDbVectorDatabase(store)
};
case SupportedDatabase.Elasticsearch:
{
var container = new ElasticsearchBuilder().Build();

await container.StartAsync(cancellationToken);

var client = new ElasticsearchClient(new Uri($"http://localhost:{container.GetMappedPublicPort(9200)}"));
return new DatabaseTestEnvironment
{
VectorDatabase = new ElasticsearchVectorDatabase(client),
Container = container,
};
}
default:
throw new ArgumentOutOfRangeException(nameof(database), database, null);
}
Expand Down
7 changes: 7 additions & 0 deletions src/Databases/IntegrationTests/DatabaseTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ public partial class DatabaseTests
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
//[TestCase(SupportedDatabase.Elasticsearch)]
[TestCase(SupportedDatabase.DuckDb)]
public async Task CreateAndDeleteCollection_Ok(SupportedDatabase database)
{
Expand Down Expand Up @@ -60,6 +61,7 @@ await vectorDatabase.Invoking(y => y.GetCollectionAsync(environment.CollectionNa
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.DuckDb)]
//[TestCase(SupportedDatabase.Elasticsearch)]
public async Task AddDocuments_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -106,6 +108,7 @@ public async Task AddDocuments_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.DuckDb)]
//[TestCase(SupportedDatabase.Elasticsearch)]
public async Task AddTexts_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -156,6 +159,7 @@ public async Task AddTexts_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.DuckDb)]
//[TestCase(SupportedDatabase.Elasticsearch)]
public async Task DeleteDocuments_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -194,6 +198,7 @@ public async Task DeleteDocuments_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.DuckDb)]
//[TestCase(SupportedDatabase.Elasticsearch)]
public async Task SimilaritySearch_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -226,6 +231,7 @@ public async Task SimilaritySearch_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.DuckDb)]
//[TestCase(SupportedDatabase.Elasticsearch)]
public async Task SimilaritySearchByVector_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -253,6 +259,7 @@ public async Task SimilaritySearchByVector_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.OpenSearch)]
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
//[TestCase(SupportedDatabase.Elasticsearch)]
[TestCase(SupportedDatabase.DuckDb)]
public async Task SimilaritySearchWithScores_Ok(SupportedDatabase database)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<ProjectReference Include="..\AzureSearch\src\LangChain.Databases.AzureSearch.csproj" />
<ProjectReference Include="..\Chroma\src\LangChain.Databases.Chroma.csproj" />
<ProjectReference Include="..\DuckDb\src\LangChain.Databases.DuckDb.csproj" />
<ProjectReference Include="..\Elasticsearch\src\LangChain.Databases.Elasticsearch.csproj" />
<ProjectReference Include="..\InMemory\src\LangChain.Databases.InMemory.csproj" />
<ProjectReference Include="..\Kendra\src\LangChain.Databases.Kendra.csproj" />
<ProjectReference Include="..\Mongo\src\LangChain.Databases.Mongo.csproj" />
Expand All @@ -26,6 +27,7 @@

<ItemGroup>
<PackageReference Include="Testcontainers" />
<PackageReference Include="Testcontainers.Elasticsearch" />
<PackageReference Include="Testcontainers.PostgreSql" />
<PackageReference Include="Testcontainers.MongoDb" />
<PackageReference Include="Testcontainers.Redis" />
Expand Down
1 change: 1 addition & 0 deletions src/Databases/IntegrationTests/SupportedDatabase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ public enum SupportedDatabase
Postgres,
Redis,
Mongo,
Elasticsearch,
DuckDb
}
Loading
Loading