-
Notifications
You must be signed in to change notification settings - Fork 4
/
_55_AstraVectorTest.java
78 lines (65 loc) · 3 KB
/
_55_AstraVectorTest.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
package devoxx.demo._5_vectorsearch;
import com.datastax.astra.client.model.Document;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.astradb.AstraDbEmbeddingStore;
import devoxx.demo.devoxx.Quote;
import devoxx.demo.utils.AbstractDevoxxTestSupport;
import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.metadataKey;
import static devoxx.demo.devoxx.Utilities.loadQuotes;
/**
 * Demo tests that ingest quotes into the quote collection and then query them
 * through the LangChain4j {@link EmbeddingStore} abstraction.
 *
 * <p>The collection and the embedding model are obtained from the helpers
 * inherited from {@link AbstractDevoxxTestSupport}.
 */
@Slf4j
class _55_AstraVectorTest extends AbstractDevoxxTestSupport {

    /** Pause applied after each single-document insert, in milliseconds. */
    private static final long INSERT_PAUSE_MILLIS = 100L;

    /**
     * Empties the quote collection, then loads the quotes from
     * {@code philo_quotes.json}, embeds each one and inserts the resulting
     * documents one at a time.
     *
     * @throws IOException declared for the quote-loading step
     * @throws IllegalStateException if the thread is interrupted while pausing
     *         between two inserts
     */
    @Test
    void shouldIngestDocuments() throws IOException {
        getCollectionQuote().deleteAll();
        EmbeddingModel embeddingModel = getEmbeddingModelGecko();
        loadQuotes("philo_quotes.json") // extraction
                .stream()
                .map(quote -> mapAsDocument(embeddingModel, quote)) // no chunking (single sentences)
                .forEach(doc -> {
                    getCollectionQuote().insertOne(doc);
                    pauseBetweenInserts();
                });
        // Disabled alternative: insert every mapped document in a single bulk call.
        //createCollectionQuote().insertMany(
        //        loadQuotes("philo_quotes.json") // extraction
        //                .stream()
        //                .map(quote -> mapAsDocument(embeddingModel, quote))// no chunking (single sentences)
        //                .toList()
        //);
    }

    /**
     * Embeds a question, runs a similarity search restricted to quotes whose
     * {@code authors} metadata equals {@code aristotle}, and prints the text of
     * each match (at most 3, with a minimum score of 0.1) to standard output.
     */
    @Test
    void langchain4jEmbeddingStore() {
        // An embedding model is needed to turn the question into a vector.
        EmbeddingModel embeddingModel = getEmbeddingModelGecko();

        // Embed the question.
        Embedding questionEmbedding = embeddingModel.embed("We struggle all our life for nothing").content();

        // Wrap the quote collection in a LangChain4j embedding store.
        EmbeddingStore<TextSegment> embeddingStore = new AstraDbEmbeddingStore(getCollectionQuote());

        // Query with a metadata filter.
        log.info("Querying with filter");
        EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
                .queryEmbedding(questionEmbedding)
                .filter(metadataKey("authors").isEqualTo("aristotle"))
                .maxResults(3)
                .minScore(0.1d)
                .build();
        embeddingStore.search(request)
                .matches()
                .stream()
                .map(match -> match.embedded().text())
                .forEach(System.out::println);
    }

    /**
     * Builds the document stored for a quote: the quote's row id is used as the
     * document id, and the body, author and tags are stored under
     * {@code content}, {@code authors} and {@code tags}. The vector is the
     * embedding of the quote body.
     *
     * @param embeddingModel model used to embed the quote body
     * @param quote          quote to convert
     * @return the document, including its vector
     */
    Document mapAsDocument(EmbeddingModel embeddingModel, Quote quote) {
        log.info("Mapping quote: {}", quote.rowId());
        float[] vector = embeddingModel.embed(quote.body()).content().vector();
        return new Document(quote.rowId())
                .append("content", quote.body())
                .append("authors", quote.author())
                .append("tags", quote.tags())
                .vector(vector);
    }

    /**
     * Sleeps for {@link #INSERT_PAUSE_MILLIS} after an insert.
     * NOTE(review): presumably this throttles calls to the remote services - confirm.
     *
     * @throws IllegalStateException if the thread is interrupted while sleeping;
     *         the interrupt flag is restored before throwing
     */
    private static void pauseBetweenInserts() {
        try {
            Thread.sleep(INSERT_PAUSE_MILLIS);
        } catch (InterruptedException e) {
            // Restore the flag so the caller and the test runner can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Interrupted while pausing between inserts", e);
        }
    }
}