# Install the script's dependencies: Scholar from Hex and the local `text_mining`
# project found in the `text_mining` directory next to this file.
Mix.install(
[
{:scholar, "~> 0.1.0"},
{:text_mining, path: Path.join([__DIR__, "text_mining"])}
],
# Make EXLA the default Nx backend for everything run by this script.
# NOTE(review): `:exla` is not listed as a direct dependency above — presumably it
# is pulled in through `text_mining`'s own deps; confirm, otherwise EXLA.Backend
# will not be available at runtime.
config: [nx: [default_backend: EXLA.Backend]]
)
alias TextMining.{
EmbeddingComparator,
EmbeddingClusterer,
TextComparator,
DocumentCreator,
TextClusterer
}
# Comparator used both to build documents and to compare them.
text_comparator = EmbeddingComparator.new()

# Turn every raw text into a document through the comparator.
documents =
  for text <- ["a cat walks into a bar", "another text"] do
    DocumentCreator.make_document(text_comparator, text)
  end

# Compare the document list against itself.
# NOTE(review): the trailing 2 is handed to compare_documents/4 as-is; its meaning
# (e.g. number of matches per document) is not visible here — confirm.
comparison_results =
  TextComparator.compare_documents(text_comparator, documents, documents, 2)
# Build a clusterer that reuses the comparator's text embedder.
clusterer = EmbeddingClusterer.new(text_comparator.text_embedder)

# Keep the fitted clustering in its own binding: it is needed both to group the
# documents and to obtain the centroids used by the closest-cluster lookup below.
# NOTE(review): the 2 is presumably the number of clusters — confirm.
fitted_clustering = EmbeddingClusterer.fit_clustering(clusterer, documents, 2)

document_clusters =
  EmbeddingClusterer.get_clustered_documents(fitted_clustering, documents)

# `centroids` was previously referenced without ever being bound, which made the
# script fail to compile (undefined variable).
# NOTE(review): assumes fit_clustering/3 returns a Scholar k-means model, whose
# centroids are stored in the `clusters` field — confirm against EmbeddingClusterer
# and adjust the accessor if the fitted value has a different shape.
centroids = fitted_clustering.clusters

# Find the cluster closest to the second document.
TextClusterer.get_closest_cluster(text_comparator, Enum.at(documents, 1), centroids)