Skip to content

Latest commit

 

History

History
53 lines (44 loc) · 984 Bytes

bumblebee_text_mining.livemd

File metadata and controls

53 lines (44 loc) · 984 Bytes

Bumblebee

# Notebook setup: installs the dependencies used by the cells below.
Mix.install(
  [
    {:scholar, "~> 0.1.0"},
    # Local package, resolved from the "text_mining" directory next to this notebook.
    {:text_mining, path: Path.join([__DIR__, "text_mining"])}
  ],
  # Sets EXLA.Backend as the default Nx backend.
  # NOTE(review): :exla is not listed in the dependencies above — presumably it is
  # pulled in transitively via text_mining; confirm, otherwise EXLA.Backend will be
  # unavailable at runtime.
  config: [nx: [default_backend: EXLA.Backend]]
)

Section

alias TextMining.{
  EmbeddingComparator,
  EmbeddingClusterer,
  TextComparator,
  DocumentCreator,
  TextClusterer
}

# Comparator shared by every step below.
text_comparator = EmbeddingComparator.new()

# Turn each raw string into a document using the comparator.
documents =
  for text <- ["a cat walks into a bar", "another text"] do
    DocumentCreator.make_document(text_comparator, text)
  end

# Compare the document list against itself; the final argument (2) is passed through
# as-is (presumably a result/neighbour count — confirm against TextComparator).
comparison_results =
  TextComparator.compare_documents(text_comparator, documents, documents, 2)

# The clusterer is built from the comparator's text embedder.
clusterer = EmbeddingClusterer.new(text_comparator.text_embedder)

# Fit the clustering on the documents (third argument 2), then feed the fit result
# straight into get_clustered_documents together with the same documents.
document_clusters =
  EmbeddingClusterer.get_clustered_documents(
    EmbeddingClusterer.fit_clustering(clusterer, documents, 2),
    documents
  )
# Calls TextClusterer.get_closest_cluster with the comparator, the second document
# (index 1) and `centroids`.
# NOTE(review): `centroids` is never bound anywhere in the visible notebook, so this
# cell will fail with an undefined-variable error as written. Presumably it should be
# derived from the fitted clustering above — confirm against the TextMining API and
# bind it before this call.
text_comparator
|> TextClusterer.get_closest_cluster(Enum.at(documents, 1), centroids)