fixed test 2 bed nn

khoroshevskyi · khoroshevskyi · commit caf20ab2f5ce · 2024-04-15T09:54:41.000-04:00
diff --git a/autodoc.py b/autodoc.py
@@ -99,4 +99,4 @@
             with open(out, "w") as stream:
                 stream.write(md_result)
 else:
-    print("Skipping jupyter notebooks")
+    print("Skipping jupyter notebooks")
diff --git a/ci/scripts/count_records.py b/ci/scripts/count_records.py
@@ -7,15 +7,26 @@
 
 parser = ArgumentParser(description="Count records in a PostgreSQL table and verify")
 
-parser.add_argument("-t", "--table", help="Table to count records in", 
-	required=True, type=str)
-parser.add_argument("-e", "--expected-count", help="Expected number of records", 
-	type=int, required=False, default=None)
+parser.add_argument(
+    "-t", "--table", help="Table to count records in", required=True, type=str
+)
+parser.add_argument(
+    "-e",
+    "--expected-count",
+    help="Expected number of records",
+    type=int,
+    required=False,
+    default=None,
+)
 
 args = parser.parse_args()
 
-bbc = BedBaseConf(get_bedbase_cfg('$GITHUB_WORKSPACE/ci/cfg/config_min.yaml'))
+bbc = BedBaseConf(get_bedbase_cfg("$GITHUB_WORKSPACE/ci/cfg/config_min.yaml"))
 row_count = bbc._count_rows(table_name=args.table)
 if args.expected_count:
-	assert row_count == args.expected_count, "Number of records in the '{}' table ({}) not equal {}".format(args.table, row_count, args.expected_count)
+    assert (
+        row_count == args.expected_count
+    ), "Number of records in the '{}' table ({}) not equal {}".format(
+        args.table, row_count, args.expected_count
+    )
 sys.exit(0)
diff --git a/docs/geniml/tutorials/text2bednn-search-interface.md b/docs/geniml/tutorials/text2bednn-search-interface.md
@@ -1,15 +1,15 @@
 # How to create a natural language search backend for BED files
 The metadata of each BED file is needed to build a natural language search backend. BED files embedding vectors are created by
-`Region2Vec`, and metadata embedding vectors are created by [`FastEmbed`](https://github.com/qdrant/fastembed), [`SentenceTransformers`](https://www.sbert.net/), or other text embedding models.
+the `Region2Vec` model, and metadata embedding vectors are created by [`FastEmbed`](https://github.com/qdrant/fastembed), [`SentenceTransformers`](https://www.sbert.net/), or other text embedding models.
 
 `Vec2VecFNN`, a feedforward neural network (FNN), is trained to maps vectors from the embedding space of natural language to the embedding
-space of BED files. When a natural language query string is given, it will first be encoded to a vector by the text embedding model, and that 
+space of BED files. When a natural language query string is given, it will first be encoded to a vector by the text embedding model, and then the resulting 
 vector will be encoded to a query vector by the FNN. `search` backend can perform k-nearest neighbors (KNN) search among the stored BED
 file embedding vectors, and the BED files whose embedding vectors are closest to that query vector are the search results.
 
 ## Store embedding vectors
 It is recommended to use `geniml.search.backend.HNSWBackend` to store embedding vectors. In the `HNSWBackend` that stores each BED file embedding
-vector, the `payload` should contain the name of BED file. In the `HNSWBackend` that stores the embedding vectors of each 
+vector, the `payload` should contain the name or identifier of the BED file. In the `HNSWBackend` that stores the embedding vectors of each 
 metadata string, the `payload` should contain the original string text and the names of BED files that have that string in metadata.
 
 ## Train the model
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -102,6 +102,7 @@ nav:
       - Fine-tune embeddings: geniml/tutorials/fine-tune-region2vec-model.md
       - Randomize bed files: geniml/tutorials/bedshift.md
       - Create evaluation dataset with bedshift: geniml/tutorials/bedshift-evaluation-guide.md
+      - Create search backend: geniml/tutorials/text2bednn-search-interface.md
     - Reference:
       - How to cite: citations.md
       - API documentation: geniml/autodoc_build/geniml.md