Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions python/python/lance/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2337,7 +2337,6 @@ def create_scalar_index(
Literal["BITMAP"],
Literal["LABEL_LIST"],
Literal["INVERTED"],
Literal["FTS"],
Literal["NGRAM"],
Literal["ZONEMAP"],
Literal["BLOOMFILTER"],
Expand Down Expand Up @@ -2406,7 +2405,7 @@ def create_scalar_index(
called zones and stores summary statistics for each zone (min, max,
null_count, nan_count, fragment_id, local_row_offset). It's very small but
only effective if the column is at least approximately in sorted order.
* ``FTS/INVERTED``. It is used to index document columns. This index
* ``INVERTED``. It is used to index document columns. This index
can conduct full-text searches. For example, a column that contains any word
of query string "hello world". The results will be ranked by BM25.
* ``BLOOMFILTER``. This inexact index uses a bloom filter. It is small
Expand All @@ -2426,8 +2425,8 @@ def create_scalar_index(
or string column.
index_type : str
The type of the index. One of ``"BTREE"``, ``"BITMAP"``,
``"LABEL_LIST"``, ``"NGRAM"``, ``"ZONEMAP"``, ``"FTS"``,
``"INVERTED"`` or ``"BLOOMFILTER"``.
``"LABEL_LIST"``, ``"NGRAM"``, ``"ZONEMAP"``, ``"INVERTED"``, or
``"BLOOMFILTER"``.
name : str, optional
The index name. If not provided, it will be generated from the
column name.
Expand Down Expand Up @@ -2456,8 +2455,8 @@ def create_scalar_index(
It won't impact the performance of non-phrase queries even if it is set to
True.
base_tokenizer: str, default "simple"
This is for the ``INVERTED`` index. The base tokenizer to use. The value
can be:
This is for the ``INVERTED`` index. The base tokenizer to use. The
value can be:
* "simple": splits tokens on whitespace and punctuation.
* "whitespace": splits tokens on whitespace.
* "raw": no tokenization.
Expand Down Expand Up @@ -2548,7 +2547,7 @@ def create_scalar_index(
raise NotImplementedError(
(
'Only "BTREE", "BITMAP", "NGRAM", "ZONEMAP", "LABEL_LIST", '
'or "INVERTED" or "BLOOMFILTER" are supported for '
'"INVERTED", or "BLOOMFILTER" are supported for '
f"scalar columns. Received {index_type}",
)
)
Expand Down Expand Up @@ -2581,7 +2580,7 @@ def create_scalar_index(
field_type
):
raise TypeError(f"NGRAM index column {column} must be a string")
elif index_type in ["INVERTED", "FTS"]:
elif index_type in ["INVERTED"]:
value_type = field_type
if pa.types.is_list(field_type) or pa.types.is_large_list(field_type):
value_type = field_type.value_type
Expand Down
Loading