Skip to content

Commit

Permalink
Add FFI function: tantivy_indexed_doc_counts
Browse files Browse the repository at this point in the history
  • Loading branch information
MochiXu committed Feb 26, 2024
1 parent ee308da commit 70d8b8a
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 3 deletions.
10 changes: 10 additions & 0 deletions include/tantivy_search_cxx.h
Original file line number Diff line number Diff line change
Expand Up @@ -862,3 +862,13 @@ ::rust::Vec<::RowIdWithScore> tantivy_bm25_search(::std::string const &index_pat
// Returns:
// - row_ids u8 bitmap.
::rust::Vec<::std::uint8_t> tantivy_search_bitmap_results(::std::string const &index_path, ::std::string const &query, bool use_regex);

// Get the number of documents stored in the index file.
// In general, we can consider the number of stored documents as 'n',
// and the range of row_id is [0, n-1].
//
// Arguments:
// - `index_path`: The directory path of the index whose loaded reader is queried.
//
// Returns:
// - The count of documents stored in the index file.
// - 0 when no index reader is currently loaded for `index_path`
//   (the Rust side logs an error and reports an empty index in that case).
::std::uint64_t tantivy_indexed_doc_counts(::std::string const &index_path);
12 changes: 12 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,18 @@ pub mod ffi {
use_regex: bool,
) -> Result<Vec<u8>>;

/// Get the number of documents stored in the index file.
///
/// In general, we can consider the number of stored documents as 'n',
/// and the range of row_id is [0, n-1].
///
/// Arguments:
/// - `index_path`: The directory path of the index whose loaded reader is queried.
///
/// Returns:
/// - The count of documents stored in the index file.
/// - `0` when no index reader is currently loaded for `index_path`
///   (the implementation logs an error and reports an empty index in that case).
fn tantivy_indexed_doc_counts(
index_path: &CxxString,
) -> Result<u64>;

}
}

Expand Down
27 changes: 27 additions & 0 deletions src/search/ffi_index_searcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -430,3 +430,30 @@ pub fn tantivy_search_bitmap_results(
let u8_bitmap: Vec<u8> = ConvertUtils::row_ids_to_u8_bitmap(&row_ids_number);
Ok(u8_bitmap)
}


/// Get the number of documents stored in the index file.
///
/// In general, we can consider the number of stored documents as 'n',
/// and the range of row_id is [0, n-1].
///
/// Arguments:
/// - `index_path`: The directory path of the index; it is the key used to look up
///   the index reader in `FFI_INDEX_SEARCHER_CACHE`.
///
/// Returns:
/// - `Ok(n)`: the document count reported by the reader's searcher (`num_docs()`).
/// - `Ok(0)`: no index reader is cached for `index_path` (for example the index was
///   never loaded); the lookup error is logged and an empty index is reported.
/// - `Err(..)`: `index_path` could not be converted by `convert_cxx_string`.
pub fn tantivy_indexed_doc_counts(index_path: &CxxString) -> Result<u64, String> {
    // Parse parameter.
    let index_path_str =
        convert_cxx_string("tantivy_indexed_doc_counts", "index_path", index_path)?;

    // Get the index reader from CACHE. The path string is not used afterwards,
    // so it is moved into the lookup instead of being cloned.
    let index_r = match FFI_INDEX_SEARCHER_CACHE.get_index_reader_bridge(index_path_str) {
        Ok(content) => content,
        Err(e) => {
            ERROR!(function: "tantivy_indexed_doc_counts", "Index reader already been removed: {}", e);
            // A missing reader is deliberately reported as an empty index rather
            // than as an error, so callers always receive a count.
            return Ok(0);
        }
    };

    Ok(index_r.reader.searcher().num_docs())
}
41 changes: 38 additions & 3 deletions src/search/ffi_index_searcher_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@ mod tests {
use tempfile::TempDir;

use crate::{
tantivy_bm25_search, tantivy_bm25_search_with_filter, tantivy_count_in_rowid_range,
tantivy_load_index, tantivy_reader_free, tantivy_search_bitmap_results,
tantivy_search_in_rowid_range, update_logger_for_test,
tantivy_bm25_search, tantivy_bm25_search_with_filter, tantivy_count_in_rowid_range, tantivy_indexed_doc_counts, tantivy_load_index, tantivy_reader_free, tantivy_search_bitmap_results, tantivy_search_in_rowid_range, update_logger_for_test
};

fn commit_some_docs_for_test(index_directory: String, need_store_doc: bool) {
Expand Down Expand Up @@ -557,4 +555,41 @@ mod tests {
let result = tantivy_search_bitmap_results(empty_path_cxx, query_cxx, false);
assert!(result.is_err());
}

#[test]
fn test_tantivy_indexed_doc_counts() {
    // Scratch directory for the index; the guard stays bound for the whole test.
    let scratch_dir = TempDir::new().unwrap();
    let scratch_dir_str = scratch_dir.path().to_str().unwrap();
    let_cxx_string!(scratch_dir_cxx = scratch_dir_str);
    let index_directory = scratch_dir_cxx.as_ref().get_ref();

    // Commit the fixture documents, then load the index reader for that directory.
    commit_some_docs_for_test(String::from(scratch_dir_str), false);
    let load_result = tantivy_load_index(index_directory);
    assert!(load_result.is_ok());

    // With the reader loaded, the count reflects the committed documents.
    let doc_count = tantivy_indexed_doc_counts(index_directory).unwrap();
    assert_eq!(doc_count, 5);
}

#[test]
fn test_tantivy_indexed_doc_counts_boundary_1() {
    // Scratch directory for the index; the guard stays bound for the whole test.
    let scratch_dir = TempDir::new().unwrap();
    let scratch_dir_str = scratch_dir.path().to_str().unwrap();
    let_cxx_string!(scratch_dir_cxx = scratch_dir_str);
    let index_directory = scratch_dir_cxx.as_ref().get_ref();

    // Commit the fixture documents but never load an index reader.
    commit_some_docs_for_test(String::from(scratch_dir_str), false);

    // Without a loaded reader the count falls back to zero.
    let doc_count = tantivy_indexed_doc_counts(index_directory).unwrap();
    assert_eq!(doc_count, 0);
}

#[test]
fn test_tantivy_indexed_doc_counts_boundary_2() {
    // Scratch directory that is never indexed and never loaded.
    let scratch_dir = TempDir::new().unwrap();
    let scratch_dir_str = scratch_dir.path().to_str().unwrap();
    let_cxx_string!(scratch_dir_cxx = scratch_dir_str);
    let index_directory = scratch_dir_cxx.as_ref().get_ref();

    // No docs were committed and no reader was loaded: the count is zero.
    let doc_count = tantivy_indexed_doc_counts(index_directory).unwrap();
    assert_eq!(doc_count, 0);
}
}

0 comments on commit 70d8b8a

Please sign in to comment.