diff --git a/searchcore/src/tests/proton/matching/querynodes_test.cpp b/searchcore/src/tests/proton/matching/querynodes_test.cpp index edd23e5effed..07a37bbc5d5d 100644 --- a/searchcore/src/tests/proton/matching/querynodes_test.cpp +++ b/searchcore/src/tests/proton/matching/querynodes_test.cpp @@ -533,11 +533,11 @@ TEST(QueryNodesTest, requireThatSimpleIntermediatesGetProperBlending) TEST(QueryNodesTest, control_query_nodes_size) { - EXPECT_EQ(64u + sizeof(std::string), sizeof(ProtonTermData)); + EXPECT_EQ(72u + sizeof(std::string), sizeof(ProtonTermData)); EXPECT_EQ(32u + 2 * sizeof(std::string), sizeof(search::query::NumberTerm)); - EXPECT_EQ(96u + 3 * sizeof(std::string), sizeof(ProtonNodeTypes::NumberTerm)); + EXPECT_EQ(104u + 3 * sizeof(std::string), sizeof(ProtonNodeTypes::NumberTerm)); EXPECT_EQ(32u + 2 * sizeof(std::string), sizeof(search::query::StringTerm)); - EXPECT_EQ(96u + 3 * sizeof(std::string), sizeof(ProtonNodeTypes::StringTerm)); + EXPECT_EQ(104u + 3 * sizeof(std::string), sizeof(ProtonNodeTypes::StringTerm)); } } // namespace diff --git a/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp b/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp index 3edb9ee61120..1caae2169105 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp @@ -52,7 +52,8 @@ ProtonTermData::resolve(const ViewResolver &resolver, const IIndexEnvironment &i for (size_t i = 0; i < fields.size(); ++i) { const FieldInfo *info = idxEnv.getFieldByName(fields[i]); if (info != nullptr) { - _fields.emplace_back(fields[i], info->id(), forceFilter || info->isFilter()); + _fields.emplace_back(fields[i], info->id(), + (forceFilter ? search::fef::FilterThreshold(true) : info->get_filter_threshold())); FieldEntry & field = _fields.back(); field.attribute_field = is_attribute(info->type()); } else { diff --git a/searchcore/src/vespa/searchcore/proton/matching/querynodes.h b/searchcore/src/vespa/searchcore/proton/matching/querynodes.h index fcf12bd3b92d..979d36b394d7 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/querynodes.h +++ b/searchcore/src/vespa/searchcore/proton/matching/querynodes.h @@ -31,8 +31,11 @@ class ProtonTermData : public search::fef::ITermData bool attribute_field; FieldEntry(const std::string &name, uint32_t fieldId, bool is_filter) noexcept + : FieldEntry(name, fieldId, search::fef::FilterThreshold(is_filter)) + {} + FieldEntry(const std::string &name, uint32_t fieldId, search::fef::FilterThreshold threshold) noexcept : ITermFieldData(fieldId), - _field_spec(name, fieldId, search::fef::IllegalHandle, is_filter), + _field_spec(name, fieldId, search::fef::IllegalHandle, threshold), attribute_field(false) {} diff --git a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp index d2d42ecef34b..f3fc58c6b044 100644 --- a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp +++ b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp @@ -27,6 +27,7 @@ using search::diskindex::DiskTermBlueprint; using search::diskindex::FieldIndex; using search::diskindex::TestDiskIndex; using search::diskindex::ZcRareWordPosOccIterator; +using search::fef::FilterThreshold; using search::fef::TermFieldMatchDataArray; using search::index::DictionaryLookupResult; using search::index::DummyFileHeaderContext; @@ -408,22 +409,20 @@ DiskIndexTest::requireThatBlueprintCanCreateSearchIterators() EXPECT_EQ(result_f1_w1, SimpleResult().search(*s)); EXPECT_EQ(result_f1_w1, SimpleResult().search(*leaf_b.createFilterSearch(upper_bound))); } - { // bitvector used due to bitvector_limit set. + { // bitvector used due to filter threshold set. // The term 'w2' hits 17 docs in field 'f2' (bitvector for term exists). - double bitvector_limit = 16.0 / 100.0; - _requestContext.get_create_blueprint_params().disk_index_bitvector_limit = bitvector_limit; - b = create_blueprint(FieldSpec("f2", 0, 0, false), makeTerm("w2"), 100); + double threshold = 16.0 / 100.0; + b = create_blueprint(FieldSpec("f2", 0, 0, FilterThreshold(threshold)), makeTerm("w2"), 100); auto& leaf_b = dynamic_cast(*b); s = leaf_b.createLeafSearch(mda); EXPECT_TRUE(dynamic_cast(s.get()) != nullptr); EXPECT_EQ(result_f2_w2, SimpleResult().search(*s)); EXPECT_EQ(result_f2_w2, SimpleResult().search(*leaf_b.createFilterSearch(upper_bound))); } - { // fake bitvector (wrapping posocc iterator) used due to bitvector_limit set. + { // fake bitvector (wrapping posocc iterator) used due to filter threshold set. // The term 'w1' hits 2 docs in field 'f1' (bitvector for term doesn't exist). - double bitvector_limit = 1.0 / 100.0; - _requestContext.get_create_blueprint_params().disk_index_bitvector_limit = bitvector_limit; - b = create_blueprint(FieldSpec("f1", 0, 0, false), makeTerm("w1"), 100); + double threshold = 1.0 / 100.0; + b = create_blueprint(FieldSpec("f1", 0, 0, FilterThreshold(threshold)), makeTerm("w1"), 100); auto& leaf_b = dynamic_cast(*b); s = leaf_b.createLeafSearch(mda); EXPECT_TRUE((dynamic_cast(s.get()) != nullptr)); diff --git a/searchlib/src/tests/queryeval/queryeval_test.cpp b/searchlib/src/tests/queryeval/queryeval_test.cpp index d91507d7da66..8bd70af5d7b7 100644 --- a/searchlib/src/tests/queryeval/queryeval_test.cpp +++ b/searchlib/src/tests/queryeval/queryeval_test.cpp @@ -653,7 +653,7 @@ TEST(QueryEvalTest, test_dump) TEST(QueryEvalTest, test_field_spec) { EXPECT_EQ(8u, sizeof(FieldSpecBase)); - EXPECT_EQ(8u + sizeof(std::string), sizeof(FieldSpec)); + EXPECT_EQ(16u + sizeof(std::string), sizeof(FieldSpec)); } diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp index a1e6ea38a674..ce48716df9d6 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp @@ -173,9 +173,7 @@ class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper { const std::string termStr = termAsString(n); auto lookup_result = _field_index.lookup(termStr); if (lookup_result.valid()) { - double bitvector_limit = getRequestContext().get_create_blueprint_params().disk_index_bitvector_limit; - setResult(std::make_unique - (_field, _field_index, termStr, lookup_result, _field.isFilter(), bitvector_limit)); + setResult(std::make_unique(_field, _field_index, termStr, lookup_result)); } else { setResult(std::make_unique(_field)); } diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp index f24653ab4c3a..e569df49f785 100644 --- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp @@ -41,17 +41,14 @@ getName(uint32_t indexId) DiskTermBlueprint::DiskTermBlueprint(const FieldSpec & field, const FieldIndex& field_index, const std::string& query_term, - DictionaryLookupResult lookupRes, - bool is_filter_field, - double bitvector_limit) + DictionaryLookupResult lookupRes) : SimpleLeafBlueprint(field), _field(field), _field_index(field_index), _query_term(query_term), _lookupRes(std::move(lookupRes)), _bitvector_lookup_result(_field_index.lookup_bit_vector(_lookupRes)), - _is_filter_field(is_filter_field), - _bitvector_limit(bitvector_limit), + _is_filter_field(_field.isFilter()), _fetchPostingsDone(false), _postingHandle(), _bitVector(), @@ -118,8 +115,8 @@ DiskTermBlueprint::calculate_flow_stats(uint32_t docid_limit) const bool DiskTermBlueprint::use_bitvector() const { - return _is_filter_field || - ((get_docid_limit() > 0) && ((double)_lookupRes.counts._numDocs / (double)get_docid_limit()) > _bitvector_limit); + return _is_filter_field || + ((get_docid_limit() > 0) && _field.get_filter_threshold().is_filter((double)_lookupRes.counts._numDocs / (double)get_docid_limit())); } const BitVector * diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h index a35df216cd8f..ba742f57f66a 100644 --- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h @@ -19,7 +19,6 @@ class DiskTermBlueprint : public queryeval::SimpleLeafBlueprint index::DictionaryLookupResult _lookupRes; index::BitVectorDictionaryLookupResult _bitvector_lookup_result; bool _is_filter_field; - double _bitvector_limit; bool _fetchPostingsDone; index::PostingListHandle _postingHandle; std::shared_ptr _bitVector; @@ -34,20 +33,21 @@ class DiskTermBlueprint : public queryeval::SimpleLeafBlueprint /** * Create a new blueprint. * + * The filter threshold setting for the field determines whether bitvector is used for searching. + * If the field is a filter: force use of bitvector. + * Otherwise the filter threshold is compared against the hit estimate of the query term after dictionary lookup. + * If the hit estimate is above the filter threshold: force use of bitvector. + * If no bitvector exists for the term, a fake bitvector wrapping the posocc iterator is used. + * * @param field The field to search in. * @param field_index The field index used to read the bit vector or posting list. + * @param query_term The query term to search for. * @param lookupRes The result after disk dictionary lookup. - * @param is_filter_field Whether this field is filter and we should force use of bit vector. - * @param bitvector_limit The hit estimate limit for whether bitvector should be used for searching this term. - This can be used to tune performance at the cost of quality. - If no bitvector exists for the term, a fake bitvector wrapping the posocc iterator is used. **/ DiskTermBlueprint(const queryeval::FieldSpec & field, const FieldIndex& field_index, const std::string& query_term, - index::DictionaryLookupResult lookupRes, - bool is_filter_field, - double bitvector_limit); + index::DictionaryLookupResult lookupRes); queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override; diff --git a/searchlib/src/vespa/searchlib/fef/filter_threshold.h b/searchlib/src/vespa/searchlib/fef/filter_threshold.h index 76b8862171ab..da91cd8f40e7 100644 --- a/searchlib/src/vespa/searchlib/fef/filter_threshold.h +++ b/searchlib/src/vespa/searchlib/fef/filter_threshold.h @@ -16,10 +16,10 @@ class FilterThreshold { float _threshold; public: - FilterThreshold() noexcept : _threshold(1.0) { } - FilterThreshold(bool is_filter_in) noexcept : _threshold(is_filter_in ? 0.0 : 1.0) { } - FilterThreshold(float threshold) noexcept : _threshold(threshold) { } - FilterThreshold(double threshold) noexcept : _threshold(threshold) { } + explicit FilterThreshold() noexcept : _threshold(1.0) { } + explicit FilterThreshold(bool is_filter_in) noexcept : _threshold(is_filter_in ? 0.0 : 1.0) { } + explicit FilterThreshold(float threshold) noexcept : _threshold(threshold) { } + explicit FilterThreshold(double threshold) noexcept : _threshold(threshold) { } float threshold() const noexcept { return _threshold; } bool is_filter() const noexcept { return _threshold == 0.0; } diff --git a/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp b/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp index c7aab3a5f84b..00a70facdf21 100644 --- a/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp @@ -11,9 +11,18 @@ FieldSpec::FieldSpec(const std::string & name, uint32_t fieldId, fef::TermFieldH FieldSpec::FieldSpec(const std::string & name, uint32_t fieldId, fef::TermFieldHandle handle, bool isFilter_) noexcept - : FieldSpecBase(fieldId, handle, isFilter_), - _name(name) + : FieldSpec(name, fieldId, handle, fef::FilterThreshold(isFilter_)) +{} + +FieldSpec::FieldSpec(const std::string & name, uint32_t fieldId, + fef::TermFieldHandle handle, fef::FilterThreshold threshold) noexcept + : FieldSpecBase(fieldId, handle, threshold.is_filter()), + _name(name), + _threshold(threshold) { + // NOTE: Whether the field is a filter is still tracked in FieldSpecBase + // to ensure this information is available in code where only the base class is used. + // This also ensures that the size of FieldSpecBase is not changed. assert(fieldId < 0x1000000); // Can be represented by 24 bits } diff --git a/searchlib/src/vespa/searchlib/queryeval/field_spec.h b/searchlib/src/vespa/searchlib/queryeval/field_spec.h index 2a13df14276f..8bb5f8b94a7f 100644 --- a/searchlib/src/vespa/searchlib/queryeval/field_spec.h +++ b/searchlib/src/vespa/searchlib/queryeval/field_spec.h @@ -2,6 +2,7 @@ #pragma once +#include #include #include #include @@ -47,16 +48,18 @@ class FieldSpec : public FieldSpecBase { public: FieldSpec(const std::string & name, uint32_t fieldId, fef::TermFieldHandle handle) noexcept; - FieldSpec(const std::string & name, uint32_t fieldId, - fef::TermFieldHandle handle, bool isFilter_) noexcept; + FieldSpec(const std::string & name, uint32_t fieldId, fef::TermFieldHandle handle, bool isFilter_) noexcept; + FieldSpec(const std::string & name, uint32_t fieldId, fef::TermFieldHandle handle, fef::FilterThreshold threshold) noexcept; ~FieldSpec(); void setBase(FieldSpecBase base) noexcept { FieldSpecBase::operator =(base); } const std::string & getName() const noexcept { return _name; } + fef::FilterThreshold get_filter_threshold() const noexcept { return _threshold; } private: - std::string _name; // field name + std::string _name; // field name + fef::FilterThreshold _threshold; }; /**