Skip to content

Commit

Permalink
Merge pull request #33165 from vespa-engine/vekterli/add-field-specif…
Browse files Browse the repository at this point in the history
…ic-rank-filter-threshold-setting

Add field-specific rank `filter-threshold` setting
  • Loading branch information
vekterli authored Jan 24, 2025
2 parents da1e0a0 + 37c2cd5 commit f402ed5
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 4 deletions.
11 changes: 11 additions & 0 deletions config-model/src/main/java/com/yahoo/schema/RankProfile.java
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@ public class RankProfile implements Cloneable {

private Set<String> filterFields = new HashSet<>();

// Field-level `rank my_field { filter-threshold: ... }` that overrides the profile-level `filter-threshold` (if any)
private Map<String, Double> explicitFieldRankFilterThresholds = new LinkedHashMap<>();

private final RankProfileRegistry rankProfileRegistry;

private final TypeSettings attributeTypes = new TypeSettings();
Expand Down Expand Up @@ -1012,6 +1015,14 @@ public Set<String> allFilterFields() {
return combined;
}

/**
 * Replaces the explicit per-field rank filter thresholds of this profile with a copy of the given map.
 * Changes made to the argument map afterwards are not reflected in this profile.
 *
 * @param fieldFilterThresholds field name to filter threshold
 */
public void setExplicitFieldRankFilterThresholds(Map<String, Double> fieldFilterThresholds) {
    Map<String, Double> copiedThresholds = new LinkedHashMap<>();
    copiedThresholds.putAll(fieldFilterThresholds);
    explicitFieldRankFilterThresholds = copiedThresholds;
}

/**
 * Returns the explicit field-level rank filter thresholds of this profile, keyed by field name.
 * The returned map is a read-only view; use
 * {@link #setExplicitFieldRankFilterThresholds(Map)} to replace the contents.
 */
public Map<String, Double> explicitFieldRankFilterThresholds() {
    // Hand out a view instead of the internal map so callers cannot modify profile state directly
    return java.util.Collections.unmodifiableMap(explicitFieldRankFilterThresholds);
}

private ExpressionFunction parseRankingExpression(String name, List<String> arguments, String expression) throws ParseException {
if (expression.trim().isEmpty())
throw new ParseException("Empty expression");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ private static class Deriver {
private final Map<String, String> attributeTypes;
private final Map<Reference, RankProfile.Input> inputs;
private final Set<String> filterFields = new java.util.LinkedHashSet<>();
private Map<String, Double> explicitFieldRankFilterThresholds = new LinkedHashMap<>();
private final String rankprofileName;

private RankingExpression firstPhaseRanking;
Expand Down Expand Up @@ -271,6 +272,7 @@ private void deriveFeatureDeclarations(Collection<ReferenceNode> features,

/** Copies the filter fields and the explicit per-field filter thresholds of the given profile into this deriver. */
private void deriveFilterFields(RankProfile rp) {
    for (String fieldName : rp.allFilterFields()) {
        filterFields.add(fieldName);
    }
    rp.explicitFieldRankFilterThresholds().forEach(explicitFieldRankFilterThresholds::put);
}

private void derivePropertiesAndFeaturesFromFunctions(Map<String, RankProfile.RankingExpressionFunction> functions,
Expand Down Expand Up @@ -498,6 +500,9 @@ else if (RankingExpression.propertyName(RankProfile.GLOBAL_PHASE).equals(propert
if (filterThreshold.isPresent()) {
properties.add(new Pair<>("vespa.matching.filter_threshold", String.valueOf(filterThreshold.getAsDouble())));
}
for (var fieldAndThreshold : explicitFieldRankFilterThresholds.entrySet()) {
properties.add(new Pair<>("vespa.matching.filter_threshold.%s".formatted(fieldAndThreshold.getKey()), String.valueOf(fieldAndThreshold.getValue())));
}
if (matchPhaseSettings != null) {
properties.add(new Pair<>("vespa.matchphase.degradation.attribute", matchPhaseSettings.getAttribute()));
properties.add(new Pair<>("vespa.matchphase.degradation.ascendingorder", matchPhaseSettings.getAscending() + ""));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ public class ParsedRankProfile extends ParsedBlock {
private final List<MutateOperation> mutateOperations = new ArrayList<>();
private final List<String> inherited = new ArrayList<>();
private final Map<String, Boolean> fieldsRankFilter = new LinkedHashMap<>();
private final Map<String, Double> fieldsRankFilterThreshold = new LinkedHashMap<>();
private final Map<String, Integer> fieldsRankWeight = new LinkedHashMap<>();
private final Map<String, ParsedRankFunction> functions = new LinkedHashMap<>();
private final Map<String, String> fieldsRankType = new LinkedHashMap<>();
Expand Down Expand Up @@ -94,6 +95,7 @@ public ParsedRankProfile(String name) {
/** Returns the global-phase expression, or empty when none is set. */
Optional<String> getGlobalPhaseExpression() {
    return Optional.ofNullable(this.globalPhaseExpression);
}

/** Returns a read-only view of the per-field rank filter flags. */
Map<String, Boolean> getFieldsWithRankFilter() {
    return Collections.unmodifiableMap(fieldsRankFilter);
}

/** Returns a read-only view of the per-field rank filter thresholds. */
Map<String, Double> getFieldsWithRankFilterThreshold() {
    return Collections.unmodifiableMap(fieldsRankFilterThreshold);
}

/** Returns a read-only view of the per-field rank weights. */
Map<String, Integer> getFieldsWithRankWeight() {
    return Collections.unmodifiableMap(fieldsRankWeight);
}

/** Returns a read-only view of the per-field rank types. */
Map<String, String> getFieldsWithRankType() {
    return Collections.unmodifiableMap(fieldsRankType);
}

/** Returns a read-only view of the rank properties. */
Map<String, List<String>> getRankProperties() {
    return Collections.unmodifiableMap(rankProperties);
}
Expand Down Expand Up @@ -140,6 +142,12 @@ public void addFieldRankFilter(String field, boolean filter) {
fieldsRankFilter.put(field, filter);
}

/**
 * Registers a field-specific rank filter threshold on this rank profile.
 * Both the "not already set" and the range condition are checked through {@code verifyThat}
 * before the value is stored.
 *
 * @param field the name of the field the threshold applies to
 * @param filterThreshold the threshold, expected to be in the range [0, 1]
 */
public void addFieldRankFilterThreshold(String field, double filterThreshold) {
    verifyThat(!fieldsRankFilterThreshold.containsKey(field), "already has rank filter-threshold for field", field);
    // Written as two positive comparisons so that NaN also fails the condition
    verifyThat(filterThreshold >= 0.0 && filterThreshold <= 1.0,
               "rank filter-threshold must be a value in range [0, 1] for field", field);
    fieldsRankFilterThreshold.put(field, filterThreshold);
}

public void addFieldRankType(String field, String type) {
verifyThat(! fieldsRankType.containsKey(field), "already has rank type for field", field);
fieldsRankType.put(field, type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ private void populateFrom(ParsedRankProfile parsed, RankProfile profile) {
parsed.getFieldsWithRankFilter().forEach
((fieldName, isFilter) -> profile.addRankSetting(fieldName, RankProfile.RankSetting.Type.PREFERBITVECTOR, isFilter));

profile.setExplicitFieldRankFilterThresholds(parsed.getFieldsWithRankFilterThreshold());

parsed.getFieldsWithRankWeight().forEach
((fieldName, weight) -> profile.addRankSetting(fieldName, RankProfile.RankSetting.Type.WEIGHT, weight));

Expand Down
9 changes: 7 additions & 2 deletions config-model/src/main/javacc/SchemaParser.jj
Original file line number Diff line number Diff line change
Expand Up @@ -2428,10 +2428,15 @@ void fieldRankType(ParsedRankProfile profile) :
void fieldRankFilter(ParsedRankProfile profile) :
{
String name;
double filterThreshold;
}
{
<RANK> name = identifier() <COLON> <FILTER>
{ profile.addFieldRankFilter(name, true); }
<RANK> name = identifier()
( ( <COLON> <FILTER> ) { profile.addFieldRankFilter(name, true); }
| ( lbrace() <FILTER_THRESHOLD> <COLON> filterThreshold = floatValue()
{ profile.addFieldRankFilterThreshold(name, filterThreshold); }
( <NL> )* <RBRACE> )
)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,49 @@ private void verifyFilterThreshold(Double threshold) throws ParseException {
threshold, "vespa.matching.filter_threshold");
}

/** Wraps a possibly-null boxed Double in an OptionalDouble; null becomes an empty optional. */
private static OptionalDouble optionalDoubleOfNullable(Double maybeDouble) {
    // OptionalDouble offers no ofNullable factory, so the null case is handled by hand
    if (maybeDouble == null) {
        return OptionalDouble.empty();
    }
    return OptionalDouble.of(maybeDouble);
}

// Checks that thresholds declared per field with `rank <field> { filter-threshold: ... }` can be
// read back from the rank profile, and passes each one to verifyRankProfileSetting together with
// the corresponding vespa.matching.filter_threshold.<field> property name.
@Test
void field_specific_filter_threshold_is_configurable() throws ParseException {
var rps = """
search test {
document test {
field f1 type string {
indexing: index
}
field f2 type string {
indexing: index
}
field f3 type string {
indexing: index
}
}
rank-profile my_profile {
rank f1 {
filter-threshold: 0.08
}
rank f2 {
filter-threshold: 0.11
}
}
}
""";
var rp = createRankProfile(rps);

// f1 and f2 each declare an explicit threshold in the rank profile above
verifyRankProfileSetting(rp.getFirst(), rp.getSecond(),
(myRp) -> optionalDoubleOfNullable(myRp.explicitFieldRankFilterThresholds().get("f1")),
0.08, "vespa.matching.filter_threshold.f1");
verifyRankProfileSetting(rp.getFirst(), rp.getSecond(),
(myRp) -> optionalDoubleOfNullable(myRp.explicitFieldRankFilterThresholds().get("f2")),
0.11, "vespa.matching.filter_threshold.f2");
// f3 has no `rank` block in the profile, so a null expected value is passed for it
verifyRankProfileSetting(rp.getFirst(), rp.getSecond(),
(myRp) -> optionalDoubleOfNullable(myRp.explicitFieldRankFilterThresholds().get("f3")),
null, "vespa.matching.filter_threshold.f3");
}

private void verifyRankProfileSetting(RankProfile rankProfile, RawRankProfile rawRankProfile, Function<RankProfile, OptionalDouble> func,
Double expValue, String expPropertyName) {
if (expValue != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,31 @@ void filter_threshold_can_be_parsed() throws Exception {
assertEquals(0.05, target.get());
}

// Parses a rank profile that mixes the block form `rank <field> { filter-threshold: ... }` with the
// `rank <field>: filter` form and checks that each ends up in its own per-field map.
@Test
void field_rank_specific_filter_threshold_can_be_parsed() throws Exception {
String input = """
schema foo {
rank-profile rp {
rank bar {
filter-threshold: 0.05
}
rank zoid {
filter-threshold: 0.07
}
rank baz: filter
}
}""";
var schema = parseString(input);
var rp = schema.getRankProfiles().get(0);
var thresholds = rp.getFieldsWithRankFilterThreshold();
// Only bar and zoid use the block form, so exactly two thresholds are expected
assertEquals(2, thresholds.size());
assertEquals(0.05, thresholds.getOrDefault("bar", 0.0), 0.000001);
assertEquals(0.07, thresholds.getOrDefault("zoid", 0.0), 0.000001);
// Old-school binary rank filter still supported as expected
assertEquals(1, rp.getFieldsWithRankFilter().size());
assertTrue(rp.getFieldsWithRankFilter().get("baz"));
}

@Test
void maxOccurrencesCanBeParsed() throws Exception {
String input = joinLines
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2755,10 +2755,15 @@ void fieldRankType(ParsedRankProfile profile) :
void fieldRankFilter(ParsedRankProfile profile) :
{
String name;
double filterThreshold;
}

<RANK> name = identifierStr() <COLON> <FILTER>
{ profile.addFieldRankFilter(name, true); }
<RANK> name = identifierStr()
( ( <COLON> <FILTER> ) { profile.addFieldRankFilter(name, true); }
| ( openLbrace() <FILTER_THRESHOLD> <COLON> filterThreshold = floatValue()
{ profile.addFieldRankFilterThreshold(name, filterThreshold); }
( <NL> )* <RBRACE> )
)
;

/**
Expand Down

0 comments on commit f402ed5

Please sign in to comment.