Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add field-specific rank filter-threshold setting #33165

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions config-model/src/main/java/com/yahoo/schema/RankProfile.java
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@ public class RankProfile implements Cloneable {

private Set<String> filterFields = new HashSet<>();

// Field-level `rank my_field { filter-threshold: ... }` settings, keyed by field name; each overrides the profile-level `filter-threshold` (if any)
private Map<String, Double> explicitFieldRankFilterThresholds = new LinkedHashMap<>();

private final RankProfileRegistry rankProfileRegistry;

private final TypeSettings attributeTypes = new TypeSettings();
Expand Down Expand Up @@ -1012,6 +1015,14 @@ public Set<String> allFilterFields() {
return combined;
}

/**
 * Replaces the field-level rank filter thresholds of this profile.
 *
 * The entries are copied into a fresh insertion-ordered map, so later changes
 * to the argument do not affect this profile.
 *
 * @param fieldFilterThresholds mapping from field name to filter threshold; must not be null
 */
public void setExplicitFieldRankFilterThresholds(Map<String, Double> fieldFilterThresholds) {
    Map<String, Double> copiedThresholds = new LinkedHashMap<>();
    copiedThresholds.putAll(fieldFilterThresholds);
    explicitFieldRankFilterThresholds = copiedThresholds;
}

/**
 * Returns the field-level rank filter thresholds of this profile, keyed by field name.
 *
 * The returned map is a read-only view: callers that need to change the thresholds
 * must go through {@link #setExplicitFieldRankFilterThresholds(Map)}, which takes a
 * defensive copy. Handing out the backing map directly would let callers mutate
 * this profile's state behind its back.
 *
 * @return an unmodifiable view of the field name to filter threshold mapping; never null
 */
public Map<String, Double> explicitFieldRankFilterThresholds() {
    // Fully qualified so this does not depend on which java.util names the file imports.
    return java.util.Collections.unmodifiableMap(explicitFieldRankFilterThresholds);
}

private ExpressionFunction parseRankingExpression(String name, List<String> arguments, String expression) throws ParseException {
if (expression.trim().isEmpty())
throw new ParseException("Empty expression");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ private static class Deriver {
private final Map<String, String> attributeTypes;
private final Map<Reference, RankProfile.Input> inputs;
private final Set<String> filterFields = new java.util.LinkedHashSet<>();
private Map<String, Double> explicitFieldRankFilterThresholds = new LinkedHashMap<>();
private final String rankprofileName;

private RankingExpression firstPhaseRanking;
Expand Down Expand Up @@ -271,6 +272,7 @@ private void deriveFeatureDeclarations(Collection<ReferenceNode> features,

/**
 * Collects filter-related settings from the given rank profile into this deriver:
 * all of its filter fields, and its explicit field-level rank filter thresholds.
 *
 * @param rp the rank profile to read the settings from
 */
private void deriveFilterFields(RankProfile rp) {
    Set<String> profileFilterFields = rp.allFilterFields();
    filterFields.addAll(profileFilterFields);

    Map<String, Double> profileFieldThresholds = rp.explicitFieldRankFilterThresholds();
    explicitFieldRankFilterThresholds.putAll(profileFieldThresholds);
}

private void derivePropertiesAndFeaturesFromFunctions(Map<String, RankProfile.RankingExpressionFunction> functions,
Expand Down Expand Up @@ -498,6 +500,9 @@ else if (RankingExpression.propertyName(RankProfile.GLOBAL_PHASE).equals(propert
if (filterThreshold.isPresent()) {
properties.add(new Pair<>("vespa.matching.filter_threshold", String.valueOf(filterThreshold.getAsDouble())));
}
for (var fieldAndThreshold : explicitFieldRankFilterThresholds.entrySet()) {
properties.add(new Pair<>("vespa.matching.filter_threshold.%s".formatted(fieldAndThreshold.getKey()), String.valueOf(fieldAndThreshold.getValue())));
}
if (matchPhaseSettings != null) {
properties.add(new Pair<>("vespa.matchphase.degradation.attribute", matchPhaseSettings.getAttribute()));
properties.add(new Pair<>("vespa.matchphase.degradation.ascendingorder", matchPhaseSettings.getAscending() + ""));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ public class ParsedRankProfile extends ParsedBlock {
private final List<MutateOperation> mutateOperations = new ArrayList<>();
private final List<String> inherited = new ArrayList<>();
private final Map<String, Boolean> fieldsRankFilter = new LinkedHashMap<>();
private final Map<String, Double> fieldsRankFilterThreshold = new LinkedHashMap<>();
private final Map<String, Integer> fieldsRankWeight = new LinkedHashMap<>();
private final Map<String, ParsedRankFunction> functions = new LinkedHashMap<>();
private final Map<String, String> fieldsRankType = new LinkedHashMap<>();
Expand Down Expand Up @@ -94,6 +95,7 @@ public ParsedRankProfile(String name) {
/** Returns the global-phase expression, or empty if the backing field is null. */
Optional<String> getGlobalPhaseExpression() {
    return Optional.ofNullable(globalPhaseExpression);
}

/** Returns an unmodifiable view of the per-field rank filter flags, keyed by field name. */
Map<String, Boolean> getFieldsWithRankFilter() {
    return Collections.unmodifiableMap(fieldsRankFilter);
}

/** Returns an unmodifiable view of the per-field rank filter thresholds, keyed by field name. */
Map<String, Double> getFieldsWithRankFilterThreshold() {
    return Collections.unmodifiableMap(fieldsRankFilterThreshold);
}

/** Returns an unmodifiable view of the per-field rank weights. */
Map<String, Integer> getFieldsWithRankWeight() {
    return Collections.unmodifiableMap(fieldsRankWeight);
}

/** Returns an unmodifiable view of the per-field rank types, keyed by field name. */
Map<String, String> getFieldsWithRankType() {
    return Collections.unmodifiableMap(fieldsRankType);
}

/** Returns an unmodifiable view of the rank properties; each key maps to a list of values. */
Map<String, List<String>> getRankProperties() {
    return Collections.unmodifiableMap(rankProperties);
}
Expand Down Expand Up @@ -140,6 +142,12 @@ public void addFieldRankFilter(String field, boolean filter) {
fieldsRankFilter.put(field, filter);
}

/**
 * Registers a field-level rank filter threshold for the given field.
 *
 * A field may only be given a threshold once, and the threshold must lie in the
 * closed interval [0, 1]. NaN fails both comparisons and is therefore rejected too.
 *
 * @param field           name of the field the threshold applies to
 * @param filterThreshold the threshold value, expected to be within [0, 1]
 */
public void addFieldRankFilterThreshold(String field, double filterThreshold) {
    boolean alreadyConfigured = fieldsRankFilterThreshold.containsKey(field);
    verifyThat(!alreadyConfigured, "already has rank filter-threshold for field", field);

    // NOTE(review): this message does not name the setting or include the offending value;
    // how it reads depends on how verifyThat formats its arguments - confirm it is clear enough.
    boolean withinUnitInterval = (0.0 <= filterThreshold) && (filterThreshold <= 1.0);
    verifyThat(withinUnitInterval, "must be a value in range [0, 1]", field);

    fieldsRankFilterThreshold.put(field, filterThreshold);
}

public void addFieldRankType(String field, String type) {
verifyThat(! fieldsRankType.containsKey(field), "already has rank type for field", field);
fieldsRankType.put(field, type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ private void populateFrom(ParsedRankProfile parsed, RankProfile profile) {
parsed.getFieldsWithRankFilter().forEach
((fieldName, isFilter) -> profile.addRankSetting(fieldName, RankProfile.RankSetting.Type.PREFERBITVECTOR, isFilter));

profile.setExplicitFieldRankFilterThresholds(parsed.getFieldsWithRankFilterThreshold());

parsed.getFieldsWithRankWeight().forEach
((fieldName, weight) -> profile.addRankSetting(fieldName, RankProfile.RankSetting.Type.WEIGHT, weight));

Expand Down
9 changes: 7 additions & 2 deletions config-model/src/main/javacc/SchemaParser.jj
Original file line number Diff line number Diff line change
Expand Up @@ -2428,10 +2428,15 @@ void fieldRankType(ParsedRankProfile profile) :
void fieldRankFilter(ParsedRankProfile profile) :
{
String name;
double filterThreshold;
}
{
<RANK> name = identifier() <COLON> <FILTER>
{ profile.addFieldRankFilter(name, true); }
<RANK> name = identifier()
( ( <COLON> <FILTER> ) { profile.addFieldRankFilter(name, true); }
| ( lbrace() <FILTER_THRESHOLD> <COLON> filterThreshold = floatValue()
{ profile.addFieldRankFilterThreshold(name, filterThreshold); }
( <NL> )* <RBRACE> )
)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,49 @@ private void verifyFilterThreshold(Double threshold) throws ParseException {
threshold, "vespa.matching.filter_threshold");
}

/**
 * Converts a possibly-null boxed double to an {@link OptionalDouble}.
 *
 * OptionalDouble offers no ofNullable factory (it wraps a primitive), so the
 * null check has to be done by hand.
 *
 * @param maybeDouble the value to wrap, or null
 * @return an OptionalDouble holding the value, or an empty one if the argument is null
 */
private static OptionalDouble optionalDoubleOfNullable(Double maybeDouble) {
    if (maybeDouble == null) {
        return OptionalDouble.empty();
    }
    return OptionalDouble.of(maybeDouble);
}

// Checks that field-level `rank <field> { filter-threshold: ... }` settings declared in a
// rank profile can be read back per field through RankProfile.explicitFieldRankFilterThresholds(),
// and pairs each lookup with the property name "vespa.matching.filter_threshold.<field>".
@Test
void field_specific_filter_threshold_is_configurable() throws ParseException {
// Schema with three indexed string fields; only f1 and f2 get a field-level filter-threshold.
var rps = """
search test {
document test {
field f1 type string {
indexing: index
}
field f2 type string {
indexing: index
}
field f3 type string {
indexing: index
}
}
rank-profile my_profile {
rank f1 {
filter-threshold: 0.08
}
rank f2 {
filter-threshold: 0.11
}
}
}
""";
// The returned pair is passed on below as (RankProfile, RawRankProfile).
var rp = createRankProfile(rps);

// f1 and f2: the threshold declared in the schema is the expected value.
verifyRankProfileSetting(rp.getFirst(), rp.getSecond(),
(myRp) -> optionalDoubleOfNullable(myRp.explicitFieldRankFilterThresholds().get("f1")),
0.08, "vespa.matching.filter_threshold.f1");
verifyRankProfileSetting(rp.getFirst(), rp.getSecond(),
(myRp) -> optionalDoubleOfNullable(myRp.explicitFieldRankFilterThresholds().get("f2")),
0.11, "vespa.matching.filter_threshold.f2");
// f3 has no field-level threshold: the map lookup returns null, which becomes an empty
// OptionalDouble, and the expected value is null.
// NOTE(review): presumably the helper then asserts the setting/property is absent - confirm
// against verifyRankProfileSetting, whose body is not fully visible here.
verifyRankProfileSetting(rp.getFirst(), rp.getSecond(),
(myRp) -> optionalDoubleOfNullable(myRp.explicitFieldRankFilterThresholds().get("f3")),
null, "vespa.matching.filter_threshold.f3");
}

private void verifyRankProfileSetting(RankProfile rankProfile, RawRankProfile rawRankProfile, Function<RankProfile, OptionalDouble> func,
Double expValue, String expPropertyName) {
if (expValue != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,31 @@ void filter_threshold_can_be_parsed() throws Exception {
assertEquals(0.05, target.get());
}

// Checks that the parser accepts the block form `rank <field> { filter-threshold: <value> }`
// inside a rank profile, and that the older `rank <field>: filter` form still parses next to it.
@Test
void field_rank_specific_filter_threshold_can_be_parsed() throws Exception {
// Two fields use the new block form with a threshold; "baz" uses the plain filter form.
String input = """
schema foo {
rank-profile rp {
rank bar {
filter-threshold: 0.05
}
rank zoid {
filter-threshold: 0.07
}
rank baz: filter
}
}""";
var schema = parseString(input);
var rp = schema.getRankProfiles().get(0);
var thresholds = rp.getFieldsWithRankFilterThreshold();
// Only the two block-form declarations should produce threshold entries.
assertEquals(2, thresholds.size());
// The 0.0 default makes a missing entry fail the comparison instead of throwing on unboxing null.
assertEquals(0.05, thresholds.getOrDefault("bar", 0.0), 0.000001);
assertEquals(0.07, thresholds.getOrDefault("zoid", 0.0), 0.000001);
// Old-school binary rank filter still supported as expected
assertEquals(1, rp.getFieldsWithRankFilter().size());
assertTrue(rp.getFieldsWithRankFilter().get("baz"));
}

@Test
void maxOccurrencesCanBeParsed() throws Exception {
String input = joinLines
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2755,10 +2755,15 @@ void fieldRankType(ParsedRankProfile profile) :
void fieldRankFilter(ParsedRankProfile profile) :
{
String name;
double filterThreshold;
}

<RANK> name = identifierStr() <COLON> <FILTER>
{ profile.addFieldRankFilter(name, true); }
<RANK> name = identifierStr()
( ( <COLON> <FILTER> ) { profile.addFieldRankFilter(name, true); }
| ( openLbrace() <FILTER_THRESHOLD> <COLON> filterThreshold = floatValue()
{ profile.addFieldRankFilterThreshold(name, filterThreshold); }
( <NL> )* <RBRACE> )
)
;

/**
Expand Down
Loading