Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Sec. indexes on relations #2670

Merged
2 changes: 1 addition & 1 deletion client/normal_nil.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func NewNormalNil(kind FieldKind) (NormalValue, error) {
return NewNormalNillableFloat(immutable.None[float64]()), nil
case FieldKind_NILLABLE_DATETIME:
return NewNormalNillableTime(immutable.None[time.Time]()), nil
case FieldKind_NILLABLE_STRING, FieldKind_NILLABLE_JSON:
case FieldKind_NILLABLE_STRING, FieldKind_NILLABLE_JSON, FieldKind_DocID:
return NewNormalNillableString(immutable.None[string]()), nil
case FieldKind_NILLABLE_BLOB:
return NewNormalNillableBytes(immutable.None[[]byte]()), nil
Expand Down
2 changes: 1 addition & 1 deletion client/normal_value_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1404,7 +1404,7 @@ func TestNormalValue_NewNormalNil(t *testing.T) {
assert.True(t, v.IsNil())
} else {
_, err := NewNormalNil(kind)
require.Error(t, err)
require.Error(t, err, "field kind: "+kind.String())
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion client/schema_field_description.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func (k ScalarKind) Underlying() string {
}

func (k ScalarKind) IsNillable() bool {
return k != FieldKind_DocID
return true
}

func (k ScalarKind) IsObject() bool {
Expand Down
3 changes: 3 additions & 0 deletions docs/data_format_changes/i2670-sec-index-on-relations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Enable secondary index on relations

This naturally caused some explain metrics to change and change detector complain about it.
24 changes: 12 additions & 12 deletions internal/db/collection_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/sourcenetwork/immutable"

"github.com/sourcenetwork/defradb/client"
"github.com/sourcenetwork/defradb/client/request"
"github.com/sourcenetwork/defradb/datastore"
"github.com/sourcenetwork/defradb/internal/core"
"github.com/sourcenetwork/defradb/internal/db/base"
Expand Down Expand Up @@ -264,7 +265,7 @@ func (c *collection) createIndex(
return nil, err
}

err = c.checkExistingFields(desc.Fields)
err = c.checkExistingFieldsAndAdjustRelFieldNames(desc.Fields)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -493,20 +494,19 @@ func (c *collection) GetIndexes(ctx context.Context) ([]client.IndexDescription,
return c.Description().Indexes, nil
}

func (c *collection) checkExistingFields(
// checkExistingFieldsAndAdjustRelFieldNames checks if the fields in the index description
// exist in the collection schema.
// If a field is a relation, it will be adjusted to relation id field name, a.k.a. `field_name + _id`.
func (c *collection) checkExistingFieldsAndAdjustRelFieldNames(
fields []client.IndexedFieldDescription,
) error {
collectionFields := c.Schema().Fields
for _, field := range fields {
found := false
for _, colField := range collectionFields {
if field.Name == colField.Name {
found = true
break
}
}
for i := range fields {
field, found := c.Schema().GetFieldByName(fields[i].Name)
if !found {
return NewErrNonExistingFieldForIndex(field.Name)
return NewErrNonExistingFieldForIndex(fields[i].Name)
}
if field.Kind.IsObject() {
fields[i].Name = fields[i].Name + request.RelatedObjectID
}
}
return nil
Expand Down
2 changes: 1 addition & 1 deletion internal/db/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func getValidateIndexFieldFunc(kind client.FieldKind) func(any) bool {
}

switch kind {
case client.FieldKind_NILLABLE_STRING:
case client.FieldKind_NILLABLE_STRING, client.FieldKind_DocID:
return canConvertIndexFieldValue[string]
case client.FieldKind_NILLABLE_INT:
return canConvertIndexFieldValue[int64]
Expand Down
16 changes: 8 additions & 8 deletions internal/planner/explain.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,17 +92,17 @@ func buildDebugExplainGraph(source planNode) (map[string]any, error) {
var explainGraphBuilder = map[string]any{}

// If root is not the last child then keep walking and explaining the root graph.
if node.root != nil {
indexJoinRootExplainGraph, err := buildDebugExplainGraph(node.root)
if node.parentSide.plan != nil {
indexJoinRootExplainGraph, err := buildDebugExplainGraph(node.parentSide.plan)
if err != nil {
return nil, err
}
// Add the explaination of the rest of the explain graph under the "root" graph.
explainGraphBuilder[joinRootLabel] = indexJoinRootExplainGraph
}

if node.subType != nil {
indexJoinSubTypeExplainGraph, err := buildDebugExplainGraph(node.subType)
if node.childSide.plan != nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

praise: I like these!

indexJoinSubTypeExplainGraph, err := buildDebugExplainGraph(node.childSide.plan)
if err != nil {
return nil, err
}
Expand All @@ -117,8 +117,8 @@ func buildDebugExplainGraph(source planNode) (map[string]any, error) {
var explainGraphBuilder = map[string]any{}

// If root is not the last child then keep walking and explaining the root graph.
if node.root != nil {
indexJoinRootExplainGraph, err := buildDebugExplainGraph(node.root)
if node.parentSide.plan != nil {
indexJoinRootExplainGraph, err := buildDebugExplainGraph(node.parentSide.plan)
if err != nil {
return nil, err
}
Expand All @@ -128,8 +128,8 @@ func buildDebugExplainGraph(source planNode) (map[string]any, error) {
explainGraphBuilder[joinRootLabel] = nil
}

if node.subType != nil {
indexJoinSubTypeExplainGraph, err := buildDebugExplainGraph(node.subType)
if node.childSide.plan != nil {
indexJoinSubTypeExplainGraph, err := buildDebugExplainGraph(node.childSide.plan)
if err != nil {
return nil, err
}
Expand Down
35 changes: 17 additions & 18 deletions internal/planner/mapper/mapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -413,25 +413,23 @@
childMapping = childMapping.CloneWithoutRender()
mapping.SetChildAt(index, childMapping)

if !childIsMapped {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this was a nasty bug that took many hours to spot and fix.
It because visible for TestOneToManyToOneWithSumOfDeepFilterSubTypeOfBothDescAndAsc test where 2 aggregates are used on the same related object:

query {
    Author {
        name
        s1: _sum(book: {field: rating, filter: {publisher: {yearOpened: {_eq: 2013}}}})
        s2: _sum(book: {field: rating, filter: {publisher: {yearOpened: {_ge: 2020}}}})
    }
}

After the first aggregate is processed with correct []Requestable fields, the second would reuse the results of the frst which doesn't include resolving of fields. So Fields field would have a nil value. Which happened to be interpreted by the fetcher as all-fields-requested.
So when dealing with secondary indexes I explicitly added to the fields array a relation id field (like book_id). That made the fetcher to fetch only the related id field and would prevent the filter from letting yearOpened: {_ge: 2020} condition to pass, because it's not fetched.

That's why filter dependencies should be resolved for every aggregate.

Let me know if you think there is a better solution.

filterDependencies, err := resolveFilterDependencies(
ctx,
store,
rootSelectType,
childCollectionName,
target.filter,
mapping.ChildMappings[index],
childFields,
)
if err != nil {
return nil, err
}
childFields = append(childFields, filterDependencies...)

// If the child was not mapped, the filter will not have been converted yet
// so we must do that now.
convertedFilter = ToFilter(target.filter.Value(), mapping.ChildMappings[index])
filterDependencies, err := resolveFilterDependencies(
ctx,
store,
rootSelectType,
childCollectionName,
target.filter,
mapping.ChildMappings[index],
childFields,
)
if err != nil {
return nil, err

Check warning on line 426 in internal/planner/mapper/mapper.go

View check run for this annotation

Codecov / codecov/patch

internal/planner/mapper/mapper.go#L426

Added line #L426 was not covered by tests
}
childFields = append(childFields, filterDependencies...)

// If the child was not mapped, the filter will not have been converted yet
// so we must do that now.
convertedFilter = ToFilter(target.filter.Value(), mapping.ChildMappings[index])

dummyJoin := &Select{
Targetable: Targetable{
Expand Down Expand Up @@ -989,6 +987,7 @@
return nil, err
}

childSelect.SkipResolve = true
newFields = append(newFields, childSelect)
}

Expand Down
5 changes: 5 additions & 0 deletions internal/planner/mapper/select.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ type Select struct {
// These can include stuff such as version information, aggregates, and other
// Selects.
Fields []Requestable

// SkipResolve is a flag that indicates that the fields in this Select don't need to be resolved,
// i.e. it's value doesn't need to be fetched and provided to the user.
// It is used to avoid resolving related objects if they are used only in a filter and not requested in a response.
SkipResolve bool
islamaliev marked this conversation as resolved.
Show resolved Hide resolved
}

func (s *Select) AsTargetable() (*Targetable, bool) {
Expand Down
12 changes: 7 additions & 5 deletions internal/planner/planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,18 +357,20 @@ func (p *Planner) tryOptimizeJoinDirection(node *invertibleTypeJoin, parentPlan
parentPlan.selectNode.filter.Conditions,
node.documentMapping,
)
slct := node.subType.(*selectTopNode).selectNode
slct := node.childSide.plan.(*selectTopNode).selectNode
desc := slct.collection.Description()
for subFieldName, subFieldInd := range filteredSubFields {
indexes := desc.GetIndexesOnField(subFieldName)
if len(indexes) > 0 && !filter.IsComplex(parentPlan.selectNode.filter) {
subInd := node.documentMapping.FirstIndexOfName(node.subTypeName)
relatedField := mapper.Field{Name: node.subTypeName, Index: subInd}
subInd := node.documentMapping.FirstIndexOfName(node.parentSide.relFieldDef.Name)
relatedField := mapper.Field{Name: node.parentSide.relFieldDef.Name, Index: subInd}
fieldFilter := filter.UnwrapRelation(filter.CopyField(
parentPlan.selectNode.filter,
relatedField,
mapper.Field{Name: subFieldName, Index: subFieldInd},
), relatedField)
// At the moment we just take the first index, but later we want to run some kind of analysis to
// determine which index is best to use. https://github.com/sourcenetwork/defradb/issues/2680
err := node.invertJoinDirectionWithIndex(fieldFilter, indexes[0])
if err != nil {
return err
Expand All @@ -383,15 +385,15 @@ func (p *Planner) tryOptimizeJoinDirection(node *invertibleTypeJoin, parentPlan
// expandTypeJoin does a plan graph expansion and other optimizations on invertibleTypeJoin.
func (p *Planner) expandTypeJoin(node *invertibleTypeJoin, parentPlan *selectTopNode) error {
if parentPlan.selectNode.filter == nil {
return p.expandPlan(node.subType, parentPlan)
return p.expandPlan(node.childSide.plan, parentPlan)
}

err := p.tryOptimizeJoinDirection(node, parentPlan)
if err != nil {
return err
}

return p.expandPlan(node.subType, parentPlan)
return p.expandPlan(node.childSide.plan, parentPlan)
}

func (p *Planner) expandGroupNodePlan(topNodeSelect *selectTopNode) error {
Expand Down
15 changes: 15 additions & 0 deletions internal/planner/select.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,21 @@ func findIndexByFilteringField(scanNode *scanNode) immutable.Option[client.Index
return immutable.None[client.IndexDescription]()
}

func findIndexByFieldName(col client.Collection, fieldName string) immutable.Option[client.IndexDescription] {
for _, field := range col.Schema().Fields {
if field.Name != fieldName {
continue
}
indexes := col.Description().GetIndexesOnField(field.Name)
if len(indexes) > 0 {
// At the moment we just take the first index, but later we want to run some kind of analysis to
// determine which index is best to use. https://github.com/sourcenetwork/defradb/issues/2680
return immutable.Some(indexes[0])
islamaliev marked this conversation as resolved.
Show resolved Hide resolved
}
}
return immutable.None[client.IndexDescription]()
}

func (n *selectNode) initFields(selectReq *mapper.Select) ([]aggregateNode, error) {
aggregates := []aggregateNode{}
// loop over the sub type
Expand Down
Loading
Loading