Skip to content

Commit

Permalink
[Optimization] Changed the layer scanning tracing logic to reduce the…
Browse files Browse the repository at this point in the history
… number of `ToPURL` calls, since they can be expensive.

PiperOrigin-RevId: 729675877
  • Loading branch information
Mario Leyva authored and copybara-github committed Feb 24, 2025
1 parent 93fce00 commit 5815da7
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 177 deletions.
60 changes: 32 additions & 28 deletions artifact/image/layerscanning/trace/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,10 @@ import (
"context"
"errors"
"io/fs"
"slices"
"sort"

"github.com/google/osv-scalibr/extractor"
"github.com/google/osv-scalibr/extractor/filesystem"
"github.com/google/osv-scalibr/log"

scalibrImage "github.com/google/osv-scalibr/artifact/image"
scalibrfs "github.com/google/osv-scalibr/fs"
Expand Down Expand Up @@ -54,6 +53,13 @@ type locationAndIndex struct {
// Note that a precondition of this algorithm is that the chain layers are ordered by order of
// creation.
func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory, chainLayers []scalibrImage.ChainLayer, config *filesystem.Config) {
// If there are no chain layers, then there is nothing to trace. This should not happen, but we
// should handle it gracefully.
if len(chainLayers) == 0 {
log.Warnf("No chain layers found, cannot trace inventory.")
return
}

chainLayerDetailsList := []*extractor.LayerDetails{}

// Create list of layer details struct to be referenced by inventory.
Expand Down Expand Up @@ -100,6 +106,11 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
continue
}

var invPURL string
if inv.Extractor != nil {
invPURL = inv.Extractor.ToPURL(inv).String()
}

var foundOrigin bool

// Go backwards through the chain layers and find the first layer where the inventory is not
Expand Down Expand Up @@ -142,10 +153,22 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,

foundPackage := false
for _, oldInv := range oldInventory {
if areInventoriesEqual(inv, oldInv) {
foundPackage = true
break
if oldInv.Extractor == nil {
continue
}

// PURLs are being used as a package key, so if they are different, skip this inventory.
oldInvPURL := oldInv.Extractor.ToPURL(oldInv).String()
if oldInvPURL != invPURL {
continue
}

if !areLocationsEqual(oldInv.Locations, inv.Locations) {
continue
}

foundPackage = true
break
}

// If the inventory is not present in the old layer, then it was introduced in layer i+1.
Expand All @@ -165,30 +188,11 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
}
}

// areInventoriesEqual checks if two inventories are equal. It does this by comparing the PURLs and
// the locations of the inventories.
func areInventoriesEqual(inv1 *extractor.Inventory, inv2 *extractor.Inventory) bool {
if inv1.Extractor == nil || inv2.Extractor == nil {
return false
}

// Check if the PURLs are equal.
purl1 := inv1.Extractor.ToPURL(inv1)
purl2 := inv2.Extractor.ToPURL(inv2)

if purl1.String() != purl2.String() {
// areLocationsEqual checks if the inventory location strings are equal.
func areLocationsEqual(fileLocations []string, otherFileLocations []string) bool {
if len(fileLocations) == 0 || len(otherFileLocations) == 0 {
return false
}

// Check if the locations are equal.
locations1 := inv1.Locations[:]
sort.Strings(locations1)

locations2 := inv2.Locations[:]
sort.Strings(locations2)

if !slices.Equal(locations1, locations2) {
return false
}
return true
return fileLocations[0] == otherFileLocations[0]
}
185 changes: 36 additions & 149 deletions artifact/image/layerscanning/trace/trace_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,42 @@ func TestPopulateLayerDetails(t *testing.T) {
chainLayers: []image.ChainLayer{},
wantInventory: []*extractor.Inventory{},
},
{
name: "empty chain layers",
inventory: []*extractor.Inventory{
{
Name: fooPackage,
Locations: []string{fooFile},
Extractor: fakeExtractor1,
},
},
chainLayers: []image.ChainLayer{},
wantInventory: []*extractor.Inventory{
{
Name: fooPackage,
Locations: []string{fooFile},
Extractor: fakeExtractor1,
},
},
},
{
name: "inventory with nil extractor",
inventory: []*extractor.Inventory{
{
Name: fooPackage,
Locations: []string{fooFile},
},
},
chainLayers: []image.ChainLayer{
fakeChainLayer1,
},
wantInventory: []*extractor.Inventory{
{
Name: fooPackage,
Locations: []string{fooFile},
},
},
},
{
name: "inventory in single chain layer",
inventory: []*extractor.Inventory{
Expand Down Expand Up @@ -329,152 +365,3 @@ func TestPopulateLayerDetails(t *testing.T) {
})
}
}

func TestAreInventoriesEqual(t *testing.T) {
tests := []struct {
name string
inv1 *extractor.Inventory
inv2 *extractor.Inventory
want bool
}{
{
name: "nil extractor",
inv1: &extractor.Inventory{
Name: "foo",
Version: "1.0",
Locations: []string{"foo.txt"},
Extractor: nil,
},
inv2: &extractor.Inventory{
Name: "foo",
Version: "1.0",
Locations: []string{"foo.txt"},
Extractor: nil,
},
want: false,
},
{
name: "same inventory",
inv1: &extractor.Inventory{
Name: "foo",
Version: "1.0",
Locations: []string{"foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
inv2: &extractor.Inventory{
Name: "foo",
Version: "1.0",
Locations: []string{"foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
want: true,
},
{
name: "same inventory with multiple locations",
inv1: &extractor.Inventory{
Name: "foo",
Version: "1.0",
Locations: []string{"foo.txt", "another-foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
inv2: &extractor.Inventory{
Name: "foo",
Version: "1.0",
Locations: []string{"another-foo.txt", "foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
want: true,
},
{
name: "different name",
inv1: &extractor.Inventory{
Name: "foo",
Locations: []string{"foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
inv2: &extractor.Inventory{
Name: "bar",
Locations: []string{"foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
want: false,
},
{
name: "different version",
inv1: &extractor.Inventory{
Name: "foo",
Version: "1.0",
Locations: []string{"foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
inv2: &extractor.Inventory{
Name: "foo",
Version: "2.0",
Locations: []string{"foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
want: false,
},
{
name: "different locations",
inv1: &extractor.Inventory{
Name: "foo",
Locations: []string{"foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
inv2: &extractor.Inventory{
Name: "foo",
Locations: []string{"another-foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
want: false,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
if got := areInventoriesEqual(tc.inv1, tc.inv2); got != tc.want {
t.Errorf("areInventoriesEqual(%v, %v) = %v, want: %v", tc.inv1, tc.inv2, got, tc.want)
}
})
}
}

0 comments on commit 5815da7

Please sign in to comment.