Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Small refactor in the package tracing logic. #494

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions artifact/image/layerscanning/image/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
defer layerReader.Close()

tarReader := tar.NewReader(layerReader)
requiredTargets, err = fillChainLayerWithFilesFromTar(&outputImage, tarReader, originLayerID, dirPath, chainLayersToFill, config.Requirer, requiredTargets)
requiredTargets, err = fillChainLayersWithFilesFromTar(&outputImage, tarReader, originLayerID, dirPath, chainLayersToFill, config.Requirer, requiredTargets)
if err != nil {
return fmt.Errorf("failed to fill chain layer with v1 layer tar: %w", err)
}
Expand Down Expand Up @@ -304,6 +304,7 @@ func initializeChainLayers(v1Layers []v1.Layer, configFile *v1.ConfigFile, maxSy
latestLayer: &Layer{
buildCommand: entry.CreatedBy,
isEmpty: true,
fileNodeTree: pathtree.NewNode[fileNode](),
},
maxSymlinkDepth: maxSymlinkDepth,
})
Expand Down Expand Up @@ -353,10 +354,10 @@ func initializeChainLayers(v1Layers []v1.Layer, configFile *v1.ConfigFile, maxSy
return chainLayers, nil
}

// fillChainLayerWithFilesFromTar fills the chain layers with the files found in the tar. The
// fillChainLayersWithFilesFromTar fills the chain layers with the files found in the tar. The
// chainLayersToFill are the chain layers that will be filled with the files via the virtual
// filesystem.
func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLayerID string, dirPath string, chainLayersToFill []*chainLayer, requirer require.FileRequirer, requiredTargets map[string]bool) (map[string]bool, error) {
func fillChainLayersWithFilesFromTar(img *Image, tarReader *tar.Reader, originLayerID string, dirPath string, chainLayersToFill []*chainLayer, requirer require.FileRequirer, requiredTargets map[string]bool) (map[string]bool, error) {
currentChainLayer := chainLayersToFill[0]

for {
Expand Down Expand Up @@ -477,6 +478,10 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
// outer loop is looping backwards (latest layer first), we ignore any files that are already in
// each chainLayer, as they would have been overwritten.
fillChainLayersWithFileNode(chainLayersToFill, newNode)

// Add the fileNode to the node tree of the underlying layer.
layer := currentChainLayer.latestLayer.(*Layer)
layer.fileNodeTree.Insert(virtualPath, newNode)
}
return requiredTargets, nil
}
Expand Down
2 changes: 1 addition & 1 deletion artifact/image/layerscanning/image/image_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -936,7 +936,7 @@ func TestInitializeChainLayers(t *testing.T) {
t.Fatalf("initializeChainLayers(%v, %v, %v) returned an unexpected error: %v", tc.v1Layers, tc.configFile, tc.maxSymlinkDepth, err)
}

if diff := cmp.Diff(tc.want, gotChainLayers, cmp.AllowUnexported(chainLayer{}, Layer{}, fakev1layer.FakeV1Layer{}), cmpopts.IgnoreFields(chainLayer{}, "fileNodeTree")); diff != "" {
if diff := cmp.Diff(tc.want, gotChainLayers, cmp.AllowUnexported(chainLayer{}, Layer{}, fakev1layer.FakeV1Layer{}), cmpopts.IgnoreFields(chainLayer{}, "fileNodeTree"), cmpopts.IgnoreFields(Layer{}, "fileNodeTree")); diff != "" {
t.Fatalf("initializeChainLayers(%v, %v, %v) returned an unexpected diff (-want +got): %v", tc.v1Layers, tc.configFile, tc.maxSymlinkDepth, diff)
}
})
Expand Down
6 changes: 5 additions & 1 deletion artifact/image/layerscanning/image/layer.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,14 @@ type Layer struct {
diffID digest.Digest
buildCommand string
isEmpty bool
fileNodeTree *pathtree.Node[fileNode]
}

// FS returns a scalibr compliant file system.
func (layer *Layer) FS() scalibrfs.FS {
return nil
return &FS{
tree: layer.fileNodeTree,
}
}

// IsEmpty returns whether the layer is empty.
Expand Down Expand Up @@ -96,6 +99,7 @@ func convertV1Layer(v1Layer v1.Layer, command string, isEmpty bool) (*Layer, err
diffID: digest.Digest(diffID.String()),
buildCommand: command,
isEmpty: isEmpty,
fileNodeTree: pathtree.NewNode[fileNode](),
}, nil
}

Expand Down
3 changes: 2 additions & 1 deletion artifact/image/layerscanning/image/layer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ func TestConvertV1Layer(t *testing.T) {
diffID: "sha256:abc123",
buildCommand: "ADD file",
isEmpty: false,
fileNodeTree: pathtree.NewNode[fileNode](),
},
},
{
Expand All @@ -77,7 +78,7 @@ func TestConvertV1Layer(t *testing.T) {
if tc.wantError != nil && gotError == tc.wantError {
t.Errorf("convertV1Layer(%v, %v, %v) returned error: %v, want error: %v", tc.v1Layer, tc.command, tc.isEmpty, gotError, tc.wantError)
}
if diff := cmp.Diff(gotLayer, tc.wantLayer, cmp.AllowUnexported(Layer{}, fakev1layer.FakeV1Layer{})); tc.wantLayer != nil && diff != "" {
if diff := cmp.Diff(gotLayer, tc.wantLayer, cmp.AllowUnexported(Layer{}, fakev1layer.FakeV1Layer{}, pathtree.Node[fileNode]{})); tc.wantLayer != nil && diff != "" {
t.Errorf("convertV1Layer(%v, %v, %v) returned layer: %v, want layer: %v", tc.v1Layer, tc.command, tc.isEmpty, gotLayer, tc.wantLayer)
}
})
Expand Down
53 changes: 50 additions & 3 deletions artifact/image/layerscanning/testing/fakelayer/fake_layer.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,48 @@ package fakelayer
import (
"fmt"
"io"
"io/fs"
"os"
"path"
"path/filepath"

scalibrfs "github.com/google/osv-scalibr/fs"
"github.com/opencontainers/go-digest"
)

// FakeLayer is a fake implementation of the image.Layer interface for testing purposes.
type FakeLayer struct {
testDir string
diffID digest.Digest
buildCommand string
files map[string]string
}

// New creates a new FakeLayer.
func New(diffID digest.Digest, buildCommand string) *FakeLayer {
func New(testDir string, diffID digest.Digest, buildCommand string, files map[string]string, filesAlreadyExist bool) (*FakeLayer, error) {
if !filesAlreadyExist {
for name, contents := range files {
filename := filepath.Join(testDir, name)
if err := os.MkdirAll(filepath.Dir(filename), 0700); err != nil {
return nil, err
}

if err := os.WriteFile(filename, []byte(contents), 0600); err != nil {
return nil, err
}
}
}
return &FakeLayer{
testDir: testDir,
diffID: diffID,
buildCommand: buildCommand,
}
files: files,
}, nil
}

// FS is not currently used for the purposes of layer scanning, thus a nil value is returned.
func (fakeLayer *FakeLayer) FS() scalibrfs.FS {
return nil
return fakeLayer
}

// DiffID returns the diffID of the layer.
Expand All @@ -62,3 +82,30 @@ func (fakeLayer *FakeLayer) IsEmpty() bool {
func (fakeLayer *FakeLayer) Uncompressed() (io.ReadCloser, error) {
return nil, fmt.Errorf("not implemented")
}

// -------------------------------------------------------------------------------------------------
// scalibrfs.FS implementation
// -------------------------------------------------------------------------------------------------

// Open returns a file if it exists in the files map.
func (fakeLayer *FakeLayer) Open(name string) (fs.File, error) {
if _, ok := fakeLayer.files[name]; ok {
filename := filepath.Join(fakeLayer.testDir, name)
return os.Open(filename)
}
return nil, os.ErrNotExist
}

// Stat returns the file info of a file if it exists in the files map.
func (fakeLayer *FakeLayer) Stat(name string) (fs.FileInfo, error) {
if _, ok := fakeLayer.files[name]; ok {
return os.Stat(path.Join(fakeLayer.testDir, name))
}
return nil, os.ErrNotExist
}

// ReadDir is not used in the trace package since individual files are opened instead of
// directories.
func (fakeLayer *FakeLayer) ReadDir(name string) ([]fs.DirEntry, error) {
return nil, fmt.Errorf("not implemented")
}
56 changes: 45 additions & 11 deletions artifact/image/layerscanning/trace/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package trace
import (
"context"
"errors"
"fmt"
"io/fs"
"slices"
"sort"
Expand Down Expand Up @@ -54,7 +55,7 @@ type locationAndIndex struct {
// Note that a precondition of this algorithm is that the chain layers are ordered by order of
// creation.
func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory, chainLayers []scalibrImage.ChainLayer, config *filesystem.Config) {
chainLayerDetailsList := []*extractor.LayerDetails{}
layerDetailsList := []*extractor.LayerDetails{}

// Create list of layer details struct to be referenced by inventory.
for i, chainLayer := range chainLayers {
Expand All @@ -65,7 +66,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
diffID = chainLayer.Layer().DiffID().Encoded()
}

chainLayerDetailsList = append(chainLayerDetailsList, &extractor.LayerDetails{
layerDetailsList = append(layerDetailsList, &extractor.LayerDetails{
Index: i,
DiffID: diffID,
Command: chainLayer.Layer().Command(),
Expand All @@ -90,7 +91,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
lastLayerIndex := len(chainLayers) - 1

for _, inv := range inventory {
layerDetails := chainLayerDetailsList[lastLayerIndex]
layerDetails := layerDetailsList[lastLayerIndex]
invExtractor, isFilesystemExtractor := inv.Extractor.(filesystem.Extractor)

// Only filesystem extractors are supported for layer scanning. Also, if the inventory has no
Expand All @@ -101,33 +102,39 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
}

var foundOrigin bool
fileLocation := inv.Locations[0]

// Go backwards through the chain layers and find the first layer where the inventory is not
// present. Such layer is the layer in which the inventory was introduced. If the inventory is
// present in all layers, then it means it was introduced in the first layer.
// TODO: b/381249869 - Optimization: Skip layers if file not found.
for i := len(chainLayers) - 2; i >= 0; i-- {
oldChainLayer := chainLayers[i]

invLocationAndIndex := locationAndIndex{
location: inv.Locations[0],
location: fileLocation,
index: i,
}

// If the file of interest is not present in the underlying layer, then there will be no
// difference in the extracted inventory from oldChainLayer, so extraction can be skipped in
// the chain layer. This is an optimization to avoid extracting the same inventory multiple
// times.
if !isFileInLayerDiff(oldChainLayer, fileLocation) {
continue
}

var oldInventory []*extractor.Inventory
if cachedInventory, ok := locationIndexToInventory[invLocationAndIndex]; ok {
oldInventory = cachedInventory
} else {
// Check if file still exist in this layer, if not skip extraction.
// This is both an optimization, and avoids polluting the log output with false file not found errors.
if _, err := oldChainLayer.FS().Stat(inv.Locations[0]); errors.Is(err, fs.ErrNotExist) {
oldInventory = []*extractor.Inventory{}
} else {
if _, err := oldChainLayer.FS().Stat(fileLocation); err == nil {
// Update the extractor config to use the files from the current layer.
// We only take extract the first location because other locations are derived from the initial
// extraction location. If other locations can no longer be determined from the first location
// they should not be included here, and the trace for those packages stops here.
updateExtractorConfig([]string{inv.Locations[0]}, invExtractor, oldChainLayer.FS())
updateExtractorConfig([]string{fileLocation}, invExtractor, oldChainLayer.FS())

var err error
oldInventory, _, err = filesystem.Run(ctx, config)
Expand All @@ -150,7 +157,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,

// If the inventory is not present in the old layer, then it was introduced in layer i+1.
if !foundPackage {
layerDetails = chainLayerDetailsList[i+1]
layerDetails = layerDetailsList[i+1]
foundOrigin = true
break
}
Expand All @@ -159,7 +166,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
// If the inventory is present in every layer, then it means it was introduced in the first
// layer.
if !foundOrigin {
layerDetails = chainLayerDetailsList[0]
layerDetails = layerDetailsList[0]
}
inv.LayerDetails = layerDetails
}
Expand Down Expand Up @@ -192,3 +199,30 @@ func areInventoriesEqual(inv1 *extractor.Inventory, inv2 *extractor.Inventory) b
}
return true
}

// getSingleLayerFSFromChainLayer returns the filesystem of the underlying layer in the chain layer.
func getLayerFSFromChainLayer(chainLayer scalibrImage.ChainLayer) (scalibrfs.FS, error) {
layer := chainLayer.Layer()
if layer == nil {
return nil, fmt.Errorf("chain layer has no layer")
}

fs := layer.FS()
if fs == nil {
return nil, fmt.Errorf("layer has no filesystem")
}

return fs, nil
}

// isFileInLayerDiff checks if a file is present in the underlying layer of a chain layer.
func isFileInLayerDiff(chainLayer scalibrImage.ChainLayer, fileLocation string) bool {
layerFS, err := getLayerFSFromChainLayer(chainLayer)
if err != nil {
return false
}
if _, err := layerFS.Stat(fileLocation); errors.Is(err, fs.ErrNotExist) {
return true
}
return false
}
Loading
Loading