diff --git a/internal/pkg/features/features.go b/internal/pkg/features/features.go
index 754da1c8..eb0f0883 100644
--- a/internal/pkg/features/features.go
+++ b/internal/pkg/features/features.go
@@ -36,6 +36,10 @@ type Config struct {
     // clangscandeps
     ExperimentalGomaDepsCache bool
 
+    // ExperimentalGomaDepsCacheSize is the maximum number of elements to
+    // be stored in the cache.
+    ExperimentalGomaDepsCacheSize int
+
     // ExperimentalSysrootDoNotUpload disables upload of the files/directories
     // under the directory specified by the --sysroot flag.
     ExperimentalSysrootDoNotUpload bool
@@ -66,4 +70,5 @@ func init() {
     flag.BoolVar(&GetConfig().ExperimentalGomaDepsCache, "experimental_goma_deps_cache", false, "Use go deps cache with goma instead of goma's deps cache")
     flag.BoolVar(&GetConfig().ExperimentalExitOnStuckActions, "experimental_exit_on_stuck_actions", false, "Stops reproxy with exit_code=1 if the command didn't finish within 2*reclient_timeout")
     flag.BoolVar(&GetConfig().EnableCredentialCache, "enable_creds_cache", true, "If false, disables the credentials cache even if used auth mechanism supports it")
+    flag.IntVar(&GetConfig().ExperimentalGomaDepsCacheSize, "experimental_goma_deps_cache_size", 300000, "Maximum number of entries to hold in the experimental deps cache.")
 }
diff --git a/internal/pkg/inputprocessor/action/cppcompile/preprocessor_test.go b/internal/pkg/inputprocessor/action/cppcompile/preprocessor_test.go
index 3c5f0cb9..686d49a9 100644
--- a/internal/pkg/inputprocessor/action/cppcompile/preprocessor_test.go
+++ b/internal/pkg/inputprocessor/action/cppcompile/preprocessor_test.go
@@ -172,13 +172,14 @@ func TestComputeSpecWithDepsCache(t *testing.T) {
     c := &Preprocessor{
         CPPDepScanner:    s,
         BasePreprocessor: &inputprocessor.BasePreprocessor{Ctx: ctx, FileMetadataCache: fmc},
-        DepsCache:        depscache.New(filemetadata.NewSingleFlightCache()),
+        DepsCache:        depscache.New(),
     }
 
     existingFiles := []string{
         filepath.Clean("bin/clang++"),
         filepath.Clean("src/test.cpp"),
         filepath.Clean("include/foo/a"),
+        filepath.Clean("include/foo.h"),
         filepath.Clean("out/dummy"),
     }
     er, cleanup := execroot.Setup(t, existingFiles)
@@ -200,6 +201,7 @@ func TestComputeSpecWithDepsCache(t *testing.T) {
     want := &command.InputSpec{
         Inputs: []string{
             filepath.Clean("src/test.cpp"),
+            filepath.Clean("include/foo.h"),
             filepath.Clean("bin/clang++"),
         },
         VirtualInputs: []*command.VirtualInput{
@@ -214,7 +216,7 @@ func TestComputeSpecWithDepsCache(t *testing.T) {
     c = &Preprocessor{
         CPPDepScanner:    s,
         BasePreprocessor: &inputprocessor.BasePreprocessor{Ctx: ctx, FileMetadataCache: fmc},
-        DepsCache:        depscache.New(filemetadata.NewSingleFlightCache()),
+        DepsCache:        depscache.New(),
     }
     c.DepsCache.LoadFromDir(er)
     got, err = inputprocessor.Compute(c, opts)
@@ -244,7 +246,7 @@ func TestComputeSpecWithDepsCache_ResourceDirChanged(t *testing.T) {
             FileMetadataCache: fmc,
             Executor:          &stubExecutor{outStr: "/first/resource/dir"},
         },
-        DepsCache: depscache.New(filemetadata.NewSingleFlightCache()),
+        DepsCache: depscache.New(),
     }
 
     existingFiles := []string{
@@ -292,7 +294,7 @@ func TestComputeSpecWithDepsCache_ResourceDirChanged(t *testing.T) {
             FileMetadataCache: fmc,
             Executor:          &stubExecutor{outStr: "/second/resource/dir"},
         },
-        DepsCache: depscache.New(filemetadata.NewSingleFlightCache()),
+        DepsCache: depscache.New(),
     }
     c.DepsCache.LoadFromDir(er)
     got, err = inputprocessor.Compute(c, opts)
@@ -602,13 +604,14 @@ func TestComputeSpecEventTimes(t *testing.T) {
     c := &Preprocessor{
         CPPDepScanner:    s,
         BasePreprocessor: &inputprocessor.BasePreprocessor{Ctx: ctx, FileMetadataCache: fmc},
-        DepsCache:        depscache.New(filemetadata.NewSingleFlightCache()),
+        DepsCache:        depscache.New(),
     }
 
     existingFiles := []string{
         filepath.Clean("bin/clang++"),
         filepath.Clean("src/test.cpp"),
         filepath.Clean("include/foo/a"),
+        filepath.Clean("include/foo.h"),
         filepath.Clean("out/dummy"),
     }
     er, cleanup := execroot.Setup(t, existingFiles)
@@ -641,7 +644,7 @@ func TestComputeSpecEventTimes(t *testing.T) {
     c = &Preprocessor{
         CPPDepScanner:    s,
         BasePreprocessor: &inputprocessor.BasePreprocessor{Ctx: ctx, FileMetadataCache: fmc},
-        DepsCache:        depscache.New(filemetadata.NewSingleFlightCache()),
+        DepsCache:        depscache.New(),
     }
     c.DepsCache.LoadFromDir(er)
     wg.Add(1)
diff --git a/internal/pkg/inputprocessor/depscache/BUILD.bazel b/internal/pkg/inputprocessor/depscache/BUILD.bazel
index 857f08ac..20efa51a 100644
--- a/internal/pkg/inputprocessor/depscache/BUILD.bazel
+++ b/internal/pkg/inputprocessor/depscache/BUILD.bazel
@@ -3,17 +3,19 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
 go_library(
     name = "depscache",
     srcs = [
-        "cppfilecache.go",
         "depscache.go",
+        "minimalfilecache.go",
     ],
     importpath = "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/depscache",
     visibility = ["//:__subpackages__"],
     deps = [
         "//api/proxy",
         "//internal/pkg/event",
+        "//internal/pkg/features",
         "//internal/pkg/logger",
         "//internal/pkg/version",
-        "@com_github_bazelbuild_remote_apis_sdks//go/pkg/filemetadata",
+        "@com_github_bazelbuild_remote_apis_sdks//go/pkg/cache",
+        "@com_github_bazelbuild_remote_apis_sdks//go/pkg/digest",
         "@com_github_golang_glog//:glog",
         "@org_golang_google_protobuf//proto",
         "@org_golang_google_protobuf//types/known/timestamppb",
diff --git a/internal/pkg/inputprocessor/depscache/cppfilecache.go b/internal/pkg/inputprocessor/depscache/cppfilecache.go
deleted file mode 100644
index ca1c4e6b..00000000
--- a/internal/pkg/inputprocessor/depscache/cppfilecache.go
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright 2023 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package depscache
-
-import (
-    "sync"
-
-    ppb "github.com/bazelbuild/reclient/api/proxy"
-
-    "github.com/bazelbuild/remote-apis-sdks/go/pkg/filemetadata"
-    "golang.org/x/sync/singleflight"
-    "google.golang.org/protobuf/types/known/timestamppb"
-)
-
-type fileInfo struct {
-    *ppb.FileInfo
-    updated bool
-}
-
-type cppFileCache struct {
-    fmc   filemetadata.Cache
-    files map[string]fileInfo
-    mu    sync.RWMutex
-    g     singleflight.Group
-}
-
-func (m *cppFileCache) load(absPath string) (*ppb.FileInfo, error) {
-    m.mu.RLock()
-    fi, ok := m.files[absPath]
-    m.mu.RUnlock()
-    if ok {
-        if fi.updated {
-            return fi.FileInfo, nil
-        }
-    }
-    v, err, _ := m.g.Do(absPath, func() (interface{}, error) {
-        md := m.fmc.Get(absPath)
-        fi := fileInfo{
-            FileInfo: &ppb.FileInfo{
-                AbsPath: absPath,
-                Digest:  md.Digest.String(),
-                Mtime:   timestamppb.New(md.MTime),
-            },
-            updated: true,
-        }
-        m.mu.Lock()
-        defer m.mu.Unlock()
-        m.files[absPath] = fi
-        return fi, nil
-    })
-    if err != nil {
-        return nil, err
-    }
-    return v.(fileInfo).FileInfo, nil
-}
-
-func (m *cppFileCache) init(fInfos []*ppb.FileInfo) {
-    for _, fi := range fInfos {
-        m.files[fi.AbsPath] = fileInfo{FileInfo: fi}
-    }
-}
diff --git a/internal/pkg/inputprocessor/depscache/depscache.go b/internal/pkg/inputprocessor/depscache/depscache.go
index 2e322f98..561a1e7a 100644
--- a/internal/pkg/inputprocessor/depscache/depscache.go
+++ b/internal/pkg/inputprocessor/depscache/depscache.go
@@ -25,9 +25,9 @@ import (
     ppb "github.com/bazelbuild/reclient/api/proxy"
 
     "github.com/bazelbuild/reclient/internal/pkg/event"
+    "github.com/bazelbuild/reclient/internal/pkg/features"
     "github.com/bazelbuild/reclient/internal/pkg/logger"
     "github.com/bazelbuild/reclient/internal/pkg/version"
-    "github.com/bazelbuild/remote-apis-sdks/go/pkg/filemetadata"
 
     log "github.com/golang/glog"
     "google.golang.org/protobuf/proto"
@@ -52,7 +52,7 @@ type Cache struct {
     Logger     *logger.Logger
     depsCache  map[Key][]*ppb.FileInfo
     depsMu     sync.RWMutex
-    filesCache cppFileCache
+    filesCache minimalFileCache
     // last use time of a key
     lutByKey map[Key]time.Time
     lutMu    sync.Mutex
@@ -62,13 +62,12 @@
 }
 
 // New creates a new empty deps cache.
-func New(fmc filemetadata.Cache) *Cache {
+func New() *Cache {
     return &Cache{
-        MaxEntries: 100000,
+        MaxEntries: features.GetConfig().ExperimentalGomaDepsCacheSize,
         depsCache:  make(map[Key][]*ppb.FileInfo),
         lutByKey:   make(map[Key]time.Time),
-        filesCache: cppFileCache{
-            fmc:   fmc,
+        filesCache: minimalFileCache{
             files: make(map[string]fileInfo),
         },
     }
diff --git a/internal/pkg/inputprocessor/depscache/depscache_test.go b/internal/pkg/inputprocessor/depscache/depscache_test.go
index 4e20a7a4..0930cd76 100644
--- a/internal/pkg/inputprocessor/depscache/depscache_test.go
+++ b/internal/pkg/inputprocessor/depscache/depscache_test.go
@@ -32,11 +32,11 @@ func TestSetGet(t *testing.T) {
     er, cleanup := execroot.Setup(t, nil)
     defer cleanup()
     files := map[string][]byte{
-        filepath.Join(er, "foo"): []byte("foo"),
-        filepath.Join(er, "bar"): []byte("bar"),
+        filepath.Join(er, "foo"): []byte("#foo"),
+        filepath.Join(er, "bar"): []byte("#bar"),
     }
     execroot.AddFilesWithContent(t, "", files)
-    dc := New(filemetadata.NewSingleFlightCache())
+    dc := New()
     dc.LoadFromDir(er)
     k := Key{
         CommandDigest: "hash/123",
@@ -60,11 +60,11 @@ func TestSetGetBeforeLoaded(t *testing.T) {
     er, cleanup := execroot.Setup(t, nil)
     defer cleanup()
     files := map[string][]byte{
-        filepath.Join(er, "foo"): []byte("foo"),
-        filepath.Join(er, "bar"): []byte("bar"),
+        filepath.Join(er, "foo"): []byte("#foo"),
+        filepath.Join(er, "bar"): []byte("#bar"),
     }
     execroot.AddFilesWithContent(t, "", files)
-    dc := New(filemetadata.NewSingleFlightCache())
+    dc := New()
     k := Key{
         CommandDigest: "hash/123",
         SrcFilePath:   filepath.Join(er, "foo"),
@@ -86,19 +86,23 @@ func TestWriteLoad(t *testing.T) {
     barPath := filepath.Join(er, "bar")
     bazPath := filepath.Join(er, "baz")
     quxPath := filepath.Join(er, "qux")
+    chaPath := filepath.Join(er, "cha")
     files := map[string][]byte{
         // Below files will not change between instances of deps cache.
-        fooPath: []byte("foo"),
-        barPath: []byte("bar"),
+        fooPath: []byte("#foo"),
+        barPath: []byte("#bar"),
         // Below file will change between instance of deps cache in a way
         // that affects dependencies (minimized header changes).
-        bazPath: []byte("baz"),
+        bazPath: []byte("#baz"),
         // Below file will change between instances of deps cache but not
         // affect dependencies (minimized header doesn't change).
-        quxPath: []byte("qux"),
+        quxPath: []byte("#qux"),
+        // Below file will change in a way that affects dependencies via
+        // a multi-line directive.
+        chaPath: []byte("#cha \\\nbuz"),
     }
     execroot.AddFilesWithContent(t, "", files)
-    dc := New(filemetadata.NewSingleFlightCache())
+    dc := New()
     dc.LoadFromDir(er)
     k1 := Key{
         CommandDigest: "hash/123",
@@ -124,10 +128,19 @@
     if err != nil {
         t.Errorf("SetDeps(k3) returned error: %v", err)
     }
+    k4 := Key{
+        CommandDigest: "hash4/123",
+        SrcFilePath:   chaPath,
+    }
+    err = dc.SetDeps(k4, []string{chaPath})
+    if err != nil {
+        t.Errorf("SetDeps(k4) returned error: %v", err)
+    }
     dc.WriteToDisk(er)
     time.Sleep(1 * time.Second)
-    execroot.AddFileWithContent(t, bazPath, []byte("baz2"))
-    execroot.AddFileWithContent(t, quxPath, []byte("qux"))
+    execroot.AddFileWithContent(t, bazPath, []byte("#baz2"))
+    execroot.AddFileWithContent(t, quxPath, []byte("#qux\ncomment"))
+    execroot.AddFileWithContent(t, chaPath, []byte("#cha \\\nbop"))
     // Update mtime to sometime in the past to check whether we erroneously rely on stale
     // mtime of modified files.
     if err := os.Chtimes(bazPath, time.Now().Add(-1*time.Hour), time.Now().Add(-1*time.Hour)); err != nil {
@@ -135,7 +148,7 @@
     }
     time.Sleep(1 * time.Second)
     filemetadata.ResetGlobalCache()
-    dc = New(filemetadata.NewSingleFlightCache())
+    dc = New()
     dc.LoadFromDir(er)
     deps, ok := dc.GetDeps(k1)
     if !ok {
@@ -155,6 +168,10 @@
     if diff := cmp.Diff([]string{quxPath}, deps); diff != "" {
         t.Errorf("GetDeps(k3) returned diff, (-want +got): %s", diff)
     }
+    deps, ok = dc.GetDeps(k4)
+    if ok || deps != nil {
+        t.Errorf("GetDeps(k4) returned %v, %v, want nil, false", deps, ok)
+    }
 }
 
 func TestWriteLoadKeysDependingOnSameFile(t *testing.T) {
@@ -163,11 +180,11 @@
     fooPath := filepath.Join(er, "foo")
     barPath := filepath.Join(er, "bar")
     files := map[string][]byte{
-        fooPath: []byte("foo"),
-        barPath: []byte("bar"),
+        fooPath: []byte("#foo"),
+        barPath: []byte("#bar"),
     }
     execroot.AddFilesWithContent(t, "", files)
-    dc := New(filemetadata.NewSingleFlightCache())
+    dc := New()
     dc.LoadFromDir(er)
     k1 := Key{
         CommandDigest: "hash/123",
@@ -187,10 +204,10 @@
     }
     dc.WriteToDisk(er)
     time.Sleep(1 * time.Second)
-    execroot.AddFileWithContent(t, barPath, []byte("bar2"))
+    execroot.AddFileWithContent(t, barPath, []byte("#bar2"))
     time.Sleep(1 * time.Second)
     filemetadata.ResetGlobalCache()
-    dc = New(filemetadata.NewSingleFlightCache())
+    dc = New()
     dc.LoadFromDir(er)
     // bar has changed, we should not get a cache hit.
     deps, ok := dc.GetDeps(k1)
@@ -206,7 +223,7 @@
     dc.WriteToDisk(er)
     time.Sleep(1 * time.Second)
     filemetadata.ResetGlobalCache()
-    dc = New(filemetadata.NewSingleFlightCache())
+    dc = New()
     dc.LoadFromDir(er)
     deps, ok = dc.GetDeps(k2)
     // k2 should still get a cache miss because it refers to the old version of bar.
@@ -223,13 +240,13 @@ func TestEviction(t *testing.T) {
     bazPath := filepath.Join(er, "baz")
     quxPath := filepath.Join(er, "qux")
     files := map[string][]byte{
-        fooPath: []byte("foo"),
-        barPath: []byte("bar"),
-        bazPath: []byte("baz"),
-        quxPath: []byte("qux"),
+        fooPath: []byte("#foo"),
+        barPath: []byte("#bar"),
+        bazPath: []byte("#baz"),
+        quxPath: []byte("#qux"),
     }
     execroot.AddFilesWithContent(t, "", files)
-    dc := New(filemetadata.NewSingleFlightCache())
+    dc := New()
     dc.LoadFromDir(er)
     k1 := Key{
         CommandDigest: "hash/123",
@@ -253,7 +270,7 @@
     }
     dc.WriteToDisk(er)
     filemetadata.ResetGlobalCache()
-    dc = New(filemetadata.NewSingleFlightCache())
+    dc = New()
     dc.MaxEntries = 2
     dc.LoadFromDir(er)
     wg := sync.WaitGroup{}
@@ -276,7 +293,7 @@
     }
     wg.Wait()
     dc.WriteToDisk(er)
-    dc = New(filemetadata.NewSingleFlightCache())
+    dc = New()
     dc.LoadFromDir(er)
     wantKeys := []Key{privateKey(dc, k1), privateKey(dc, k3)}
     gotKeys := make([]Key, 0)
diff --git a/internal/pkg/inputprocessor/depscache/minimalfilecache.go b/internal/pkg/inputprocessor/depscache/minimalfilecache.go
new file mode 100644
index 00000000..1c9dcedd
--- /dev/null
+++ b/internal/pkg/inputprocessor/depscache/minimalfilecache.go
@@ -0,0 +1,148 @@
+// Copyright 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package depscache
+
+import (
+    "bufio"
+    "encoding/hex"
+    "os"
+    "strings"
+    "sync"
+    "time"
+
+    ppb "github.com/bazelbuild/reclient/api/proxy"
+
+    "github.com/bazelbuild/remote-apis-sdks/go/pkg/cache"
+    "github.com/bazelbuild/remote-apis-sdks/go/pkg/digest"
+    "golang.org/x/sync/singleflight"
+    "google.golang.org/protobuf/types/known/timestamppb"
+)
+
+// Similar to the removed cppfilecache, but instead of digesting the
+// entire contents of a file, this cache only digests lines that start
+// with #. The idea is that changes to a file that cannot affect its
+// dependencies do not invalidate existing cache entries.
+
+type fileInfo struct {
+    *ppb.FileInfo
+    updated bool
+}
+
+type minimalFileCache struct {
+    minfc cache.SingleFlight
+    files map[string]fileInfo
+    mu    sync.RWMutex
+    g     singleflight.Group
+}
+
+type minDigestInfo struct {
+    digest digest.Digest
+    mtime  time.Time
+}
+
+func (m *minimalFileCache) load(absPath string) (*ppb.FileInfo, error) {
+    m.mu.RLock()
+    fi, ok := m.files[absPath]
+    m.mu.RUnlock()
+    if ok {
+        if fi.updated {
+            return fi.FileInfo, nil
+        }
+    }
+    v, err, _ := m.g.Do(absPath, func() (interface{}, error) {
+        res, err := m.minfc.LoadOrStore(absPath, func() (interface{}, error) {
+            return minimalDigest(absPath)
+        })
+        if err != nil {
+            return nil, err
+        }
+        mdinfo := res.(*minDigestInfo)
+
+        fi := fileInfo{
+            FileInfo: &ppb.FileInfo{
+                AbsPath: absPath,
+                Digest:  mdinfo.digest.String(),
+                Mtime:   timestamppb.New(mdinfo.mtime),
+            },
+            updated: true,
+        }
+        m.mu.Lock()
+        defer m.mu.Unlock()
+        m.files[absPath] = fi
+        return fi, nil
+    })
+    if err != nil {
+        return nil, err
+    }
+    return v.(fileInfo).FileInfo, nil
+}
+
+func (m *minimalFileCache) init(fInfos []*ppb.FileInfo) {
+    for _, fi := range fInfos {
+        m.files[fi.AbsPath] = fileInfo{FileInfo: fi}
+    }
+}
+
+// minimalDigest computes a digest over just the preprocessor directives
+// of a file.
+//
+// One caveat: it also digests lines that look like preprocessor
+// directives but sit inside /* */ comment blocks. This can only cause
+// spurious cache misses, so it does not affect overall correctness.
+func minimalDigest(absPath string) (*minDigestInfo, error) {
+    fstat, err := os.Stat(absPath)
+    if err != nil {
+        return nil, err
+    }
+    f, err := os.Open(absPath)
+    if err != nil {
+        return nil, err
+    }
+    defer f.Close()
+
+    // Scan through the file and hash only the lines that start with #.
+    lsc := bufio.NewScanner(f)
+    h := digest.HashFn.New()
+    sz := 0
+
+    continuedLine := false
+    for lsc.Scan() {
+        // See https://cplusplus.com/doc/tutorial/preprocessor/ for a brief rundown on
+        // preprocessor directives and how they are structured.
+        txt := strings.TrimSpace(lsc.Text())
+        if strings.HasPrefix(txt, "#") || continuedLine {
+            continuedLine = false
+            h.Write([]byte(txt))
+            sz += len(txt)
+
+            // If the line ends in a backslash, the directive continues on the
+            // following line.
+            if strings.HasSuffix(txt, "\\") {
+                continuedLine = true
+            }
+        }
+    }
+    if err := lsc.Err(); err != nil {
+        return nil, err
+    }
+
+    arr := h.Sum(nil)
+    mdinfo := &minDigestInfo{
+        digest: digest.Digest{Hash: hex.EncodeToString(arr[:]), Size: int64(sz)},
+        mtime:  fstat.ModTime(),
+    }
+
+    return mdinfo, nil
+}
diff --git a/pkg/inputprocessor/inputprocessor.go b/pkg/inputprocessor/inputprocessor.go
index 13683e93..37bcce32 100644
--- a/pkg/inputprocessor/inputprocessor.go
+++ b/pkg/inputprocessor/inputprocessor.go
@@ -146,7 +146,7 @@ func NewInputProcessor(ctx context.Context, executor Executor, resMgr *localreso
     ip := newInputProcessor(depScanner, opt.IPTimeout, opt.CppLinkDeepScan, executor, resMgr, fmc, l)
     cleanup := func() {}
     if useDepsCache && (!depScanner.Capabilities().GetCaching() || features.GetConfig().ExperimentalGomaDepsCache) {
-        ip.depsCache, cleanup = newDepsCache(fmc, opt.CacheDir, l)
+        ip.depsCache, cleanup = newDepsCache(opt.CacheDir, l)
     }
     return ip, onceFunc(func() {
         cleanup()
@@ -173,8 +173,8 @@ func newInputProcessor(ds cppcompile.CPPDependencyScanner, depScanTimeout time.D
     }
 }
 
-func newDepsCache(fmc filemetadata.Cache, depsCacheDir string, l *logger.Logger) (*depscache.Cache, func()) {
-    dc := depscache.New(fmc)
+func newDepsCache(depsCacheDir string, l *logger.Logger) (*depscache.Cache, func()) {
+    dc := depscache.New()
     dc.Logger = l
     go dc.LoadFromDir(depsCacheDir)
     return dc, func() {
diff --git a/pkg/inputprocessor/inputprocessor_test.go b/pkg/inputprocessor/inputprocessor_test.go
index e223bfe0..84625cb1 100644
--- a/pkg/inputprocessor/inputprocessor_test.go
+++ b/pkg/inputprocessor/inputprocessor_test.go
@@ -166,7 +166,7 @@ func TestCppWithDepsCache(t *testing.T) {
     resMgr := localresources.NewDefaultManager()
     fmc := filemetadata.NewSingleFlightCache()
     ip := newInputProcessor(ds, dsTimeout, false, nil, resMgr, fmc, nil)
-    ip.depsCache, cleanup = newDepsCache(fmc, er, nil)
+    ip.depsCache, cleanup = newDepsCache(er, nil)
     cmd := []string{"clang++", "-c", "-o", "test.o", "-Xclang", "-add-plugin", "-Xclang", "bar",