Skip to content

Commit

Permalink
Split metrics uploading into separate binary
Browse files Browse the repository at this point in the history
Bug: b/294945709
Test: Updated unit and integration tests
Change-Id: I6e3dd4ed90696a51f9e014709e697409c38c8dc0
GitOrigin-RevId: 3ff0af7e2384671e2b796e418011133cdb0d0e7e
  • Loading branch information
bentekkie authored and copybara-github committed Sep 11, 2023
1 parent 78786d6 commit 736af22
Show file tree
Hide file tree
Showing 26 changed files with 411 additions and 327 deletions.
4 changes: 4 additions & 0 deletions api/stats/stats.proto
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ message Stats {
// action. 0 if there are no actions.
double build_latency = 11;

// Whether FATAL log files were found in the log directory when reproxy was
// shut down by bootstrap.
bool fatal_exit = 12;

reserved 3, 8;
}

Expand Down
1 change: 1 addition & 0 deletions cfg/release/cipd/cipd-linux-csd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ data:
- file: rewrapper
- file: reproxystatus
- file: reclientreport
- file: metricsuploader
1 change: 1 addition & 0 deletions cfg/release/cipd/cipd-linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ data:
- file: remotetool
- file: scandeps_server
- file: scandeps_server.sym
- file: metricsuploader
1 change: 1 addition & 0 deletions cfg/release/cipd/cipd-mac-arm64-csd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ data:
- file: reproxystatus
- file: reclientreport
- file: remotetool
- file: metricsuploader
1 change: 1 addition & 0 deletions cfg/release/cipd/cipd-mac-arm64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ data:
- file: scandeps_server
- file: scandeps_server.sym
- file: remotetool
- file: metricsuploader
1 change: 1 addition & 0 deletions cfg/release/cipd/cipd-mac-csd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ data:
- file: reproxystatus
- file: reclientreport
- file: remotetool
- file: metricsuploader
1 change: 1 addition & 0 deletions cfg/release/cipd/cipd-mac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ data:
- file: remotetool
- file: scandeps_server
- file: scandeps_server.sym
- file: metricsuploader
1 change: 1 addition & 0 deletions cfg/release/cipd/cipd-windows-csd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ data:
- file: rewrapper.exe
- file: reproxystatus.exe
- file: reclientreport.exe
- file: metricsuploader.exe
1 change: 1 addition & 0 deletions cfg/release/cipd/cipd-windows.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ data:
- file: scandeps_server.exe
- file: scandeps_server.pdb
- file: scandeps_server.sym
- file: metricsuploader.exe
3 changes: 1 addition & 2 deletions cmd/bootstrap/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@ go_library(
"//api/log",
"//api/stats",
"//internal/pkg/auth",
"//internal/pkg/bigquery",
"//internal/pkg/bootstrap",
"//internal/pkg/logger",
"//internal/pkg/monitoring",
"//internal/pkg/pathtranslator",
"//internal/pkg/rbeflag",
"//internal/pkg/stats",
Expand All @@ -21,6 +19,7 @@ go_library(
"@com_github_bazelbuild_remote_apis_sdks//go/pkg/command",
"@com_github_bazelbuild_remote_apis_sdks//go/pkg/moreflag",
"@com_github_golang_glog//:go_default_library",
"@org_golang_google_protobuf//proto",
],
)

Expand Down
122 changes: 83 additions & 39 deletions cmd/bootstrap/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,8 @@ import (
lpb "team/foundry-x/re-client/api/log"
spb "team/foundry-x/re-client/api/stats"
"team/foundry-x/re-client/internal/pkg/auth"
"team/foundry-x/re-client/internal/pkg/bigquery"
"team/foundry-x/re-client/internal/pkg/bootstrap"
"team/foundry-x/re-client/internal/pkg/logger"
"team/foundry-x/re-client/internal/pkg/monitoring"
"team/foundry-x/re-client/internal/pkg/pathtranslator"
"team/foundry-x/re-client/internal/pkg/rbeflag"
"team/foundry-x/re-client/internal/pkg/stats"
Expand All @@ -42,6 +40,7 @@ import (
"github.com/bazelbuild/remote-apis-sdks/go/pkg/command"
"github.com/bazelbuild/remote-apis-sdks/go/pkg/moreflag"
log "github.com/golang/glog"
"google.golang.org/protobuf/proto"
)

// bootstrapStart saves the start time of the bootstrap binary.
Expand All @@ -50,7 +49,6 @@ var bootstrapStart = time.Now()

var (
homeDir, _ = os.UserHomeDir()
labels = make(map[string]string)
gcertErrMsg = fmt.Sprintf("\nTry restarting the build after running %q\n", "gcert")
gcloudErrMsg = fmt.Sprintf("\nTry restarting the build after running %q\n", "gcloud auth login")
logDir = os.TempDir()
Expand All @@ -68,9 +66,6 @@ var (
fastLogCollection = flag.Bool("fast_log_collection", false, "Enable optimized log aggregation pipeline. Does not work for multileg builds")
asyncReproxyShutdown = flag.Bool("async_reproxy_termination", false, "Allows reproxy to finish shutdown asyncronously. Only applicable with fast_log_collection=true")
metricsProject = flag.String("metrics_project", "", "If set, action and build metrics are exported to Cloud Monitoring in the specified GCP project")
metricsPrefix = flag.String("metrics_prefix", "", "Prefix of metrics exported to Cloud Monitoring")
metricsNamespace = flag.String("metrics_namespace", "", "Namespace of metrics exported to Cloud Monitoring (e.g. RBE project)")
metricsTable = flag.String("metrics_table", "", "Resource specifier of the BigQuery table to upload the contents of rbe_metrics.pb to. If the project is not provided in the specifier metrics_project will be used.")
outputDir = flag.String("output_dir", os.TempDir(), "The location to which stats should be written.")
useADC = flag.Bool(auth.UseAppDefaultCredsFlag, false, "Indicates whether to use application default credentials for authentication")
useGCE = flag.Bool(auth.UseGCECredsFlag, false, "Indicates whether to use GCE VM credentials for authentication")
Expand All @@ -79,12 +74,12 @@ var (
credFile = flag.String(auth.CredentialFileFlag, "", "The name of a file that contains service account credentials to use when calling remote execution. Used only if --use_application_default_credentials and --use_gce_credentials are false.")
remoteDisabled = flag.Bool("remote_disabled", false, "Whether to disable all remote operations and run all actions locally.")
cacheDir = flag.String("cache_dir", "", "Directory from which to load the cache files at startup and update at shutdown.")
metricsUploader = flag.String("metrics_uploader", defaultMetricsUploader(), "Path to the metrics uploader binary.")
)

func main() {
defer log.Flush()
flag.Var((*moreflag.StringListValue)(&proxyLogDir), "proxy_log_dir", "If provided, the directory path to a proxy log file of executed records.")
flag.Var((*moreflag.StringMapValue)(&labels), "metrics_labels", "Comma-separated key value pairs in the form key=value. This is used to add arbitrary labels to exported metrics.")
rbeflag.Parse()
version.PrintAndExitOnVersionFlag(true)

Expand Down Expand Up @@ -136,45 +131,49 @@ func main() {
From: bootstrapStart,
To: time.Now(),
})
if *metricsProject != "" {
start := time.Now()
var e *monitoring.Exporter
e, err = newExporter(creds)
if err != nil {
log.Warningf("Failed to initialize cloud monitoring: %v", err)
} else {
e.ExportBuildMetrics(context.Background(), s, spi.EventTimes[logger.EventBootstrapShutdown])
defer e.Close()
}
spi.EventTimes[logger.EventPostBuildMetricsUpload] = command.TimeIntervalToProto(&command.TimeInterval{From: start, To: time.Now()})
spi.Metrics[logger.EventPostBuildMetricsUpload] = &lpb.Metric{Value: &lpb.Metric_BoolValue{err == nil}}
}
s.ProxyInfo = append(s.ProxyInfo, spi)
s.FatalExit = fatalLogsExist(logDir)
log.Infof("Writing stats to %v", *outputDir)
if err := stats.WriteStats(s, *outputDir); err != nil {
log.Errorf("WriteStats(%s) failed: %v", *outputDir, err)
} else {
log.Infof("Stats dumped successfully.")
}
if *metricsTable != "" {
inserter, cleanup, err := bigquery.NewInserter(context.Background(), *metricsTable, *metricsProject, creds)
if err != nil {
log.Warningf("Error creating a bigquery client: %v", err)
return
if *metricsProject == "" {
return
}

tempRbeMetricsFilePath, err := createTempRbeMetricsFile(s)
if err != nil {
log.Errorf("Unable to make temp rbe_metrics.pb for upload: %v", err)
return
}

uploaderArgs := []string{"--rbe_metrics_path=" + tempRbeMetricsFilePath}
if cfg := flag.Lookup("cfg"); cfg != nil {
if cfg.Value.String() != "" {
uploaderArgs = append(uploaderArgs, "--cfg="+cfg.Value.String())
}
defer cleanup()
err = inserter.Put(context.Background(), &stats.ProtoSaver{s})
if err != nil {
log.Warningf("Error uploading stats to bigquery: %v", err)
return
}
if ts := creds.TokenSource(); ts != nil {
if t, err := ts.Token(); err == nil {
uploaderArgs = append(uploaderArgs, "--oauth_token="+t.AccessToken)
}
}
log.Infof("Stats uploaded successfully.")

log.V(2).Infof("Running %v %v", *metricsUploader, uploaderArgs)

uploaderCmd := exec.Command(*metricsUploader, uploaderArgs...)
err = uploaderCmd.Start()
if err != nil {
log.Warningf("Failed to start metrics uploader with command line %v %v: %v", *metricsUploader, uploaderArgs, err)
}
log.Infof("Stats uploader started successfully")
log.V(2).Infof("Stats uploader pid: %d", uploaderCmd.Process.Pid)
return
}

monitoring.CleanLogDir(logDir)
cleanFatalLogs(logDir)

args := []string{}
if cfg := flag.Lookup("cfg"); cfg != nil {
Expand All @@ -201,6 +200,58 @@ func main() {
os.Exit(exitCode)
}

// failureFiles lists the names of the FATAL log files that bootstrap looks for
// in the log directory, one per tool (reproxy, bootstrap, rewrapper), each with
// and without an ".exe" component (presumably the Windows binary names).
var failureFiles = []string{"reproxy.FATAL", "bootstrap.FATAL", "rewrapper.FATAL", "reproxy.exe.FATAL", "bootstrap.exe.FATAL", "rewrapper.exe.FATAL"}

// cleanFatalLogs deletes every known FATAL log file from logDir so that stray
// files left by an earlier run do not cause confusion when bootstrap starts.
// Files that are already absent are skipped silently; any other removal
// failure is logged and the remaining files are still processed.
func cleanFatalLogs(logDir string) {
	for _, name := range failureFiles {
		target := filepath.Join(logDir, name)
		err := os.Remove(target)
		if err == nil || os.IsNotExist(err) {
			continue
		}
		log.Errorf("Failed to remove %v: %v", target, err)
	}
}

// fatalLogsExist reports whether logDir contains at least one of the known
// FATAL log files with a non-zero size. Files that cannot be stat'ed (for
// example because they do not exist) and empty files are ignored.
func fatalLogsExist(logDir string) bool {
	for _, name := range failureFiles {
		if info, err := os.Stat(filepath.Join(logDir, name)); err == nil && info.Size() > 0 {
			return true
		}
	}
	return false
}

// createTempRbeMetricsFile serializes s in binary proto format into a newly
// created, uniquely named "rbe_metrics_*.pb" file in the default temporary
// directory and returns the path of that file.
//
// On success the caller owns the file. On any failure an empty path and the
// error are returned, and no partially written file is left behind.
func createTempRbeMetricsFile(s *spb.Stats) (string, error) {
	// Marshal before touching the filesystem so a serialization failure does
	// not leave an empty temp file behind.
	blob, err := proto.Marshal(s)
	if err != nil {
		return "", err
	}
	temp, err := os.CreateTemp("", "rbe_metrics_*.pb")
	if err != nil {
		return "", err
	}
	if _, err := temp.Write(blob); err != nil {
		// Best-effort cleanup; the write error is the one worth reporting.
		temp.Close()
		os.Remove(temp.Name())
		return "", err
	}
	// A failed Close can mean the data never reached disk, so surface it
	// instead of handing a possibly truncated file to the uploader.
	if err := temp.Close(); err != nil {
		os.Remove(temp.Name()) // Best-effort cleanup.
		return "", err
	}
	return temp.Name(), nil
}

// defaultMetricsUploader computes the default value of the metrics_uploader
// flag by resolving the "metricsuploader" binary name to an absolute path via
// pathtranslator.BinaryRelToAbs. If the lookup fails a warning is logged and
// the empty string is returned.
func defaultMetricsUploader() string {
	uploaderPath, err := pathtranslator.BinaryRelToAbs("metricsuploader")
	if err == nil {
		return uploaderPath
	}
	log.Warningf("Did not find `metricsuploader` binary in the same directory as `bootstrap`: %v", err)
	return ""
}

func shutdownReproxy() (*spb.Stats, error) {
if *asyncReproxyShutdown {
// On shutdown we may not want to wait for deps cache to finish writing
Expand Down Expand Up @@ -238,13 +289,6 @@ func bootstrapReproxy(args []string, startTime time.Time) (string, int) {
return "Proxy started successfully.", 0
}

// newExporter registers the monitoring views using the configured metrics
// labels and then builds a monitoring.Exporter from the metrics flags, the
// remote_disabled flag, the log directory and the supplied credentials. An
// error from view setup is returned as-is without creating an exporter.
func newExporter(creds *auth.Credentials) (*monitoring.Exporter, error) {
	err := monitoring.SetupViews(labels)
	if err != nil {
		return nil, err
	}
	ctx := context.Background()
	return monitoring.NewExporter(ctx, *metricsProject, *metricsPrefix, *metricsNamespace, *remoteDisabled, logDir, creds)
}
func credsFilePath() (string, error) {
dir := os.TempDir()
if *cacheDir != "" {
Expand Down
27 changes: 27 additions & 0 deletions cmd/metricsuploader/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")

# Library target holding the metricsuploader command's sources (main.go).
# It is private to this package and is only consumed through the go_binary
# target below via `embed`.
go_library(
name = "metricsuploader_lib",
srcs = ["main.go"],
importpath = "team/foundry-x/re-client/cmd/metricsuploader",
visibility = ["//visibility:private"],
deps = [
"//api/stats",
"//internal/pkg/bigquery",
"//internal/pkg/monitoring",
"//internal/pkg/rbeflag",
"//internal/pkg/stats",
"//pkg/version",
"@com_github_bazelbuild_remote_apis_sdks//go/pkg/moreflag",
"@com_github_golang_glog//:go_default_library",
"@org_golang_google_grpc//credentials/oauth",
"@org_golang_google_protobuf//proto",
"@org_golang_x_oauth2//:oauth2",
],
)

# The metricsuploader executable, built by embedding :metricsuploader_lib.
# Publicly visible so other packages can depend on it.
go_binary(
name = "metricsuploader",
embed = [":metricsuploader_lib"],
visibility = ["//visibility:public"],
)
Loading

0 comments on commit 736af22

Please sign in to comment.