Skip to content

Commit 3232a1b

Browse files
committed
trace_agent_integration_tests: tentatively fix flakiness
The `trace-agent` integration tests have been repeatedly failing on timeouts, for instance: - https://gitlab.ddbuild.io/DataDog/datadog-agent/-/jobs/1261246921 - https://gitlab.ddbuild.io/DataDog/datadog-agent/-/jobs/1261237699 - https://gitlab.ddbuild.io/DataDog/datadog-agent/-/jobs/1261226844 There are at least 2 culprits: 1. rebuilding the `trace-agent` binary from scratch for every test run. When running with the race detector (`-race` flag), each build takes time, 2. module cache contention when building, see #43207 for why. This change implements the TODO at agent.go:94 by: - creating a temporary directory in `TestMain` (`BuildCachedBinaries`), - building `trace-agent` and `secret-script` binaries once in that directory, with `GOPRIVATE=*` and `GOPROXY=off` to prevent concurrent module cache access, - creating symlinks to the cached binaries in per-test subdirectories, - cleaning up the entire temp directory tree after all tests complete. This is expected to reduce test execution time, especially with race detection enabled, and hopefully prevent timeouts in `TestTraces`.
1 parent 4ba41f5 commit 3232a1b

File tree

2 files changed

+62
-16
lines changed

2 files changed

+62
-16
lines changed

cmd/trace-agent/test/agent.go

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ var ErrNotInstalled = errors.New("agent: trace-agent not found in $PATH")
4545
// SecretBackendBinary secret binary name
4646
var SecretBackendBinary = "secret-script.test"
4747

48+
var tmpDir string
49+
4850
type grpcServer struct {
4951
pb.UnimplementedAgentSecureServer
5052
}
@@ -63,41 +65,74 @@ type agentRunner struct {
6365
authToken string
6466
}
6567

66-
func newAgentRunner(ddAddr string, verbose bool, buildSecretBackend bool) (*agentRunner, error) {
67-
bindir, err := os.MkdirTemp("", "trace-agent-integration-tests")
68+
// BuildCachedBinaries builds the trace-agent and secret-script binaries once and caches them.
69+
// Returns a cleanup function that should be called to remove the temporary directory.
70+
// This should be called once from TestMain before running tests.
71+
func BuildCachedBinaries(verbose bool) (func(), error) {
72+
var err error
73+
tmpDir, err = os.MkdirTemp("", "trace-agent-integration-tests")
6874
if err != nil {
6975
return nil, err
7076
}
71-
binpath := filepath.Join(bindir, "trace-agent")
77+
cleanup := func() {
78+
_ = os.RemoveAll(tmpDir)
79+
}
80+
binpath := filepath.Join(tmpDir, "trace-agent")
7281
if verbose {
7382
log.Printf("agent: installing in %s...", binpath)
7483
}
75-
// TODO(gbbr): find a way to re-use the same binary within a whole run
76-
// instead of creating new ones on each test creating a new runner.
7784
o, err := exec.Command("go", "build", "-tags", "otlp", "-o", binpath, "github.com/DataDog/datadog-agent/cmd/trace-agent").CombinedOutput()
7885
if err != nil {
7986
if verbose {
8087
log.Printf("error installing trace-agent: %v", err)
8188
log.Print(string(o))
8289
}
90+
cleanup()
8391
return nil, ErrNotInstalled
8492
}
8593

86-
if buildSecretBackend {
87-
binSecrets := filepath.Join(bindir, SecretBackendBinary)
88-
o, err := exec.Command("go", "build", "-o", binSecrets, "./testdata/secretscript.go").CombinedOutput()
94+
binSecrets := filepath.Join(tmpDir, SecretBackendBinary)
95+
o, err = exec.Command("go", "build", "-o", binSecrets, "./testdata/secretscript.go").CombinedOutput()
96+
if err != nil {
97+
if verbose {
98+
log.Printf("error installing secret-script: %v", err)
99+
log.Print(string(o))
100+
}
101+
cleanup()
102+
return nil, ErrNotInstalled
103+
}
89104

90-
if err != nil {
91-
if verbose {
92-
log.Printf("error installing secret-script: %v", err)
93-
log.Print(string(o))
94-
}
95-
return nil, ErrNotInstalled
105+
if err := os.Chmod(binSecrets, 0700); err != nil {
106+
if verbose {
107+
log.Printf("error changing permissions secret-script: %v", err)
108+
}
109+
cleanup()
110+
return nil, ErrNotInstalled
111+
}
112+
return cleanup, nil
113+
}
114+
115+
func newAgentRunner(ddAddr string, verbose bool, buildSecretBackend bool) (*agentRunner, error) {
116+
bindir, err := os.MkdirTemp(tmpDir, "runner-")
117+
if err != nil {
118+
return nil, err
119+
}
120+
binpath := filepath.Join(bindir, "trace-agent")
121+
if verbose {
122+
log.Printf("agent: installing in %s...", binpath)
123+
}
124+
if err := os.Symlink(filepath.Join(tmpDir, "trace-agent"), binpath); err != nil {
125+
if verbose {
126+
log.Printf("error installing trace-agent: %v", err)
96127
}
128+
return nil, ErrNotInstalled
129+
}
97130

98-
if err := os.Chmod(binSecrets, 0700); err != nil {
131+
if buildSecretBackend {
132+
binSecrets := filepath.Join(bindir, SecretBackendBinary)
133+
if err := os.Symlink(filepath.Join(tmpDir, SecretBackendBinary), binSecrets); err != nil {
99134
if verbose {
100-
log.Printf("error changing permissions secret-script: %v", err)
135+
log.Printf("error installing secret-script: %v", err)
101136
}
102137
return nil, ErrNotInstalled
103138
}

cmd/trace-agent/test/testsuite/hostname_test.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package testsuite
77

88
import (
9+
"flag"
910
"log"
1011
"os"
1112
"testing"
@@ -21,6 +22,16 @@ func TestMain(m *testing.M) {
2122
log.Println("--- SKIP: to run tests in this package, set the INTEGRATION environment variable")
2223
os.Exit(0)
2324
}
25+
// Set environment variables to prevent go build commands from accessing
26+
// the module cache concurrently, which can cause timeouts.
27+
os.Setenv("GOPRIVATE", "*")
28+
os.Setenv("GOPROXY", "off")
29+
flag.Parse()
30+
cleanup, err := test.BuildCachedBinaries(testing.Verbose())
31+
if err != nil {
32+
log.Fatalf("failed to build cached binaries: %v", err)
33+
}
34+
defer cleanup()
2435
os.Exit(m.Run())
2536
}
2637

0 commit comments

Comments
 (0)