Skip to content

Commit 897c9ef

Browse files
committed
trace_agent_integration_tests: tentatively fix flakiness
The `trace-agent` integration tests have been repeatedly failing on timeouts, for instance: - https://gitlab.ddbuild.io/DataDog/datadog-agent/-/jobs/1261246921 - https://gitlab.ddbuild.io/DataDog/datadog-agent/-/jobs/1261237699 - https://gitlab.ddbuild.io/DataDog/datadog-agent/-/jobs/1261226844 There are at least 2 culprits: 1. rebuilding the `trace-agent` binary from scratch for every test run. When running with the race detector (`-race` flag), each build takes time, 2. module cache contention when building, see #43207 for why. This change implements the TODO at agent.go:94 by: - creating a temporary directory in `TestMain` (`BuildCachedBinaries`), - building `trace-agent` and `secret-script` binaries once in that directory, with `GOPRIVATE=*` and `GOPROXY=off` to prevent concurrent module cache access, - creating symlinks to the cached binaries in per-test subdirectories, - cleaning up the entire temp directory tree after all tests complete. This is expected to reduce test execution time, especially with race detection enabled, and hopefully prevent timeouts in `TestTraces`.
1 parent 60eda69 commit 897c9ef

File tree

2 files changed

+71
-17
lines changed

2 files changed

+71
-17
lines changed

cmd/trace-agent/test/agent.go

Lines changed: 70 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ var ErrNotInstalled = errors.New("agent: trace-agent not found in $PATH")
4545
// SecretBackendBinary secret binary name
4646
var SecretBackendBinary = "secret-script.test"
4747

48+
// Package-level state shared by every agent runner created in this package.
var (
49+
// tmpDir is the directory holding the cached binaries. It is assigned by
// buildBinaries (via os.MkdirTemp) and reset to "" by CleanupCachedBinaries.
tmpDir string
50+
// buildOnce guards the call to buildBinaries made from newAgentRunner so
// the binaries are built a single time.
buildOnce sync.Once
51+
)
52+
4853
type grpcServer struct {
4954
pb.UnimplementedAgentSecureServer
5055
}
@@ -63,41 +68,89 @@ type agentRunner struct {
6368
authToken string
6469
}
6570

71+
// CleanupCachedBinaries removes the temporary directory created for cached binaries.
72+
func CleanupCachedBinaries() {
73+
if tmpDir != "" {
74+
_, tmpDir = os.RemoveAll(tmpDir), ""
75+
}
76+
}
77+
78+
func buildBinaries(verbose bool) error {
79+
var err error
80+
tmpDir, err = os.MkdirTemp("", "trace-agent-integration-tests")
81+
if err != nil {
82+
return err
83+
}
84+
binpath := filepath.Join(tmpDir, "trace-agent")
85+
if verbose {
86+
log.Printf("agent: installing in %s...", binpath)
87+
}
88+
// Set environment variables to prevent go build commands from accessing
89+
// the module cache concurrently, which can cause timeouts.
90+
env := append(os.Environ(),
91+
"GOPRIVATE=*",
92+
"GOPROXY=off",
93+
)
94+
cmd := exec.Command("go", "build", "-tags", "otlp", "-o", binpath, "github.com/DataDog/datadog-agent/cmd/trace-agent")
95+
cmd.Env = env
96+
o, err := cmd.CombinedOutput()
97+
if err != nil {
98+
if verbose {
99+
log.Printf("error installing trace-agent: %v", err)
100+
log.Print(string(o))
101+
}
102+
return ErrNotInstalled
103+
}
104+
105+
binSecrets := filepath.Join(tmpDir, SecretBackendBinary)
106+
cmd = exec.Command("go", "build", "-o", binSecrets, "./testdata/secretscript.go")
107+
cmd.Env = env
108+
o, err = cmd.CombinedOutput()
109+
if err != nil {
110+
if verbose {
111+
log.Printf("error installing secret-script: %v", err)
112+
log.Print(string(o))
113+
}
114+
return ErrNotInstalled
115+
}
116+
117+
if err := os.Chmod(binSecrets, 0700); err != nil {
118+
if verbose {
119+
log.Printf("error changing permissions secret-script: %v", err)
120+
}
121+
return ErrNotInstalled
122+
}
123+
return nil
124+
}
125+
66126
func newAgentRunner(ddAddr string, verbose bool, buildSecretBackend bool) (*agentRunner, error) {
67-
bindir, err := os.MkdirTemp("", "trace-agent-integration-tests")
127+
var err error
128+
buildOnce.Do(func() {
129+
err = buildBinaries(verbose)
130+
})
131+
if err != nil {
132+
return nil, err
133+
}
134+
bindir, err := os.MkdirTemp(tmpDir, "runner-")
68135
if err != nil {
69136
return nil, err
70137
}
71138
binpath := filepath.Join(bindir, "trace-agent")
72139
if verbose {
73140
log.Printf("agent: installing in %s...", binpath)
74141
}
75-
// TODO(gbbr): find a way to re-use the same binary within a whole run
76-
// instead of creating new ones on each test creating a new runner.
77-
o, err := exec.Command("go", "build", "-tags", "otlp", "-o", binpath, "github.com/DataDog/datadog-agent/cmd/trace-agent").CombinedOutput()
78-
if err != nil {
142+
if err := os.Symlink(filepath.Join(tmpDir, "trace-agent"), binpath); err != nil {
79143
if verbose {
80144
log.Printf("error installing trace-agent: %v", err)
81-
log.Print(string(o))
82145
}
83146
return nil, ErrNotInstalled
84147
}
85148

86149
if buildSecretBackend {
87150
binSecrets := filepath.Join(bindir, SecretBackendBinary)
88-
o, err := exec.Command("go", "build", "-o", binSecrets, "./testdata/secretscript.go").CombinedOutput()
89-
90-
if err != nil {
151+
if err := os.Symlink(filepath.Join(tmpDir, SecretBackendBinary), binSecrets); err != nil {
91152
if verbose {
92153
log.Printf("error installing secret-script: %v", err)
93-
log.Print(string(o))
94-
}
95-
return nil, ErrNotInstalled
96-
}
97-
98-
if err := os.Chmod(binSecrets, 0700); err != nil {
99-
if verbose {
100-
log.Printf("error changing permissions secret-script: %v", err)
101154
}
102155
return nil, ErrNotInstalled
103156
}

cmd/trace-agent/test/testsuite/hostname_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ func TestMain(m *testing.M) {
2121
log.Println("--- SKIP: to run tests in this package, set the INTEGRATION environment variable")
2222
os.Exit(0)
2323
}
24+
defer test.CleanupCachedBinaries()
2425
os.Exit(m.Run())
2526
}
2627

0 commit comments

Comments
 (0)