Skip to content

Commit 9c8ae03

Browse files
DSET-4359: Fix flaky tests (#46)
* DSET-4359: Fix flaky tests
* Fix failing tests
* Increase duration of running tests
* Try to increase timeout for stress tests.
* Increase timeout and decrease lifetime
* Another attempt to fix flaky tests
* Include max elapsed time into more places
* Increase timeout for test with large event
* Increase version to 0.12.0
1 parent aca8af3 commit 9c8ae03

File tree

10 files changed, with 125 additions and 63 deletions.

Makefile

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -63,16 +63,17 @@ test-many-times:
6363
else \
6464
COUNT=$(COUNT); \
6565
fi; \
66+
prefix=out-test-many-times-; \
67+
rm -rfv $${prefix}*; \
6668
for i in `seq 1 $${COUNT}`; do \
67-
echo "Running test $${i} / $${COUNT}"; \
68-
rm -rfv out-test-$${i}.log; \
69-
make test 2>&1 | tee out-test-$${i}.log; \
70-
echo; \
71-
grep -H FAIL out-test-$${i}.log; \
69+
echo "Running test $${i} / $${COUNT} - BEGIN"; \
70+
make test 2>&1 | tee $${prefix}-$${i}.log | awk '{print "'$${i}'/'$${COUNT}'", $$0; }' ; \
7271
echo; \
72+
grep -H FAIL $${prefix}-$${i}.log; \
73+
echo "Running test $${i} / $${COUNT} - END"; \
7374
done; \
7475
echo "Grep for FAIL - no lines should be found"; \
75-
! grep -H FAIL out-test-*.log;
76+
! grep -H FAIL $${prefix}-*.log;
7677

7778
foo:
7879
! false

RELEASE_NOTES.md

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,9 @@
11
# Release Notes
22

3+
## 0.12.0
4+
5+
* fix: make client shutdown timeout configurable.
6+
37
## 0.0.11
48

59
* feat: make client shutdown timeout configurable.

pkg/buffer_config/buffer_config.go

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -26,12 +26,13 @@ import (
2626
const (
2727
ShouldSentBufferSize = 5 * 1024 * 1024
2828
// LimitBufferSize defines maximum payload size (before compression) for REST API
29-
LimitBufferSize = 5*1024*1024 + 960*1024
30-
MinimalMaxElapsedTime = time.Second
31-
MinimalMaxInterval = time.Second
32-
MinimalInitialInterval = 50 * time.Millisecond
33-
MinimalMultiplier = 0.0
34-
MinimalRandomizationFactor = 0.0
29+
LimitBufferSize = 5*1024*1024 + 960*1024
30+
MinimalMaxElapsedTime = time.Second
31+
MinimalMaxInterval = time.Second
32+
MinimalInitialInterval = 50 * time.Millisecond
33+
MinimalMultiplier = 0.0
34+
MinimalRandomizationFactor = 0.0
35+
MinimalRetryShutdownTimeout = 2 * MinimalMaxElapsedTime
3536
)
3637

3738
type DataSetBufferSettings struct {
@@ -147,7 +148,7 @@ func (cfg *DataSetBufferSettings) WithOptions(opts ...DataSetBufferSettingsOptio
147148

148149
func (cfg *DataSetBufferSettings) String() string {
149150
return fmt.Sprintf(
150-
"MaxLifetime: %s, MaxSize: %d, GroupBy: %s, RetryRandomizationFactor: %f, RetryMultiplier: %f, RetryInitialInterval: %s, RetryMaxInterval: %s, RetryMaxElapsedTime: %s",
151+
"MaxLifetime: %s, MaxSize: %d, GroupBy: %s, RetryRandomizationFactor: %f, RetryMultiplier: %f, RetryInitialInterval: %s, RetryMaxInterval: %s, RetryMaxElapsedTime: %s, RetryShutdownTimeout: %s",
151152
cfg.MaxLifetime,
152153
cfg.MaxSize,
153154
cfg.GroupBy,
@@ -156,6 +157,7 @@ func (cfg *DataSetBufferSettings) String() string {
156157
cfg.RetryInitialInterval,
157158
cfg.RetryMaxInterval,
158159
cfg.RetryMaxElapsedTime,
160+
cfg.RetryShutdownTimeout,
159161
)
160162
}
161163

@@ -202,11 +204,19 @@ func (cfg *DataSetBufferSettings) Validate() error {
202204

203205
if cfg.RetryRandomizationFactor <= MinimalRandomizationFactor {
204206
return fmt.Errorf(
205-
"RetryRandomizationFactor has value %f which is less or equal than %f",
207+
"RetryRandomizationFactor has value %f which is less or equal than %f",
206208
cfg.RetryRandomizationFactor,
207209
MinimalRandomizationFactor,
208210
)
209211
}
210212

213+
if cfg.RetryShutdownTimeout < MinimalRetryShutdownTimeout {
214+
return fmt.Errorf(
215+
"RetryShutdownTimeout has value %s which is less than %s",
216+
cfg.RetryShutdownTimeout,
217+
MinimalRetryShutdownTimeout,
218+
)
219+
}
220+
211221
return nil
212222
}

pkg/buffer_config/buffer_config_test.go

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ func TestConfigWithOptions(t *testing.T) {
3030
WithRetryInitialInterval(8*time.Second),
3131
WithRetryMaxInterval(30*time.Second),
3232
WithRetryMaxElapsedTime(10*time.Minute),
33+
WithRetryShutdownTimeout(2*time.Minute),
3334
)
3435

3536
assert.Nil(t, errB)
@@ -41,6 +42,7 @@ func TestConfigWithOptions(t *testing.T) {
4142
RetryInitialInterval: 8 * time.Second,
4243
RetryMaxInterval: 30 * time.Second,
4344
RetryMaxElapsedTime: 10 * time.Minute,
45+
RetryShutdownTimeout: 2 * time.Minute,
4446
}, *bufCfg)
4547
}
4648

@@ -52,6 +54,7 @@ func TestDataConfigUpdate(t *testing.T) {
5254
WithRetryInitialInterval(8*time.Second),
5355
WithRetryMaxInterval(30*time.Second),
5456
WithRetryMaxElapsedTime(10*time.Minute),
57+
WithRetryShutdownTimeout(2*time.Minute),
5558
)
5659
assert.Nil(t, errB)
5760

@@ -62,6 +65,7 @@ func TestDataConfigUpdate(t *testing.T) {
6265
RetryInitialInterval: 8 * time.Second,
6366
RetryMaxInterval: 30 * time.Second,
6467
RetryMaxElapsedTime: 10 * time.Minute,
68+
RetryShutdownTimeout: 2 * time.Minute,
6569
}, *bufCfg)
6670

6771
bufCfg2, err := bufCfg.WithOptions(
@@ -71,6 +75,7 @@ func TestDataConfigUpdate(t *testing.T) {
7175
WithRetryInitialInterval(28*time.Second),
7276
WithRetryMaxInterval(230*time.Second),
7377
WithRetryMaxElapsedTime(210*time.Minute),
78+
WithRetryShutdownTimeout(5*time.Minute),
7479
)
7580
assert.Nil(t, err)
7681

@@ -82,6 +87,7 @@ func TestDataConfigUpdate(t *testing.T) {
8287
RetryInitialInterval: 8 * time.Second,
8388
RetryMaxInterval: 30 * time.Second,
8489
RetryMaxElapsedTime: 10 * time.Minute,
90+
RetryShutdownTimeout: 2 * time.Minute,
8591
}, *bufCfg)
8692

8793
// new config is changed
@@ -92,12 +98,13 @@ func TestDataConfigUpdate(t *testing.T) {
9298
RetryInitialInterval: 28 * time.Second,
9399
RetryMaxInterval: 230 * time.Second,
94100
RetryMaxElapsedTime: 210 * time.Minute,
101+
RetryShutdownTimeout: 5 * time.Minute,
95102
}, *bufCfg2)
96103
}
97104

98105
func TestDataConfigNewDefaultToString(t *testing.T) {
99106
cfg := NewDefaultDataSetBufferSettings()
100-
assert.Equal(t, "MaxLifetime: 5s, MaxSize: 6225920, GroupBy: [], RetryRandomizationFactor: 0.500000, RetryMultiplier: 1.500000, RetryInitialInterval: 5s, RetryMaxInterval: 30s, RetryMaxElapsedTime: 5m0s", cfg.String())
107+
assert.Equal(t, "MaxLifetime: 5s, MaxSize: 6225920, GroupBy: [], RetryRandomizationFactor: 0.500000, RetryMultiplier: 1.500000, RetryInitialInterval: 5s, RetryMaxInterval: 30s, RetryMaxElapsedTime: 5m0s, RetryShutdownTimeout: 30s", cfg.String())
101108
}
102109

103110
func TestDataConfigNewDefaultIsValid(t *testing.T) {

pkg/client/add_events.go

Lines changed: 68 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -217,28 +217,38 @@ func (client *DataSetClient) isProcessingEvents() bool {
217217
// - tries (with 2nd half of RetryShutdownTimeout period) to send processed events (buffers) into DataSet
218218
func (client *DataSetClient) Shutdown() error {
219219
client.Logger.Info("Shutting down - BEGIN")
220+
// start measuring processing time
221+
processingStart := time.Now()
222+
220223
// mark as finished to prevent processing of further events
221224
client.finished.Store(true)
222225

223226
// log statistics when finish was called
224227
client.logStatistics()
225228

229+
retryShutdownTimeout := client.Config.BufferSettings.RetryShutdownTimeout
230+
maxElapsedTime := retryShutdownTimeout/2 - 100*time.Millisecond
231+
client.Logger.Info(
232+
"Shutting down - waiting for events",
233+
zap.Duration("maxElapsedTime", maxElapsedTime),
234+
zap.Duration("retryShutdownTimeout", retryShutdownTimeout),
235+
zap.Duration("elapsedTime", time.Since(processingStart)),
236+
)
237+
226238
var lastError error = nil
227-
shutdownTimeout := minDuration(client.Config.BufferSettings.RetryMaxElapsedTime, client.Config.BufferSettings.RetryShutdownTimeout)
228239
expBackoff := backoff.ExponentialBackOff{
229240
InitialInterval: client.Config.BufferSettings.RetryInitialInterval,
230241
RandomizationFactor: client.Config.BufferSettings.RetryRandomizationFactor,
231242
Multiplier: client.Config.BufferSettings.RetryMultiplier,
232243
MaxInterval: client.Config.BufferSettings.RetryMaxInterval,
233-
MaxElapsedTime: shutdownTimeout / 2,
244+
MaxElapsedTime: maxElapsedTime,
234245
Stop: backoff.Stop,
235246
Clock: backoff.SystemClock,
236247
}
237248
expBackoff.Reset()
238249

239250
// try (with timeout) to process (add into buffers) events,
240251
retryNum := 0
241-
processingStart := time.Now()
242252
for client.isProcessingEvents() {
243253
// log statistics
244254
client.logStatistics()
@@ -250,38 +260,40 @@ func (client *DataSetClient) Shutdown() error {
250260
zap.Duration("backoffDelay", backoffDelay),
251261
zap.Uint64("eventsEnqueued", client.eventsEnqueued.Load()),
252262
zap.Uint64("eventsProcessed", client.eventsProcessed.Load()),
263+
zap.Duration("elapsedTime", time.Since(processingStart)),
264+
zap.Duration("maxElapsedTime", maxElapsedTime),
253265
)
254266
if backoffDelay == expBackoff.Stop {
255-
lastError = fmt.Errorf(
256-
"not all events have been processed - %d",
257-
client.eventsEnqueued.Load()-client.eventsProcessed.Load(),
258-
)
259-
client.Logger.Error(
260-
"Shutting down - not all events have been processed",
261-
zap.Int("retryNum", retryNum),
262-
zap.Duration("backoffDelay", backoffDelay),
263-
zap.Uint64("eventsEnqueued", client.eventsEnqueued.Load()),
264-
zap.Uint64("eventsProcessed", client.eventsProcessed.Load()),
265-
)
266267
break
267268
}
268269
time.Sleep(backoffDelay)
269270
retryNum++
270271
}
271272

272273
// send all buffers
273-
client.Logger.Info("Shutting down - publishing all buffers")
274+
client.Logger.Info(
275+
"Shutting down - publishing all buffers",
276+
zap.Duration("retryShutdownTimeout", retryShutdownTimeout),
277+
zap.Duration("elapsedTime", time.Since(processingStart)),
278+
)
274279
client.publishAllBuffers()
275280

276281
// reinitialize expBackoff with MaxElapsedTime based on actually elapsed time of processing (previous phase)
277282
processingElapsed := time.Since(processingStart)
278-
remainingShutdownTimeout := maxDuration(shutdownTimeout-processingElapsed, shutdownTimeout/2)
283+
maxElapsedTime = maxDuration(retryShutdownTimeout-processingElapsed, retryShutdownTimeout/2)
284+
client.Logger.Info(
285+
"Shutting down - waiting for buffers",
286+
zap.Duration("maxElapsedTime", maxElapsedTime),
287+
zap.Duration("retryShutdownTimeout", retryShutdownTimeout),
288+
zap.Duration("elapsedTime", time.Since(processingStart)),
289+
)
290+
279291
expBackoff = backoff.ExponentialBackOff{
280292
InitialInterval: client.Config.BufferSettings.RetryInitialInterval,
281293
RandomizationFactor: client.Config.BufferSettings.RetryRandomizationFactor,
282294
Multiplier: client.Config.BufferSettings.RetryMultiplier,
283295
MaxInterval: client.Config.BufferSettings.RetryMaxInterval,
284-
MaxElapsedTime: remainingShutdownTimeout,
296+
MaxElapsedTime: maxElapsedTime,
285297
Stop: backoff.Stop,
286298
Clock: backoff.SystemClock,
287299
}
@@ -301,25 +313,43 @@ func (client *DataSetClient) Shutdown() error {
301313
zap.Uint64("buffersEnqueued", client.buffersEnqueued.Load()),
302314
zap.Uint64("buffersProcessed", client.buffersProcessed.Load()),
303315
zap.Uint64("buffersDropped", client.buffersDropped.Load()),
316+
zap.Duration("elapsedTime", time.Since(processingStart)),
317+
zap.Duration("maxElapsedTime", maxElapsedTime),
304318
)
305319
if backoffDelay == expBackoff.Stop {
306-
lastError = fmt.Errorf(
307-
"not all buffers have been processed - %d",
308-
client.buffersEnqueued.Load()-client.buffersProcessed.Load()-client.buffersDropped.Load(),
309-
)
310-
client.Logger.Error(
311-
"Shutting down - not all buffers have been processed",
312-
zap.Int("retryNum", retryNum),
313-
zap.Uint64("buffersEnqueued", client.buffersEnqueued.Load()),
314-
zap.Uint64("buffersProcessed", client.buffersProcessed.Load()),
315-
zap.Uint64("buffersDropped", client.buffersDropped.Load()),
316-
)
317320
break
318321
}
319322
time.Sleep(backoffDelay)
320323
retryNum++
321324
}
322325

326+
// construct error messages
327+
if client.isProcessingEvents() {
328+
lastError = fmt.Errorf(
329+
"not all events have been processed - %d",
330+
client.eventsEnqueued.Load()-client.eventsProcessed.Load(),
331+
)
332+
client.Logger.Error(
333+
"Shutting down - not all events have been processed",
334+
zap.Uint64("eventsEnqueued", client.eventsEnqueued.Load()),
335+
zap.Uint64("eventsProcessed", client.eventsProcessed.Load()),
336+
)
337+
}
338+
339+
if client.isProcessingBuffers() {
340+
lastError = fmt.Errorf(
341+
"not all buffers have been processed - %d",
342+
client.buffersEnqueued.Load()-client.buffersProcessed.Load()-client.buffersDropped.Load(),
343+
)
344+
client.Logger.Error(
345+
"Shutting down - not all buffers have been processed",
346+
zap.Int("retryNum", retryNum),
347+
zap.Uint64("buffersEnqueued", client.buffersEnqueued.Load()),
348+
zap.Uint64("buffersProcessed", client.buffersProcessed.Load()),
349+
zap.Uint64("buffersDropped", client.buffersDropped.Load()),
350+
)
351+
}
352+
323353
buffersDropped := client.buffersDropped.Load() - initialDropped
324354
if buffersDropped > 0 {
325355
lastError = fmt.Errorf(
@@ -336,9 +366,17 @@ func (client *DataSetClient) Shutdown() error {
336366
client.logStatistics()
337367

338368
if lastError == nil {
339-
client.Logger.Info("Shutting down - success")
369+
client.Logger.Info(
370+
"Shutting down - success",
371+
zap.Duration("retryShutdownTimeout", retryShutdownTimeout),
372+
zap.Duration("elapsedTime", time.Since(processingStart)),
373+
)
340374
} else {
341-
client.Logger.Error("Shutting down - error", zap.Error(lastError))
375+
client.Logger.Error(
376+
"Shutting down - error", zap.Error(lastError),
377+
zap.Duration("retryShutdownTimeout", retryShutdownTimeout),
378+
zap.Duration("elapsedTime", time.Since(processingStart)),
379+
)
342380
if client.LastError() == nil {
343381
return lastError
344382
}
@@ -475,13 +513,6 @@ func truncateText(text string, length int) string {
475513
return text
476514
}
477515

478-
func minDuration(a, b time.Duration) time.Duration {
479-
if a <= b {
480-
return a
481-
}
482-
return b
483-
}
484-
485516
func maxDuration(a, b time.Duration) time.Duration {
486517
if a >= b {
487518
return a

pkg/client/add_events_long_running_test.go

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ func TestAddEventsManyLogsShouldSucceed(t *testing.T) {
7676
}
7777

7878
lastCall.Store(time.Now().UnixNano())
79-
time.Sleep(time.Duration(float64(MaxDelay) * 0.7))
79+
time.Sleep(time.Duration(float64(MaxDelay) * 0.6))
8080
payload, err := json.Marshal(map[string]interface{}{
8181
"status": "success",
8282
"bytesCharged": 42,
@@ -92,12 +92,13 @@ func TestAddEventsManyLogsShouldSucceed(t *testing.T) {
9292
Tokens: config.DataSetTokens{WriteLog: "AAAA"},
9393
BufferSettings: buffer_config.DataSetBufferSettings{
9494
MaxSize: 1000,
95-
MaxLifetime: MaxDelay,
95+
MaxLifetime: 5 * MaxDelay,
9696
RetryRandomizationFactor: 1.0,
9797
RetryMultiplier: 1.0,
9898
RetryInitialInterval: RetryBase,
9999
RetryMaxInterval: RetryBase,
100100
RetryMaxElapsedTime: 10 * RetryBase,
101+
RetryShutdownTimeout: 50 * RetryBase,
101102
},
102103
ServerHostSettings: server_host_config.NewDefaultDataSetServerHostSettings(),
103104
}
@@ -146,16 +147,16 @@ func TestAddEventsManyLogsShouldSucceed(t *testing.T) {
146147
time.Sleep(time.Duration(float64(MaxDelay) * 0.3))
147148
}
148149

149-
err = sc.Shutdown()
150-
assert.Nil(t, err, err)
151-
152150
for {
153151
if time.Now().UnixNano()-lastCall.Load() > 5*time.Second.Nanoseconds() {
154152
break
155153
}
156154
time.Sleep(time.Second)
157155
}
158156

157+
err = sc.Shutdown()
158+
assert.Nil(t, err, err)
159+
159160
assert.Equal(t, seenKeys, expectedKeys)
160161
assert.Equal(t, processedEvents.Load(), ExpectedLogs, "processed items")
161162
assert.Equal(t, uint64(len(seenKeys)), ExpectedLogs, "unique items")

0 commit comments

Comments
 (0)