Skip to content

Commit ca5d457

Browse files
robinbryceRobin Bryce
andauthored
Dev/robin/9913 handle ratelimiting gracefully (#28)
* tests: ability to retain pre-images for test local reader context * fix: avoid masking 429 errors (and others) in verifyContext Also add support for detecting 429 errors from the blobs sdk and obtaining the Retry-After header value as a time.Duration AB#9913 * fix: linter issues * fix: review comment finish the comment * fix: explain why a range check is used in the tests for IsRateLimiting --------- Co-authored-by: Robin Bryce <[email protected]>
1 parent 1fa5904 commit ca5d457

File tree

5 files changed

+284
-65
lines changed

5 files changed

+284
-65
lines changed

massifs/bloberrors.go

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
package massifs
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"net/http"
7+
"strconv"
8+
"time"
9+
10+
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
11+
azStorageBlob "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
12+
)
13+
14+
const (
15+
azblobBlobNotFound = "BlobNotFound"
16+
)
17+
18+
func AsStorageError(err error) (azStorageBlob.StorageError, bool) {
19+
serr := &azStorageBlob.StorageError{}
20+
//nolint
21+
ierr, ok := err.(*azStorageBlob.InternalError)
22+
if ierr == nil || !ok {
23+
return azStorageBlob.StorageError{}, false
24+
}
25+
if !ierr.As(&serr) {
26+
return azStorageBlob.StorageError{}, false
27+
}
28+
return *serr, true
29+
}
30+
31+
func AsResponseError(err error) (azcore.ResponseError, bool) {
32+
33+
var ok bool
34+
var rerr *azcore.ResponseError
35+
36+
//nolint
37+
if rerr, ok = err.(*azcore.ResponseError); ok {
38+
return *rerr, true
39+
}
40+
41+
// check for an InternalError that has ResponseError as its cause
42+
rerr = &azcore.ResponseError{}
43+
44+
//nolint
45+
ierr, ok := err.(*azStorageBlob.InternalError)
46+
if ierr == nil || !ok {
47+
return azcore.ResponseError{}, false
48+
}
49+
if !ierr.As(&rerr) {
50+
return azcore.ResponseError{}, false
51+
}
52+
return *rerr, true
53+
}
54+
55+
// WrapBlobNotFound tranlsates the err to ErrBlobNotFound if the actual error is
56+
// the azure sdk blob not found error. In all cases where the original err is
57+
// not the azure BlobNot found, the original err is returned as is. Including
58+
// the case where it is nil
59+
func WrapBlobNotFound(err error) error {
60+
if err == nil {
61+
return nil
62+
}
63+
serr, ok := AsStorageError(err)
64+
if !ok {
65+
return err
66+
}
67+
if serr.ErrorCode != azblobBlobNotFound {
68+
return err
69+
}
70+
return fmt.Errorf("%s: %w", err.Error(), ErrBlobNotFound)
71+
}
72+
73+
func IsBlobNotFound(err error) bool {
74+
if err == nil {
75+
return false
76+
}
77+
if errors.Is(err, ErrBlobNotFound) {
78+
return true
79+
}
80+
serr, ok := AsStorageError(err)
81+
if !ok {
82+
return false
83+
}
84+
if serr.ErrorCode != azblobBlobNotFound {
85+
return false
86+
}
87+
return true
88+
}
89+
90+
// IsRateLimiting detects if the error is HTTP Status 429 Too Many Requests
91+
// The recomended wait time is returned if it is available. If the returned wait
92+
// time is zero, the caller should apply an appropriate default backoff.
93+
func IsRateLimiting(err error) (time.Duration, bool) {
94+
if err == nil {
95+
return 0, false
96+
}
97+
rerr, ok := AsResponseError(err)
98+
if !ok {
99+
return 0, false
100+
}
101+
if rerr.StatusCode != http.StatusTooManyRequests {
102+
return 0, false
103+
}
104+
105+
// It is a 429, check if there is a Retry-After header and return the indicated time if possible.
106+
107+
// Retry-After header is optional, if it is not present, the caller should still see it as a 429
108+
if rerr.RawResponse == nil {
109+
return 0, true
110+
}
111+
retryAfter := rerr.RawResponse.Header.Get("Retry-After")
112+
if retryAfter == "" {
113+
return 0, true
114+
}
115+
116+
// Try to parse Retry-After as an integer (seconds)
117+
if seconds, err := strconv.Atoi(retryAfter); err == nil {
118+
return time.Duration(seconds) * time.Second, true
119+
}
120+
121+
// Try to parse Retry-After as a date
122+
if retryTime, err := http.ParseTime(retryAfter); err == nil {
123+
retryTime = retryTime.In(time.UTC) // crucial, as Until does not work with different locations
124+
return time.Until(retryTime), true
125+
}
126+
127+
// couldn't parse the time, but it is definitely a 429. the caller should apply an appropriate default backoff.
128+
return 0, true
129+
}

massifs/bloberrors_test.go

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
package massifs
2+
3+
import (
4+
"errors"
5+
"net/http"
6+
"testing"
7+
"time"
8+
9+
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
10+
"github.com/stretchr/testify/assert"
11+
)
12+
13+
func TestIsRateLimiting(t *testing.T) {
14+
tests := []struct {
15+
name string
16+
err error
17+
minWait time.Duration
18+
maxWait time.Duration
19+
expectedWait time.Duration
20+
expectedResult bool
21+
}{
22+
{
23+
name: "429 with Retry-After header as date",
24+
err: &azcore.ResponseError{
25+
StatusCode: http.StatusTooManyRequests,
26+
RawResponse: &http.Response{
27+
Header: http.Header{
28+
"Retry-After": []string{time.Now().Add(5 * time.Minute).UTC().Format(http.TimeFormat)},
29+
},
30+
},
31+
},
32+
minWait: 4*time.Minute + 59*time.Second,
33+
maxWait: 6 * time.Minute, // remove the max wait if this turnes out to be flaky but it should be fine
34+
expectedResult: true,
35+
},
36+
{
37+
name: "nil error",
38+
err: nil,
39+
expectedWait: 0,
40+
expectedResult: false,
41+
},
42+
{
43+
name: "non-response error",
44+
err: errors.New("some error"),
45+
expectedWait: 0,
46+
expectedResult: false,
47+
},
48+
{
49+
name: "non-429 status code",
50+
err: &azcore.ResponseError{
51+
StatusCode: http.StatusInternalServerError,
52+
},
53+
expectedWait: 0,
54+
expectedResult: false,
55+
},
56+
{
57+
name: "429 without Retry-After header",
58+
err: &azcore.ResponseError{
59+
StatusCode: http.StatusTooManyRequests,
60+
RawResponse: &http.Response{
61+
Header: http.Header{},
62+
},
63+
},
64+
expectedWait: 0,
65+
expectedResult: true,
66+
},
67+
{
68+
name: "429 with Retry-After header in seconds",
69+
err: &azcore.ResponseError{
70+
StatusCode: http.StatusTooManyRequests,
71+
RawResponse: &http.Response{
72+
Header: http.Header{
73+
"Retry-After": []string{"10"},
74+
},
75+
},
76+
},
77+
expectedWait: 10 * time.Second,
78+
expectedResult: true,
79+
},
80+
{
81+
name: "429 with invalid Retry-After header",
82+
err: &azcore.ResponseError{
83+
StatusCode: http.StatusTooManyRequests,
84+
RawResponse: &http.Response{
85+
Header: http.Header{
86+
"Retry-After": []string{"invalid"},
87+
},
88+
},
89+
},
90+
expectedWait: 0,
91+
expectedResult: true,
92+
},
93+
}
94+
95+
for _, tt := range tests {
96+
t.Run(tt.name, func(t *testing.T) {
97+
98+
wait, result := IsRateLimiting(tt.err)
99+
// Note: we use a range check here because parsing the Retry-After header
100+
// uses time.Until, which then calls time.Now, in the case where the header is a date.
101+
// this means this test would be flaky. We could mock time.Now but that would be a lot of work
102+
if tt.minWait > 0 {
103+
assert.GreaterOrEqual(t, wait, tt.minWait)
104+
}
105+
if tt.maxWait > 0 {
106+
assert.LessOrEqual(t, wait, tt.maxWait)
107+
}
108+
if tt.expectedWait > 0 || (tt.minWait == 0 && tt.maxWait == 0) {
109+
assert.Equal(t, tt.expectedWait, wait)
110+
}
111+
assert.Equal(t, tt.expectedResult, result)
112+
})
113+
}
114+
}

massifs/blobnotfounderr.go

Lines changed: 0 additions & 60 deletions
This file was deleted.

massifs/massifcontextverified.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,12 @@ func (mc *MassifContext) verifyContext(
161161

162162
msg, state, err := options.sealGetter.GetSignedRoot(ctx, mc.TenantIdentity, mc.Start.MassifIndex)
163163
if err != nil {
164-
return nil, fmt.Errorf(
165-
"%w: failed to get seal for massif %d for tenant %s: %v",
166-
ErrSealNotFound, mc.Start.MassifIndex, mc.TenantIdentity, WrapBlobNotFound(err))
164+
if IsBlobNotFound(err) {
165+
return nil, fmt.Errorf(
166+
"%w: failed to get seal for massif %d for tenant %s: %v",
167+
ErrSealNotFound, mc.Start.MassifIndex, mc.TenantIdentity, WrapBlobNotFound(err))
168+
}
169+
return nil, err
167170
}
168171

169172
state.Root, err = mmr.GetRoot(state.MMRSize, mc, sha256.New())

massifs/testlocalreadercontext.go

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,42 @@ type TestLocalReaderContext struct {
2424
AzuriteReader MassifReader
2525
}
2626

27+
// TestLogCreatorContext holds the context data resulting from a call to CreateLog
28+
// Unless one or more of the TEstCreateLogOptions are used, the context will not have anything interesting in it.
29+
type TestLogCreatorContext struct {
30+
Preimages map[uint64][]byte
31+
}
32+
33+
type TestCreateLogOption func(*TestLogCreatorContext)
34+
35+
func TestWithCreateLogPreImages() TestCreateLogOption {
36+
return func(c *TestLogCreatorContext) {
37+
c.Preimages = make(map[uint64][]byte)
38+
}
39+
}
40+
2741
// CreateLog creates a log with the given tenant identity, massif height, and mmr size,
2842
// any previous seal or massif blobs for the same tenant are first deleted
29-
func (c *TestLocalReaderContext) CreateLog(tenantIdentity string, massifHeight uint8, massifCount uint32) {
43+
func (c *TestLocalReaderContext) CreateLog(
44+
tenantIdentity string, massifHeight uint8, massifCount uint32,
45+
opts ...TestCreateLogOption) {
46+
47+
logContext := &TestLogCreatorContext{}
48+
for _, opt := range opts {
49+
opt(logContext)
50+
}
51+
52+
generator := MMRTestingGenerateNumberedLeaf
53+
54+
// If the caller needs to work with the pre-images we wrap the generator to retain them
55+
if logContext.Preimages != nil {
56+
generator = func(tenantIdentity string, base, i uint64) mmrtesting.AddLeafArgs {
57+
58+
args := generator(tenantIdentity, base, i)
59+
logContext.Preimages[base+i] = args.Value
60+
return args
61+
}
62+
}
3063

3164
// clear out any previous log
3265
c.AzuriteContext.DeleteBlobsByPrefix(TenantMassifPrefix(tenantIdentity))
@@ -35,7 +68,7 @@ func (c *TestLocalReaderContext) CreateLog(tenantIdentity string, massifHeight u
3568
CommitmentEpoch: 1,
3669
MassifHeight: massifHeight,
3770
SealOnCommit: true, // create seals for each massif as we go
38-
}, c.AzuriteContext, c.G, MMRTestingGenerateNumberedLeaf)
71+
}, c.AzuriteContext, c.G, generator)
3972
require.NoError(c.AzuriteContext.T, err)
4073

4174
leavesPerMassif := mmr.HeightIndexLeafCount(uint64(massifHeight) - 1)

0 commit comments

Comments
 (0)