Skip to content

Commit ba8c9b0

Browse files
authored
Merge pull request #80 from authzed/feat/enhanced-fgam-retry-logic
feat: enhance FGAM retry logic with exponential backoff for concurrent operations
2 parents f822c52 + 9438c6c commit ba8c9b0

31 files changed

+1724
-638
lines changed

docs/index.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ provider "authzed" {
2727
token = var.authzed_api_token
2828
# Uncomment to specify a different API version
2929
# api_version = "25r1"
30+
31+
# Uncomment to enable serialization for high-concurrency scenarios
32+
# fgam_serialization = true
3033
}
3134
```
3235

@@ -46,6 +49,7 @@ To obtain a token, contact your AuthZed account team. They will provide you with
4649
* `endpoint` - (Required) The host address of the AuthZed Cloud API. Default is `https://api.admin.stage.aws.authzed.net`.
4750
* `token` - (Required) The bearer token for authentication with AuthZed.
4851
* `api_version` - (Optional) The version of the API to use. Default is "25r1".
52+
* `fgam_serialization` - (Optional) Enable serialization of operations to prevent conflicts. When enabled, resources within the same permission system will be created/updated sequentially instead of in parallel. Default is `false`.
4953

5054
## Important Notes
5155

internal/client/cloud_client.go

Lines changed: 78 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package client
22

33
import (
44
"bytes"
5+
"context"
56
"encoding/json"
67
"fmt"
78
"io"
@@ -109,87 +110,82 @@ func (c *CloudClient) Do(req *http.Request) (*ResponseWithETag, error) {
109110
}
110111

111112
// UpdateResource updates any resource that implements the Resource interface
112-
func (c *CloudClient) UpdateResource(resource Resource, endpoint string, body any) (Resource, error) {
113+
func (c *CloudClient) UpdateResource(ctx context.Context, resource Resource, endpoint string, body any) (Resource, error) {
113114
// Define a function to get the latest ETag
114115
getLatestETag := func() (string, error) {
115-
getReq, err := c.NewRequest(http.MethodGet, endpoint, nil)
116+
req, err := c.NewRequest(http.MethodGet, endpoint, nil)
116117
if err != nil {
117118
return "", fmt.Errorf("failed to create GET request: %w", err)
118119
}
119120

120-
getResp, err := c.Do(getReq)
121+
resp, err := c.Do(req)
121122
if err != nil {
122123
return "", fmt.Errorf("failed to send GET request: %w", err)
123124
}
124125
defer func() {
125-
if getResp.Response.Body != nil {
126-
_ = getResp.Response.Body.Close()
126+
if resp.Response.Body != nil {
127+
_ = resp.Response.Body.Close()
127128
}
128129
}()
129130

130-
if getResp.Response.StatusCode != http.StatusOK {
131-
return "", fmt.Errorf("failed to get latest ETag, status: %d", getResp.Response.StatusCode)
131+
if resp.Response.StatusCode != http.StatusOK {
132+
return "", NewAPIError(resp)
132133
}
133134

134-
// Extract ETag from response header
135-
return getResp.ETag, nil
135+
return resp.ETag, nil
136136
}
137137

138-
// Try update with current ETag
138+
// Define a function to update with a specific ETag
139139
updateWithETag := func(currentETag string) (*ResponseWithETag, error) {
140-
req, err := c.NewRequest(http.MethodPut, endpoint, body, WithETag(currentETag))
140+
req, err := c.NewRequest(http.MethodPut, endpoint, body)
141141
if err != nil {
142-
return nil, err
142+
return nil, fmt.Errorf("failed to create request: %w", err)
143143
}
144144

145-
return c.Do(req)
146-
}
147-
148-
respWithETag, err := updateWithETag(resource.GetETag())
149-
if err != nil {
150-
return nil, err
151-
}
145+
// Set the If-Match header for optimistic concurrency control
146+
req.Header.Set("If-Match", currentETag)
152147

153-
// Handle retryable errors (412 Precondition Failed, 409 Conflict) by refreshing ETag and retrying
154-
retryWithFreshETag := func(errorContext string) error {
155-
// Close the body of the first response
156-
if respWithETag.Response.Body != nil {
157-
_ = respWithETag.Response.Body.Close()
148+
resp, err := c.Do(req)
149+
if err != nil {
150+
return nil, fmt.Errorf("failed to send request: %w", err)
158151
}
159152

160-
// Get the latest ETag
161-
latestETag, err := getLatestETag()
162-
if err != nil {
163-
return fmt.Errorf("failed to get latest ETag for retry (%s): %w", errorContext, err)
153+
if resp.Response.StatusCode != http.StatusOK {
154+
defer func() {
155+
if resp.Response.Body != nil {
156+
_ = resp.Response.Body.Close()
157+
}
158+
}()
159+
return nil, NewAPIError(resp)
164160
}
165161

166-
// Retry the update with the latest ETag
167-
respWithETag, err = updateWithETag(latestETag)
168-
return err
162+
return resp, nil
169163
}
170164

171-
switch respWithETag.Response.StatusCode {
172-
case http.StatusPreconditionFailed:
173-
if err := retryWithFreshETag("precondition failed"); err != nil {
174-
return nil, err
175-
}
176-
case http.StatusConflict:
177-
if err := retryWithFreshETag("FGAM configuration change"); err != nil {
178-
return nil, err
179-
}
165+
// Use retry logic with exponential backoff
166+
retryConfig := DefaultRetryConfig()
167+
respWithETag, err := retryConfig.RetryWithExponentialBackoffLegacy(
168+
ctx,
169+
func() (*ResponseWithETag, error) {
170+
return updateWithETag(resource.GetETag())
171+
},
172+
getLatestETag,
173+
updateWithETag,
174+
"resource update",
175+
)
176+
if err != nil {
177+
return nil, err
180178
}
181179

182180
defer func() {
183-
// ignore the error
184-
_ = respWithETag.Response.Body.Close()
181+
if respWithETag.Response.Body != nil {
182+
_ = respWithETag.Response.Body.Close()
183+
}
185184
}()
186185

187-
if respWithETag.Response.StatusCode != http.StatusOK {
188-
return nil, NewAPIError(respWithETag)
189-
}
190-
191186
// Update the resource's ETag
192187
resource.SetETag(respWithETag.ETag)
188+
193189
return resource, nil
194190
}
195191

@@ -354,16 +350,47 @@ func (c *CloudClient) GetResourceWithFactory(endpoint string, dest any, factory
354350
}
355351

356352
// CreateResourceWithFactory combines CreateResource with a factory to create a proper Resource
357-
func (c *CloudClient) CreateResourceWithFactory(endpoint string, body any, dest any, factory ResourceFactory) (Resource, error) {
358-
req, err := c.NewRequest(http.MethodPost, endpoint, body)
359-
if err != nil {
360-
return nil, err
353+
func (c *CloudClient) CreateResourceWithFactory(ctx context.Context, endpoint string, body any, dest any, factory ResourceFactory) (Resource, error) {
354+
// Use retry logic with exponential backoff
355+
retryConfig := DefaultRetryConfig()
356+
357+
// Define the create operation
358+
createOperation := func() (*ResponseWithETag, error) {
359+
req, err := c.NewRequest(http.MethodPost, endpoint, body)
360+
if err != nil {
361+
return nil, err
362+
}
363+
364+
respWithETag, err := c.Do(req)
365+
if err != nil {
366+
return nil, err
367+
}
368+
369+
return respWithETag, nil
361370
}
362371

363-
respWithETag, err := c.Do(req)
372+
// For CREATE operations, we don't need to get fresh ETags since there's no existing resource
373+
// We just retry the same operation
374+
getLatestETag := func() (string, error) {
375+
return "", nil // Not used for CREATE operations
376+
}
377+
378+
createWithETag := func(etag string) (*ResponseWithETag, error) {
379+
return createOperation() // Retry the same CREATE operation
380+
}
381+
382+
// Execute with retry logic
383+
respWithETag, err := retryConfig.RetryWithExponentialBackoffLegacy(
384+
ctx,
385+
createOperation,
386+
getLatestETag,
387+
createWithETag,
388+
"resource create",
389+
)
364390
if err != nil {
365391
return nil, err
366392
}
393+
367394
defer func() {
368395
// ignore the error
369396
_ = respWithETag.Response.Body.Close()

internal/client/constants.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,11 @@ const (
1111
DefaultDeleteTimeout = 5 * time.Minute
1212
// DefaultDeletePollInterval is the default interval between polling attempts during delete operations
1313
DefaultDeletePollInterval = 2 * time.Second
14+
15+
// Retry configuration for handling concurrent operations
16+
// DefaultMaxRetries is the default number of retry attempts
17+
DefaultMaxRetries = 8
18+
DefaultBaseRetryDelay = 100 * time.Millisecond
19+
DefaultMaxRetryDelay = 5 * time.Second
20+
DefaultMaxJitter = 500 * time.Millisecond
1421
)

internal/client/errors.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ type APIError struct {
3737

3838
func (e *APIError) Error() string {
3939
if e.Message != "" {
40-
// Check if this is a FGAM configuration conflict and provide helpful context
40+
// Check if this is a configuration conflict and provide helpful context
4141
if e.StatusCode == 409 && containsFGAMConfigConflict(e.Message) {
4242
return fmt.Sprintf("API error (status %d): %s\n\nThis error occurs when the Fine-Grained Access Management (FGAM) configuration for the permission system has been modified by another process. The Terraform provider will automatically retry this operation.", e.StatusCode, e.Message)
4343
}
@@ -46,7 +46,7 @@ func (e *APIError) Error() string {
4646
return fmt.Sprintf("API error (status %d)", e.StatusCode)
4747
}
4848

49-
// containsFGAMConfigConflict checks if the error message indicates a FGAM configuration conflict
49+
// containsFGAMConfigConflict checks if the error message indicates a configuration conflict
5050
func containsFGAMConfigConflict(message string) bool {
5151
lowerMessage := strings.ToLower(message)
5252

internal/client/policy.go

Lines changed: 15 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package client
22

33
import (
44
"bytes"
5+
"context"
56
"encoding/json"
67
"fmt"
78
"io"
@@ -81,11 +82,11 @@ func (c *CloudClient) GetPolicy(permissionsSystemID, policyID string) (*PolicyWi
8182
}
8283

8384
// CreatePolicy creates a new policy
84-
func (c *CloudClient) CreatePolicy(policy *models.Policy) (*PolicyWithETag, error) {
85+
func (c *CloudClient) CreatePolicy(ctx context.Context, policy *models.Policy) (*PolicyWithETag, error) {
8586
path := fmt.Sprintf("/ps/%s/access/policies", policy.PermissionsSystemID)
8687

8788
var createdPolicy models.Policy
88-
resource, err := c.CreateResourceWithFactory(path, policy, &createdPolicy, NewPolicyResource)
89+
resource, err := c.CreateResourceWithFactory(ctx, path, policy, &createdPolicy, NewPolicyResource)
8990
if err != nil {
9091
return nil, err
9192
}
@@ -94,7 +95,7 @@ func (c *CloudClient) CreatePolicy(policy *models.Policy) (*PolicyWithETag, erro
9495
}
9596

9697
// UpdatePolicy updates an existing policy using the PUT method
97-
func (c *CloudClient) UpdatePolicy(policy *models.Policy, etag string) (*PolicyWithETag, error) {
98+
func (c *CloudClient) UpdatePolicy(ctx context.Context, policy *models.Policy, etag string) (*PolicyWithETag, error) {
9899
path := fmt.Sprintf("/ps/%s/access/policies/%s", policy.PermissionsSystemID, policy.ID)
99100

100101
// Define a function to get the latest ETag
@@ -143,41 +144,21 @@ func (c *CloudClient) UpdatePolicy(policy *models.Policy, etag string) (*PolicyW
143144
return respWithETag, nil
144145
}
145146

146-
// First attempt with the provided ETag
147-
respWithETag, err := updateWithETag(etag)
147+
// Use retry logic with exponential backoff
148+
retryConfig := DefaultRetryConfig()
149+
respWithETag, err := retryConfig.RetryWithExponentialBackoffLegacy(
150+
ctx,
151+
func() (*ResponseWithETag, error) {
152+
return updateWithETag(etag)
153+
},
154+
getLatestETag,
155+
updateWithETag,
156+
"policy update",
157+
)
148158
if err != nil {
149159
return nil, err
150160
}
151161

152-
// Handle retryable errors (412 Precondition Failed, 409 Conflict) by refreshing ETag and retrying
153-
retryWithFreshETag := func(errorContext string) error {
154-
// Close the body of the first response
155-
if respWithETag.Response.Body != nil {
156-
_ = respWithETag.Response.Body.Close()
157-
}
158-
159-
// Get the latest ETag
160-
latestETag, err := getLatestETag()
161-
if err != nil {
162-
return fmt.Errorf("failed to get latest ETag for retry (%s): %w", errorContext, err)
163-
}
164-
165-
// Retry the update with the latest ETag
166-
respWithETag, err = updateWithETag(latestETag)
167-
return err
168-
}
169-
170-
switch respWithETag.Response.StatusCode {
171-
case http.StatusPreconditionFailed:
172-
if err := retryWithFreshETag("precondition failed"); err != nil {
173-
return nil, err
174-
}
175-
case http.StatusConflict:
176-
if err := retryWithFreshETag("FGAM configuration change"); err != nil {
177-
return nil, err
178-
}
179-
}
180-
181162
// Keep the response body for potential error reporting
182163
var respBody []byte
183164
if respWithETag.Response.Body != nil {

0 commit comments

Comments
 (0)