Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

recognize better transient error #303

Merged
merged 50 commits into from
Jul 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
b4fc25f
recognize better transient error
TalShorSap Jul 12, 2023
928937d
recognize better transient error
TalShorSap Jul 12, 2023
c9c1c42
recognize better transient error
TalShorSap Jul 12, 2023
a858fbf
recognize better transient error
TalShorSap Jul 12, 2023
1af0fdf
recognize better transient error
TalShorSap Jul 12, 2023
cfcc2ec
tests
TalShorSap Jul 12, 2023
ea4c1b7
tests
TalShorSap Jul 12, 2023
48ba5a0
tests
TalShorSap Jul 12, 2023
adae8e9
tests
TalShorSap Jul 12, 2023
702076a
tests
TalShorSap Jul 12, 2023
943c067
tests
TalShorSap Jul 12, 2023
9ecc343
tests
TalShorSap Jul 12, 2023
c284392
tests
TalShorSap Jul 12, 2023
4b1bf1f
tests
TalShorSap Jul 12, 2023
f809d61
tests
TalShorSap Jul 12, 2023
2eeab58
tests
TalShorSap Jul 12, 2023
07423f9
tests
TalShorSap Jul 12, 2023
f15314c
tests
TalShorSap Jul 12, 2023
f69dca8
fix
TalShorSap Jul 16, 2023
9a93f49
fix TESTS
TalShorSap Jul 17, 2023
2ef13a3
fix TESTS
TalShorSap Jul 17, 2023
e0a34d8
fix
TalShorSap Jul 18, 2023
25eb453
fix
TalShorSap Jul 19, 2023
22982f5
fix Keren review
TalShorSap Jul 20, 2023
6c5a2b2
fix Keren review
TalShorSap Jul 23, 2023
a65d818
fix Keren review
TalShorSap Jul 23, 2023
6e0a76c
fix Keren review
TalShorSap Jul 23, 2023
360a009
fix Keren review
TalShorSap Jul 23, 2023
d396d4c
fix Keren review
TalShorSap Jul 23, 2023
193fbc9
fix Keren review
TalShorSap Jul 23, 2023
45a2d96
fix Keren review
TalShorSap Jul 23, 2023
1665970
fix Keren review
TalShorSap Jul 23, 2023
e207350
fix Keren review
TalShorSap Jul 23, 2023
b983bf3
fix Keren review
TalShorSap Jul 23, 2023
f150f38
fix Keren review
TalShorSap Jul 23, 2023
7f10571
fix Keren review
TalShorSap Jul 23, 2023
48e9c05
fix Keren review
TalShorSap Jul 23, 2023
49ee9b4
fix isTransient
kerenlahav Jul 23, 2023
6ac0055
fix Keren review
TalShorSap Jul 23, 2023
29f52fe
fix Keren review
TalShorSap Jul 23, 2023
5affe17
clean code
TalShorSap Jul 23, 2023
2e4a5e5
clean code
TalShorSap Jul 24, 2023
936180e
clean code
TalShorSap Jul 24, 2023
cbded30
go mod vendor && tidy
TalShorSap Jul 24, 2023
1e86338
go mod vendor && tidy
TalShorSap Jul 24, 2023
e39c933
go mod vendor && tidy
TalShorSap Jul 24, 2023
1534dea
cosmetics
kerenlahav Jul 24, 2023
0f35e32
Merge remote-tracking branch 'origin/better_recognition_of_transient_…
kerenlahav Jul 24, 2023
a5ff4a2
fix
TalShorSap Jul 24, 2023
7457b3a
cosmetics
kerenlahav Jul 24, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions api/common.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package api

import (
"fmt"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -17,6 +19,30 @@ const (
ForceRotateAnnotation string = "services.cloud.sap.com/forceRotate"
)

type HTTPStatusCodeError struct {
// StatusCode is the HTTP status code returned by the broker.
StatusCode int
// ErrorMessage is a machine-readable error string that may be returned by the broker.
ErrorMessage *string
// Description is a human-readable description of the error that may be returned by the broker.
Description *string
// ResponseError is set to the error that occurred when unmarshalling a response body from the broker.
ResponseError error
}

func (e HTTPStatusCodeError) Error() string {
errorMessage := "<nil>"
description := "<nil>"

if e.ErrorMessage != nil {
errorMessage = *e.ErrorMessage
}
if e.Description != nil {
description = *e.Description
}
return fmt.Sprintf("Status: %v; ErrorMessage: %v; Description: %v; ResponseError: %v", e.StatusCode, errorMessage, description, e.ResponseError)
}

const (
// ConditionSucceeded represents whether the last operation CREATE/UPDATE/DELETE was successful.
ConditionSucceeded = "Succeeded"
Expand Down
6 changes: 4 additions & 2 deletions client/sm/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"strconv"
"strings"

"github.com/SAP/sap-btp-service-operator/api"
"github.com/SAP/sap-btp-service-operator/client/sm/types"
"github.com/SAP/sap-btp-service-operator/internal/auth"
"github.com/SAP/sap-btp-service-operator/internal/httputil"
Expand Down Expand Up @@ -67,8 +68,9 @@ type Client interface {
Call(method string, smpath string, body io.Reader, q *Parameters) (*http.Response, error)
}
type ServiceManagerError struct {
Message string
StatusCode int
Message string
StatusCode int
BrokerError *api.HTTPStatusCodeError `json:"broker_error,omitempty"`
}

func (e *ServiceManagerError) Error() string {
Expand Down
68 changes: 47 additions & 21 deletions controllers/base_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,7 @@ func (r *BaseReconciler) updateStatus(ctx context.Context, object api.SAPBTPReso
func (r *BaseReconciler) init(ctx context.Context, obj api.SAPBTPResource) error {
obj.SetReady(metav1.ConditionFalse)
setInProgressConditions(smClientTypes.CREATE, "Pending", obj)
if err := r.updateStatus(ctx, obj); err != nil {
return err
}
return nil
return r.updateStatus(ctx, obj)
}

func getConditionReason(opType smClientTypes.OperationCategory, state smClientTypes.OperationState) string {
Expand Down Expand Up @@ -302,43 +299,72 @@ func isDelete(object metav1.ObjectMeta) bool {
return !object.DeletionTimestamp.IsZero()
}

func isTransientError(ctx context.Context, err error) bool {
func isTransientError(ctx context.Context, err error) (bool, string) {
log := GetLogger(ctx)
if smError, ok := err.(*sm.ServiceManagerError); ok {
smError, ok := err.(*sm.ServiceManagerError)
if !ok {
return false, err.Error()
}

statusCode := smError.StatusCode
errMsg := smError.Error()
if isBrokerErrorExist(smError) {
log.Info(fmt.Sprintf("Broker returned error status code %d", smError.BrokerError.StatusCode))
statusCode = smError.BrokerError.StatusCode
errMsg = smError.BrokerError.Error()
} else {
log.Info(fmt.Sprintf("SM returned error status code %d", smError.StatusCode))
return smError.StatusCode == http.StatusTooManyRequests || smError.StatusCode == http.StatusServiceUnavailable ||
smError.StatusCode == http.StatusGatewayTimeout || smError.StatusCode == http.StatusNotFound || smError.StatusCode == http.StatusBadGateway
}
return false
return isTransientStatusCode(statusCode), errMsg
}

func isTransientStatusCode(StatusCode int) bool {
return StatusCode == http.StatusTooManyRequests || StatusCode == http.StatusServiceUnavailable ||
StatusCode == http.StatusGatewayTimeout || StatusCode == http.StatusNotFound || StatusCode == http.StatusBadGateway
}

func isBrokerErrorExist(smError *sm.ServiceManagerError) bool {
return smError.BrokerError != nil && smError.BrokerError.StatusCode != 0
}

func (r *BaseReconciler) handleError(ctx context.Context, operationType smClientTypes.OperationCategory, err error, resource api.SAPBTPResource) (ctrl.Result, error) {
var (
isTransient bool
errMsg string
)

if isTransient, errMsg = isTransientError(ctx, err); isTransient {
return r.markAsTransientError(ctx, operationType, errMsg, resource)
}
return r.markAsNonTransientError(ctx, operationType, errMsg, resource)
}

func (r *BaseReconciler) markAsNonTransientError(ctx context.Context, operationType smClientTypes.OperationCategory, nonTransientErr error, object api.SAPBTPResource) (ctrl.Result, error) {
func (r *BaseReconciler) markAsNonTransientError(ctx context.Context, operationType smClientTypes.OperationCategory, errMsg string, object api.SAPBTPResource) (ctrl.Result, error) {
log := GetLogger(ctx)
setFailureConditions(operationType, nonTransientErr.Error(), object)
setFailureConditions(operationType, errMsg, object)
if operationType != smClientTypes.DELETE {
log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, giving up operation :(", operationType, object.GetControllerName(), nonTransientErr.Error()))
log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, giving up operation :(", operationType, object.GetControllerName(), errMsg))
}
object.SetObservedGeneration(object.GetGeneration())
err := r.updateStatus(ctx, object)
if err != nil {
return ctrl.Result{}, err
}
if operationType == smClientTypes.DELETE {
return ctrl.Result{}, nonTransientErr
return ctrl.Result{}, fmt.Errorf(errMsg)
}
return ctrl.Result{}, nil
}

func (r *BaseReconciler) markAsTransientError(ctx context.Context, operationType smClientTypes.OperationCategory, transientErr error, object api.SAPBTPResource) (ctrl.Result, error) {
func (r *BaseReconciler) markAsTransientError(ctx context.Context, operationType smClientTypes.OperationCategory, errMsg string, object api.SAPBTPResource) (ctrl.Result, error) {
log := GetLogger(ctx)
if smError, ok := transientErr.(*sm.ServiceManagerError); ok && smError.StatusCode != http.StatusTooManyRequests {
setInProgressConditions(operationType, transientErr.Error(), object)
log.Info(fmt.Sprintf("operation %s of %s encountered a transient error %s, retrying operation :)", operationType, object.GetControllerName(), transientErr.Error()))
if err := r.updateStatus(ctx, object); err != nil {
return ctrl.Result{}, err
}
setInProgressConditions(operationType, errMsg, object)
log.Info(fmt.Sprintf("operation %s of %s encountered a transient error %s, retrying operation :)", operationType, object.GetControllerName(), errMsg))
if err := r.updateStatus(ctx, object); err != nil {
return ctrl.Result{}, err
}
return ctrl.Result{}, transientErr

return ctrl.Result{}, fmt.Errorf(errMsg)
}

func isInProgress(object api.SAPBTPResource) bool {
Expand Down
22 changes: 9 additions & 13 deletions controllers/servicebinding_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func (r *ServiceBindingReconciler) Reconcile(ctx context.Context, req ctrl.Reque

smClient, err := r.getSMClient(ctx, serviceBinding)
if err != nil {
return r.markAsTransientError(ctx, Unknown, err, serviceBinding)
return r.markAsTransientError(ctx, Unknown, err.Error(), serviceBinding)
}

if len(serviceBinding.Status.OperationURL) > 0 {
Expand Down Expand Up @@ -173,7 +173,7 @@ func (r *ServiceBindingReconciler) Reconcile(ctx context.Context, req ctrl.Reque
binding, err := r.getBindingForRecovery(ctx, smClient, serviceBinding)
if err != nil {
log.Error(err, "failed to check binding recovery")
return r.markAsTransientError(ctx, smClientTypes.CREATE, err, serviceBinding)
return r.markAsTransientError(ctx, smClientTypes.CREATE, err.Error(), serviceBinding)
}
if binding != nil {
// Recovery - restore binding from SM
Expand Down Expand Up @@ -209,7 +209,7 @@ func (r *ServiceBindingReconciler) createBinding(ctx context.Context, smClient s
_, bindingParameters, err := buildParameters(r.Client, serviceBinding.Namespace, serviceBinding.Spec.ParametersFrom, serviceBinding.Spec.Parameters)
if err != nil {
log.Error(err, "failed to parse smBinding parameters")
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, err, serviceBinding)
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, err.Error(), serviceBinding)
}

smBinding, operationURL, bindErr := smClient.Bind(&smClientTypes.ServiceBinding{
Expand All @@ -225,16 +225,13 @@ func (r *ServiceBindingReconciler) createBinding(ctx context.Context, smClient s

if bindErr != nil {
log.Error(err, "failed to create service binding", "serviceInstanceID", serviceInstance.Status.InstanceID)
if isTransientError(ctx, bindErr) {
return r.markAsTransientError(ctx, smClientTypes.CREATE, bindErr, serviceBinding)
}
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, bindErr, serviceBinding)
return r.handleError(ctx, smClientTypes.CREATE, bindErr, serviceBinding)
}

if operationURL != "" {
var bindingID string
if bindingID = sm.ExtractBindingID(operationURL); len(bindingID) == 0 {
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, fmt.Errorf("failed to extract smBinding ID from operation URL %s", operationURL), serviceBinding)
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, fmt.Sprintf("failed to extract smBinding ID from operation URL %s", operationURL), serviceBinding)
}
serviceBinding.Status.BindingID = bindingID

Expand Down Expand Up @@ -295,7 +292,7 @@ func (r *ServiceBindingReconciler) delete(ctx context.Context, smClient sm.Clien
operationURL, unbindErr := smClient.Unbind(serviceBinding.Status.BindingID, nil, buildUserInfo(ctx, serviceBinding.Spec.UserInfo))
if unbindErr != nil {
// delete will proceed anyway
return r.markAsNonTransientError(ctx, smClientTypes.DELETE, unbindErr, serviceBinding)
return r.markAsNonTransientError(ctx, smClientTypes.DELETE, unbindErr.Error(), serviceBinding)
}

if operationURL != "" {
Expand Down Expand Up @@ -336,8 +333,7 @@ func (r *ServiceBindingReconciler) poll(ctx context.Context, smClient sm.Client,
}

if status == nil {
err := fmt.Errorf("failed to get last operation status of %s", serviceBinding.Name)
return r.markAsTransientError(ctx, serviceBinding.Status.OperationType, err, serviceBinding)
return r.markAsTransientError(ctx, serviceBinding.Status.OperationType, fmt.Sprintf("failed to get last operation status of %s", serviceBinding.Name), serviceBinding)
}
switch status.State {
case smClientTypes.INPROGRESS:
Expand Down Expand Up @@ -689,9 +685,9 @@ func (r *ServiceBindingReconciler) handleSecretError(ctx context.Context, op smC
log := GetLogger(ctx)
log.Error(err, fmt.Sprintf("failed to store secret %s for binding %s", binding.Spec.SecretName, binding.Name))
if apierrors.ReasonForError(err) == metav1.StatusReasonUnknown {
return r.markAsNonTransientError(ctx, op, err, binding)
return r.markAsNonTransientError(ctx, op, err.Error(), binding)
}
return r.markAsTransientError(ctx, op, err, binding)
return r.markAsTransientError(ctx, op, err.Error(), binding)
}

func (r *ServiceBindingReconciler) addInstanceInfo(ctx context.Context, binding *servicesv1.ServiceBinding, credentialsMap map[string][]byte) ([]SecretMetadataProperty, error) {
Expand Down
78 changes: 64 additions & 14 deletions controllers/servicebinding_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,7 @@ var _ = Describe("ServiceBinding controller", func() {
var guid string

createBindingWithoutAssertionsAndWait := func(ctx context.Context, name string, namespace string, instanceName string, externalName string, wait bool) (*v1.ServiceBinding, error) {
binding := newBindingObject(name, namespace)
binding.Spec.ServiceInstanceName = instanceName
binding.Spec.ExternalName = externalName
binding.Spec.Parameters = &runtime.RawExtension{
Raw: []byte(`{"key": "value"}`),
}
binding.Spec.ParametersFrom = []v1.ParametersFromSource{
{
SecretKeyRef: &v1.SecretKeyReference{
Name: "param-secret",
Key: "secret-parameter",
},
},
}
binding := generateBasicBindingTemplate(name, namespace, instanceName, externalName)

if err := k8sClient.Create(ctx, binding); err != nil {
return nil, err
Expand Down Expand Up @@ -538,6 +525,34 @@ var _ = Describe("ServiceBinding controller", func() {
})
})

When("SM returned error 502 and broker returned 429", func() {
BeforeEach(func() {
errorMessage = "too many requests from broker"
fakeClient.BindReturns(nil, "", getTransientBrokerError(errorMessage))
})

It("should detect the error as transient and eventually succeed", func() {
createdBinding, _ := createBindingWithoutAssertionsAndWait(context.Background(),
bindingName, bindingTestNamespace, instanceName, "binding-external-name", false)
expectBindingToBeInFailedStateWithMsg(createdBinding, errorMessage)

fakeClient.BindReturns(&smClientTypes.ServiceBinding{ID: fakeBindingID,
Credentials: json.RawMessage("{\"secret_key\": \"secret_value\"}")}, "", nil)
validateBindingIsReady(createdBinding, bindingName)
})
})

When("SM returned 502 and broker returned 400", func() {
BeforeEach(func() {
errorMessage = "very bad request"
fakeClient.BindReturnsOnCall(0, nil, "", getNonTransientBrokerError(errorMessage))
})

It("should detect the error as non-transient and fail", func() {
createBindingWithError(context.Background(), bindingName, bindingTestNamespace, instanceName, "binding-external-name", errorMessage)
})
})

})

When("SM returned invalid credentials json", func() {
Expand Down Expand Up @@ -1167,6 +1182,41 @@ var _ = Describe("ServiceBinding controller", func() {
})
})

func expectBindingToBeInFailedStateWithMsg(binding *v1.ServiceBinding, message string) {
cond := meta.FindStatusCondition(binding.GetConditions(), api.ConditionSucceeded)
Expect(cond).To(Not(BeNil()))
Expect(cond.Message).To(ContainSubstring(message))
Expect(cond.Status).To(Equal(metav1.ConditionFalse))
}

func validateBindingIsReady(createdBinding *v1.ServiceBinding, bindingName string) {
Eventually(func() bool {
err := k8sClient.Get(context.Background(), types.NamespacedName{Name: bindingName, Namespace: bindingTestNamespace}, createdBinding)
if err != nil {
return false
}
return isReady(createdBinding)
}, timeout, interval).Should(BeTrue())
}

func generateBasicBindingTemplate(name, namespace, instanceName, externalName string) *v1.ServiceBinding {
binding := newBindingObject(name, namespace)
binding.Spec.ServiceInstanceName = instanceName
binding.Spec.ExternalName = externalName
binding.Spec.Parameters = &runtime.RawExtension{
Raw: []byte(`{"key": "value"}`),
}
binding.Spec.ParametersFrom = []v1.ParametersFromSource{
{
SecretKeyRef: &v1.SecretKeyReference{
Name: "param-secret",
Key: "secret-parameter",
},
},
}
return binding
}

func validateSecretData(secret *corev1.Secret, expectedKey string, expectedValue string) {
Expect(secret.Data).ToNot(BeNil())
Expect(secret.Data).To(HaveKey(expectedKey))
Expand Down
Loading
Loading