Skip to content

Commit

Permalink
recognize better transient error (#303)
Browse files Browse the repository at this point in the history
* recognize better transient error


Co-authored-by: I501080 <[email protected]>
  • Loading branch information
TalShorSap and kerenlahav authored Jul 24, 2023
1 parent 571f018 commit 61663b2
Show file tree
Hide file tree
Showing 8 changed files with 262 additions and 98 deletions.
26 changes: 26 additions & 0 deletions api/common.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package api

import (
"fmt"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -17,6 +19,30 @@ const (
ForceRotateAnnotation string = "services.cloud.sap.com/forceRotate"
)

type HTTPStatusCodeError struct {
// StatusCode is the HTTP status code returned by the broker.
StatusCode int
// ErrorMessage is a machine-readable error string that may be returned by the broker.
ErrorMessage *string
// Description is a human-readable description of the error that may be returned by the broker.
Description *string
// ResponseError is set to the error that occurred when unmarshalling a response body from the broker.
ResponseError error
}

func (e HTTPStatusCodeError) Error() string {
errorMessage := "<nil>"
description := "<nil>"

if e.ErrorMessage != nil {
errorMessage = *e.ErrorMessage
}
if e.Description != nil {
description = *e.Description
}
return fmt.Sprintf("Status: %v; ErrorMessage: %v; Description: %v; ResponseError: %v", e.StatusCode, errorMessage, description, e.ResponseError)
}

const (
// ConditionSucceeded represents whether the last operation CREATE/UPDATE/DELETE was successful.
ConditionSucceeded = "Succeeded"
Expand Down
6 changes: 4 additions & 2 deletions client/sm/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"strconv"
"strings"

"github.com/SAP/sap-btp-service-operator/api"
"github.com/SAP/sap-btp-service-operator/client/sm/types"
"github.com/SAP/sap-btp-service-operator/internal/auth"
"github.com/SAP/sap-btp-service-operator/internal/httputil"
Expand Down Expand Up @@ -67,8 +68,9 @@ type Client interface {
Call(method string, smpath string, body io.Reader, q *Parameters) (*http.Response, error)
}
type ServiceManagerError struct {
Message string
StatusCode int
Message string
StatusCode int
BrokerError *api.HTTPStatusCodeError `json:"broker_error,omitempty"`
}

func (e *ServiceManagerError) Error() string {
Expand Down
68 changes: 47 additions & 21 deletions controllers/base_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,7 @@ func (r *BaseReconciler) updateStatus(ctx context.Context, object api.SAPBTPReso
func (r *BaseReconciler) init(ctx context.Context, obj api.SAPBTPResource) error {
obj.SetReady(metav1.ConditionFalse)
setInProgressConditions(smClientTypes.CREATE, "Pending", obj)
if err := r.updateStatus(ctx, obj); err != nil {
return err
}
return nil
return r.updateStatus(ctx, obj)
}

func getConditionReason(opType smClientTypes.OperationCategory, state smClientTypes.OperationState) string {
Expand Down Expand Up @@ -302,43 +299,72 @@ func isDelete(object metav1.ObjectMeta) bool {
return !object.DeletionTimestamp.IsZero()
}

func isTransientError(ctx context.Context, err error) bool {
func isTransientError(ctx context.Context, err error) (bool, string) {
log := GetLogger(ctx)
if smError, ok := err.(*sm.ServiceManagerError); ok {
smError, ok := err.(*sm.ServiceManagerError)
if !ok {
return false, err.Error()
}

statusCode := smError.StatusCode
errMsg := smError.Error()
if isBrokerErrorExist(smError) {
log.Info(fmt.Sprintf("Broker returned error status code %d", smError.BrokerError.StatusCode))
statusCode = smError.BrokerError.StatusCode
errMsg = smError.BrokerError.Error()
} else {
log.Info(fmt.Sprintf("SM returned error status code %d", smError.StatusCode))
return smError.StatusCode == http.StatusTooManyRequests || smError.StatusCode == http.StatusServiceUnavailable ||
smError.StatusCode == http.StatusGatewayTimeout || smError.StatusCode == http.StatusNotFound || smError.StatusCode == http.StatusBadGateway
}
return false
return isTransientStatusCode(statusCode), errMsg
}

func isTransientStatusCode(StatusCode int) bool {
return StatusCode == http.StatusTooManyRequests || StatusCode == http.StatusServiceUnavailable ||
StatusCode == http.StatusGatewayTimeout || StatusCode == http.StatusNotFound || StatusCode == http.StatusBadGateway
}

func isBrokerErrorExist(smError *sm.ServiceManagerError) bool {
return smError.BrokerError != nil && smError.BrokerError.StatusCode != 0
}

func (r *BaseReconciler) handleError(ctx context.Context, operationType smClientTypes.OperationCategory, err error, resource api.SAPBTPResource) (ctrl.Result, error) {
var (
isTransient bool
errMsg string
)

if isTransient, errMsg = isTransientError(ctx, err); isTransient {
return r.markAsTransientError(ctx, operationType, errMsg, resource)
}
return r.markAsNonTransientError(ctx, operationType, errMsg, resource)
}

func (r *BaseReconciler) markAsNonTransientError(ctx context.Context, operationType smClientTypes.OperationCategory, nonTransientErr error, object api.SAPBTPResource) (ctrl.Result, error) {
func (r *BaseReconciler) markAsNonTransientError(ctx context.Context, operationType smClientTypes.OperationCategory, errMsg string, object api.SAPBTPResource) (ctrl.Result, error) {
log := GetLogger(ctx)
setFailureConditions(operationType, nonTransientErr.Error(), object)
setFailureConditions(operationType, errMsg, object)
if operationType != smClientTypes.DELETE {
log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, giving up operation :(", operationType, object.GetControllerName(), nonTransientErr.Error()))
log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, giving up operation :(", operationType, object.GetControllerName(), errMsg))
}
object.SetObservedGeneration(object.GetGeneration())
err := r.updateStatus(ctx, object)
if err != nil {
return ctrl.Result{}, err
}
if operationType == smClientTypes.DELETE {
return ctrl.Result{}, nonTransientErr
return ctrl.Result{}, fmt.Errorf(errMsg)
}
return ctrl.Result{}, nil
}

func (r *BaseReconciler) markAsTransientError(ctx context.Context, operationType smClientTypes.OperationCategory, transientErr error, object api.SAPBTPResource) (ctrl.Result, error) {
func (r *BaseReconciler) markAsTransientError(ctx context.Context, operationType smClientTypes.OperationCategory, errMsg string, object api.SAPBTPResource) (ctrl.Result, error) {
log := GetLogger(ctx)
if smError, ok := transientErr.(*sm.ServiceManagerError); ok && smError.StatusCode != http.StatusTooManyRequests {
setInProgressConditions(operationType, transientErr.Error(), object)
log.Info(fmt.Sprintf("operation %s of %s encountered a transient error %s, retrying operation :)", operationType, object.GetControllerName(), transientErr.Error()))
if err := r.updateStatus(ctx, object); err != nil {
return ctrl.Result{}, err
}
setInProgressConditions(operationType, errMsg, object)
log.Info(fmt.Sprintf("operation %s of %s encountered a transient error %s, retrying operation :)", operationType, object.GetControllerName(), errMsg))
if err := r.updateStatus(ctx, object); err != nil {
return ctrl.Result{}, err
}
return ctrl.Result{}, transientErr

return ctrl.Result{}, fmt.Errorf(errMsg)
}

func isInProgress(object api.SAPBTPResource) bool {
Expand Down
22 changes: 9 additions & 13 deletions controllers/servicebinding_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func (r *ServiceBindingReconciler) Reconcile(ctx context.Context, req ctrl.Reque

smClient, err := r.getSMClient(ctx, serviceBinding)
if err != nil {
return r.markAsTransientError(ctx, Unknown, err, serviceBinding)
return r.markAsTransientError(ctx, Unknown, err.Error(), serviceBinding)
}

if len(serviceBinding.Status.OperationURL) > 0 {
Expand Down Expand Up @@ -173,7 +173,7 @@ func (r *ServiceBindingReconciler) Reconcile(ctx context.Context, req ctrl.Reque
binding, err := r.getBindingForRecovery(ctx, smClient, serviceBinding)
if err != nil {
log.Error(err, "failed to check binding recovery")
return r.markAsTransientError(ctx, smClientTypes.CREATE, err, serviceBinding)
return r.markAsTransientError(ctx, smClientTypes.CREATE, err.Error(), serviceBinding)
}
if binding != nil {
// Recovery - restore binding from SM
Expand Down Expand Up @@ -209,7 +209,7 @@ func (r *ServiceBindingReconciler) createBinding(ctx context.Context, smClient s
_, bindingParameters, err := buildParameters(r.Client, serviceBinding.Namespace, serviceBinding.Spec.ParametersFrom, serviceBinding.Spec.Parameters)
if err != nil {
log.Error(err, "failed to parse smBinding parameters")
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, err, serviceBinding)
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, err.Error(), serviceBinding)
}

smBinding, operationURL, bindErr := smClient.Bind(&smClientTypes.ServiceBinding{
Expand All @@ -225,16 +225,13 @@ func (r *ServiceBindingReconciler) createBinding(ctx context.Context, smClient s

if bindErr != nil {
log.Error(err, "failed to create service binding", "serviceInstanceID", serviceInstance.Status.InstanceID)
if isTransientError(ctx, bindErr) {
return r.markAsTransientError(ctx, smClientTypes.CREATE, bindErr, serviceBinding)
}
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, bindErr, serviceBinding)
return r.handleError(ctx, smClientTypes.CREATE, bindErr, serviceBinding)
}

if operationURL != "" {
var bindingID string
if bindingID = sm.ExtractBindingID(operationURL); len(bindingID) == 0 {
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, fmt.Errorf("failed to extract smBinding ID from operation URL %s", operationURL), serviceBinding)
return r.markAsNonTransientError(ctx, smClientTypes.CREATE, fmt.Sprintf("failed to extract smBinding ID from operation URL %s", operationURL), serviceBinding)
}
serviceBinding.Status.BindingID = bindingID

Expand Down Expand Up @@ -295,7 +292,7 @@ func (r *ServiceBindingReconciler) delete(ctx context.Context, smClient sm.Clien
operationURL, unbindErr := smClient.Unbind(serviceBinding.Status.BindingID, nil, buildUserInfo(ctx, serviceBinding.Spec.UserInfo))
if unbindErr != nil {
// delete will proceed anyway
return r.markAsNonTransientError(ctx, smClientTypes.DELETE, unbindErr, serviceBinding)
return r.markAsNonTransientError(ctx, smClientTypes.DELETE, unbindErr.Error(), serviceBinding)
}

if operationURL != "" {
Expand Down Expand Up @@ -336,8 +333,7 @@ func (r *ServiceBindingReconciler) poll(ctx context.Context, smClient sm.Client,
}

if status == nil {
err := fmt.Errorf("failed to get last operation status of %s", serviceBinding.Name)
return r.markAsTransientError(ctx, serviceBinding.Status.OperationType, err, serviceBinding)
return r.markAsTransientError(ctx, serviceBinding.Status.OperationType, fmt.Sprintf("failed to get last operation status of %s", serviceBinding.Name), serviceBinding)
}
switch status.State {
case smClientTypes.INPROGRESS:
Expand Down Expand Up @@ -689,9 +685,9 @@ func (r *ServiceBindingReconciler) handleSecretError(ctx context.Context, op smC
log := GetLogger(ctx)
log.Error(err, fmt.Sprintf("failed to store secret %s for binding %s", binding.Spec.SecretName, binding.Name))
if apierrors.ReasonForError(err) == metav1.StatusReasonUnknown {
return r.markAsNonTransientError(ctx, op, err, binding)
return r.markAsNonTransientError(ctx, op, err.Error(), binding)
}
return r.markAsTransientError(ctx, op, err, binding)
return r.markAsTransientError(ctx, op, err.Error(), binding)
}

func (r *ServiceBindingReconciler) addInstanceInfo(ctx context.Context, binding *servicesv1.ServiceBinding, credentialsMap map[string][]byte) ([]SecretMetadataProperty, error) {
Expand Down
78 changes: 64 additions & 14 deletions controllers/servicebinding_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,7 @@ var _ = Describe("ServiceBinding controller", func() {
var guid string

createBindingWithoutAssertionsAndWait := func(ctx context.Context, name string, namespace string, instanceName string, externalName string, wait bool) (*v1.ServiceBinding, error) {
binding := newBindingObject(name, namespace)
binding.Spec.ServiceInstanceName = instanceName
binding.Spec.ExternalName = externalName
binding.Spec.Parameters = &runtime.RawExtension{
Raw: []byte(`{"key": "value"}`),
}
binding.Spec.ParametersFrom = []v1.ParametersFromSource{
{
SecretKeyRef: &v1.SecretKeyReference{
Name: "param-secret",
Key: "secret-parameter",
},
},
}
binding := generateBasicBindingTemplate(name, namespace, instanceName, externalName)

if err := k8sClient.Create(ctx, binding); err != nil {
return nil, err
Expand Down Expand Up @@ -538,6 +525,34 @@ var _ = Describe("ServiceBinding controller", func() {
})
})

When("SM returned error 502 and broker returned 429", func() {
BeforeEach(func() {
errorMessage = "too many requests from broker"
fakeClient.BindReturns(nil, "", getTransientBrokerError(errorMessage))
})

It("should detect the error as transient and eventually succeed", func() {
createdBinding, _ := createBindingWithoutAssertionsAndWait(context.Background(),
bindingName, bindingTestNamespace, instanceName, "binding-external-name", false)
expectBindingToBeInFailedStateWithMsg(createdBinding, errorMessage)

fakeClient.BindReturns(&smClientTypes.ServiceBinding{ID: fakeBindingID,
Credentials: json.RawMessage("{\"secret_key\": \"secret_value\"}")}, "", nil)
validateBindingIsReady(createdBinding, bindingName)
})
})

When("SM returned 502 and broker returned 400", func() {
BeforeEach(func() {
errorMessage = "very bad request"
fakeClient.BindReturnsOnCall(0, nil, "", getNonTransientBrokerError(errorMessage))
})

It("should detect the error as non-transient and fail", func() {
createBindingWithError(context.Background(), bindingName, bindingTestNamespace, instanceName, "binding-external-name", errorMessage)
})
})

})

When("SM returned invalid credentials json", func() {
Expand Down Expand Up @@ -1167,6 +1182,41 @@ var _ = Describe("ServiceBinding controller", func() {
})
})

func expectBindingToBeInFailedStateWithMsg(binding *v1.ServiceBinding, message string) {
cond := meta.FindStatusCondition(binding.GetConditions(), api.ConditionSucceeded)
Expect(cond).To(Not(BeNil()))
Expect(cond.Message).To(ContainSubstring(message))
Expect(cond.Status).To(Equal(metav1.ConditionFalse))
}

func validateBindingIsReady(createdBinding *v1.ServiceBinding, bindingName string) {
Eventually(func() bool {
err := k8sClient.Get(context.Background(), types.NamespacedName{Name: bindingName, Namespace: bindingTestNamespace}, createdBinding)
if err != nil {
return false
}
return isReady(createdBinding)
}, timeout, interval).Should(BeTrue())
}

func generateBasicBindingTemplate(name, namespace, instanceName, externalName string) *v1.ServiceBinding {
binding := newBindingObject(name, namespace)
binding.Spec.ServiceInstanceName = instanceName
binding.Spec.ExternalName = externalName
binding.Spec.Parameters = &runtime.RawExtension{
Raw: []byte(`{"key": "value"}`),
}
binding.Spec.ParametersFrom = []v1.ParametersFromSource{
{
SecretKeyRef: &v1.SecretKeyReference{
Name: "param-secret",
Key: "secret-parameter",
},
},
}
return binding
}

func validateSecretData(secret *corev1.Secret, expectedKey string, expectedValue string) {
Expect(secret.Data).ToNot(BeNil())
Expect(secret.Data).To(HaveKey(expectedKey))
Expand Down
Loading

0 comments on commit 61663b2

Please sign in to comment.