Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Surface cpu and mem requests forbidden errors (and other ones too) in KSVC creation #14453

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pkg/apis/serving/v1/revision_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1
import (
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
net "knative.dev/networking/pkg/apis/networking"
"knative.dev/pkg/kmeta"
Expand Down Expand Up @@ -143,3 +144,9 @@ func (rs *RevisionStatus) IsActivationRequired() bool {
c := revisionCondSet.Manage(rs).GetCondition(RevisionConditionActive)
return c != nil && c.Status != corev1.ConditionTrue
}

// IsReplicaSetFailure returns true if the deployment replicaset is failing to create
func (rs *RevisionStatus) IsReplicaSetFailure(deploymentStatus *appsv1.DeploymentStatus) bool {
ds := serving.TransformDeploymentStatus(deploymentStatus)
return ds != nil && ds.GetCondition(serving.DeploymentConditionReplicaSetReady).IsFalse()
}
43 changes: 43 additions & 0 deletions pkg/apis/serving/v1/revision_helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"testing"
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"

Expand Down Expand Up @@ -267,3 +268,45 @@ func TestSetRoutingState(t *testing.T) {
t.Error("Expected default value for unparsable annotationm but got:", got)
}
}

func TestIsReplicaSetFailure(t *testing.T) {
revisionStatus := RevisionStatus{}
cases := []struct {
name string
status appsv1.DeploymentStatus
IsReplicaSetFailure bool
}{{
name: "empty deployment status should not be a failure",
status: appsv1.DeploymentStatus{},
}, {
name: "Ready deployment status should not be a failure",
status: appsv1.DeploymentStatus{
Conditions: []appsv1.DeploymentCondition{{
Type: appsv1.DeploymentAvailable, Status: corev1.ConditionTrue,
}},
},
}, {
name: "ReplicasetFailure true should be a failure",
status: appsv1.DeploymentStatus{
Conditions: []appsv1.DeploymentCondition{{
Type: appsv1.DeploymentReplicaFailure, Status: corev1.ConditionTrue,
}},
},
IsReplicaSetFailure: true,
}, {
name: "ReplicasetFailure false should not be a failure",
status: appsv1.DeploymentStatus{
Conditions: []appsv1.DeploymentCondition{{
Type: appsv1.DeploymentReplicaFailure, Status: corev1.ConditionFalse,
}},
},
}}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
if got, want := revisionStatus.IsReplicaSetFailure(&tc.status), tc.IsReplicaSetFailure; got != want {
t.Errorf("IsReplicaSetFailure = %v, want: %v", got, want)
}
})
}
}
3 changes: 2 additions & 1 deletion pkg/apis/serving/v1/revision_lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ const (
ReasonProgressDeadlineExceeded = "ProgressDeadlineExceeded"
)

// RevisionConditionActive is not part of the RevisionConditionSet because we can have Inactive Ready Revisions (scale to zero)
// TODO: add to the revisionCondSet or at least initialize RevisionConditionActive and deal with the edge cases
gabo1208 marked this conversation as resolved.
Show resolved Hide resolved
var revisionCondSet = apis.NewLivingConditionSet(
RevisionConditionResourcesAvailable,
RevisionConditionContainerHealthy,
Expand Down Expand Up @@ -171,7 +173,6 @@ func (rs *RevisionStatus) PropagateDeploymentStatus(original *appsv1.DeploymentS
func (rs *RevisionStatus) PropagateAutoscalerStatus(ps *autoscalingv1alpha1.PodAutoscalerStatus) {
// Reflect the PA status in our own.
cond := ps.GetCondition(autoscalingv1alpha1.PodAutoscalerConditionReady)

rs.ActualReplicas = nil
if ps.ActualScale != nil && *ps.ActualScale >= 0 {
rs.ActualReplicas = ps.ActualScale
Expand Down
6 changes: 6 additions & 0 deletions pkg/reconciler/revision/reconcile_resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1.Revision)
}
}

// If the replicaset is failing we assume its an error we have to surface
if rev.Status.IsReplicaSetFailure(&deployment.Status) {
gabo1208 marked this conversation as resolved.
Show resolved Hide resolved
rev.Status.PropagateDeploymentStatus(&deployment.Status)
return nil
}
Comment on lines +83 to +86
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My initial thought was to have this in the else clause. But seeing this here I wonder if there is a case where creating the Deployment will succeed but response includes a status condition with a ReplicaFailure?

Otherwise this section might be easier to reason about if we push if !rev.Status.IsActivationRequired() down into
PropagateDeploymentStatus and we can do the replica failure check there

Copy link
Member Author

@gabo1208 gabo1208 Oct 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that I revisit the code, yes the reason to have it there is for newly created deployments with replicaset errors.

I couldn't move it inside propagate status inside since they are both different cases. The revision required surface an error but it does not care to be overwritten later, and you can have a replicaset error without a replicaset error, with the replicaset error we want to return immediately after so we need to check in the reconciler.


// If a container keeps crashing (no active pods in the deployment although we want some)
if *deployment.Spec.Replicas > 0 && deployment.Status.AvailableReplicas == 0 {
pods, err := c.kubeclient.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{LabelSelector: metav1.FormatLabelSelector(deployment.Spec.Selector)})
Expand Down
18 changes: 11 additions & 7 deletions test/e2e/resource_quota_error_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@ func TestResourceQuotaError(t *testing.T) {

clients := test.Setup(t, test.Options{Namespace: "rq-test"})
const (
errorReason = "RevisionFailed"
errorMsgScale = "Initial scale was never achieved"
errorMsgQuota = "forbidden: exceeded quota"
revisionReason = "ProgressDeadlineExceeded"
errorReason = "RevisionFailed"
containerCreating = "ContainerCreating"
gabo1208 marked this conversation as resolved.
Show resolved Hide resolved
errorMsgScale = "Initial scale was never achieved"
errorMsgQuota = "forbidden: exceeded quota"
revisionReason = "ProgressDeadlineExceeded"
)
names := test.ResourceNames{
Service: test.ObjectNameForTest(t),
Expand Down Expand Up @@ -87,9 +88,12 @@ func TestResourceQuotaError(t *testing.T) {
if strings.Contains(cond.Message, errorMsgQuota) && cond.IsFalse() {
return true, nil
}
if cond.Reason == errorReason && cond.IsFalse() {
return true, nil
}
t.Logf("Reason: %s ; Message: %s ; Status: %s", cond.Reason, cond.Message, cond.Status)
return true, fmt.Errorf("the service %s was not marked with expected error condition (Reason=%q, Message=%q, Status=%q), but with (Reason=%q, Message=%q, Status=%q)",
names.Config, errorReason, errorMsgScale, "False", cond.Reason, cond.Message, cond.Status)
names.Config, errorReason, "", "False", cond.Reason, cond.Message, cond.Status)
}
return false, nil
}, "ContainerUnscheduleable")
Expand All @@ -113,8 +117,8 @@ func TestResourceQuotaError(t *testing.T) {
err = v1test.CheckRevisionState(clients.ServingClient, revisionName, func(r *v1.Revision) (bool, error) {
cond := r.Status.GetCondition(v1.RevisionConditionReady)
if cond != nil {
// Can fail with either a progress deadline exceeded error or an exceeded resource quota error
if cond.Reason == revisionReason && strings.Contains(cond.Message, errorMsgScale) {
// Can fail with either a progress deadline exceeded error, container creating status, waiting for the revision to fail or an exceeded resource quota error
if (cond.Reason == revisionReason || cond.Reason == containerCreating) && cond.IsFalse() {
return true, nil
}
if strings.Contains(cond.Message, errorMsgQuota) && cond.IsFalse() {
Expand Down