Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for daemonSet and statefulSet resources in charts #405

Merged
merged 6 commits into from
Sep 1, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion internal/chartverifier/checks/charttesting.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ func testRelease(
release, namespace, releaseSelector string,
cleanupHelmTests bool,
) error {
if err := kubectl.WaitForDeployments(ctx, namespace, releaseSelector); err != nil {
if err := kubectl.WaitForWorkloadResources(ctx, namespace, releaseSelector); err != nil {
return err
}
if err := helm.Test(ctx, namespace, release); err != nil {
Expand Down
108 changes: 79 additions & 29 deletions internal/tool/kubectl.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ var content embed.FS
var (
kubeOpenShiftVersionMap map[string]string
listDeployments = getDeploymentsList
listDaemonSets = getDaemonSetsList
listStatefulSets = getStatefulSetsList
latestKubeVersion *semver.Version
)

Expand All @@ -41,9 +43,10 @@ type versionMapping struct {
OcpVersion string `yaml:"ocp-version"`
}

type deploymentNotReady struct {
Name string
Unavailable int32
type workloadNotReady struct {
ResourceType string
Name string
Unavailable int32
}

func init() {
Expand Down Expand Up @@ -94,49 +97,80 @@ func NewKubectl(kubeConfig clientcmd.ClientConfig) (*Kubectl, error) {
return kubectl, nil
}

func (k Kubectl) WaitForDeployments(context context.Context, namespace string, selector string) error {
func (k Kubectl) WaitForWorkloadResources(context context.Context, namespace string, selector string) error {
mgoerens marked this conversation as resolved.
Show resolved Hide resolved
deadline, _ := context.Deadline()
unavailableDeployments := []deploymentNotReady{{Name: "none", Unavailable: 1}}
getDeploymentsError := ""

utils.LogInfo(fmt.Sprintf("Start wait for deployments. --timeout time left: %s ", time.Until(deadline).String()))

for deadline.After(time.Now()) && len(unavailableDeployments) > 0 {
unavailableDeployments = []deploymentNotReady{}
deployments, err := listDeployments(k, context, namespace, selector)
if err != nil {
unavailableDeployments = []deploymentNotReady{{Name: "none", Unavailable: 1}}
getDeploymentsError = fmt.Sprintf("error getting deployments from namespace %s : %v", namespace, err)
utils.LogWarning(getDeploymentsError)
unavailableWorkloadResources := []workloadNotReady{{Name: "none", Unavailable: 1}}
getWorkloadResourceError := ""

// Loop until timeout reached or all requested pods are available
utils.LogInfo(fmt.Sprintf("Start wait for workloads resources. --timeout time left: %s ", time.Until(deadline).String()))
for deadline.After(time.Now()) && len(unavailableWorkloadResources) > 0 {
unavailableWorkloadResources = []workloadNotReady{}

deployments, errDeployments := listDeployments(k, context, namespace, selector)
daemonSets, errDaemonSets := listDaemonSets(k, context, namespace, selector)
statefulSets, errStatefulSets := listStatefulSets(k, context, namespace, selector)
Comment on lines +113 to +115
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I'm understanding that we check for these three resources regardless of whether the deployed chart uses them? The idea being that charts that don't use statefulsets/daemonsets would just auto-pass the "readiness" check because none of those workloads exist. Is that a correct understanding?

That's fine, I just want to make sure. Fundamentally, this check seems to be flawed because charts can deploy all sorts of resources. But at the end of the day, this is a better check than nothing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, charts that don't try to deploy certain resources will just auto-pass since we're not expecting them to be there.

As far as checking for other resources I could add in jobs at least because it seems like the only one we're missing from the list of default workload resources (https://kubernetes.io/docs/concepts/workloads/) but didn't know if i should since it wasn't requested and didn't want to go overboard. Happy to make another PR for that though if it's needed


// Else/If block will handle errors from API requests for each
// resource type or inspect the resources that are successfully returned
if errDeployments != nil {
unavailableWorkloadResources = []workloadNotReady{{Name: "none", ResourceType: "Deployment", Unavailable: 1}}
getWorkloadResourceError = fmt.Sprintf("error getting deployments from namespace %s : %v", namespace, errDeployments)
utils.LogWarning(getWorkloadResourceError)
time.Sleep(time.Second)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the purpose of sleeping here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was actually left over from what the function previously did.
I believe the idea is to avoid excessive calls and rate limiting since it'll immediately hit the API again after these failures. Happy to remove it if this is a non-issue but trying to find the default rate limit info first

} else if errDaemonSets != nil {
unavailableWorkloadResources = []workloadNotReady{{Name: "none", ResourceType: "DaemonSet", Unavailable: 1}}
getWorkloadResourceError = fmt.Sprintf("error getting daemon sets from namespace %s : %v", namespace, errDaemonSets)
utils.LogWarning(getWorkloadResourceError)
time.Sleep(time.Second)
} else if errStatefulSets != nil {
unavailableWorkloadResources = []workloadNotReady{{Name: "none", ResourceType: "StatefulSet", Unavailable: 1}}
getWorkloadResourceError = fmt.Sprintf("error getting stateful sets from namespace %s : %v", namespace, errStatefulSets)
utils.LogWarning(getWorkloadResourceError)
time.Sleep(time.Second)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A few things for this block.

  1. This feels like we should be dropping the else if component. Running through each if block separately should work here.

  2. There's a good deal of repetition in what's being executed, indicating we could probably just refactor this to remove it and, in turn, be more concise.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

refactored to reduce repetition

} else {
getDeploymentsError = ""
getWorkloadResourceError = ""
// Check the number of unavailable replicas for each workload type
for _, deployment := range deployments {
// Just after rollout, pods from the previous deployment revision may still be in a
// terminating state.
if deployment.Status.UnavailableReplicas > 0 {
unavailableDeployments = append(unavailableDeployments, deploymentNotReady{Name: deployment.Name, Unavailable: deployment.Status.UnavailableReplicas})
unavailableWorkloadResources = append(unavailableWorkloadResources, workloadNotReady{Name: deployment.Name, ResourceType: "Deployment", Unavailable: deployment.Status.UnavailableReplicas})
}
}
for _, daemonSet := range daemonSets {
if daemonSet.Status.NumberUnavailable > 0 {
unavailableWorkloadResources = append(unavailableWorkloadResources, workloadNotReady{Name: daemonSet.Name, ResourceType: "DaemonSet", Unavailable: daemonSet.Status.NumberUnavailable})
}
}
for _, statefulSet := range statefulSets {
// StatefulSet doesn't report unavailable replicas so it is calculated here
unavailableReplicas := statefulSet.Status.Replicas - statefulSet.Status.AvailableReplicas
if unavailableReplicas > 0 {
unavailableWorkloadResources = append(unavailableWorkloadResources, workloadNotReady{Name: statefulSet.Name, ResourceType: "StatefulSet", Unavailable: unavailableReplicas})
}
}
if len(unavailableDeployments) > 0 {
utils.LogInfo(fmt.Sprintf("Wait for %d deployments:", len(unavailableDeployments)))
for _, unavailableDeployment := range unavailableDeployments {
utils.LogInfo(fmt.Sprintf(" - %s with %d unavailable replicas", unavailableDeployment.Name, unavailableDeployment.Unavailable))

// If any pods are unavailable report it and sleep until the next loop
// If everything is available, exit the loop
if len(unavailableWorkloadResources) > 0 {
utils.LogInfo(fmt.Sprintf("Wait for %d workload resources:", len(unavailableWorkloadResources)))
for _, unavailableWorkloadResource := range unavailableWorkloadResources {
utils.LogInfo(fmt.Sprintf(" - %s %s with %d unavailable pods", unavailableWorkloadResource.ResourceType, unavailableWorkloadResource.Name, unavailableWorkloadResource.Unavailable))
}
time.Sleep(time.Second)
} else {
utils.LogInfo(fmt.Sprintf("Finish wait for deployments, --timeout time left %s", time.Until(deadline).String()))
utils.LogInfo(fmt.Sprintf("Finish wait for workload resources, --timeout time left %s", time.Until(deadline).String()))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. The structure here feels like we can abstract out the parsing of these into separate functions and call them.
  2. Perhaps there's an opportunity to use goroutines here to let the parsing of statefulsets, deployments, and daemonsets work independently of each other? WDYT. I think it could do away with the sleeps I'm seeing.

}
}
}

if len(getDeploymentsError) > 0 {
errorMsg := fmt.Sprintf("Time out retrying after %s", getDeploymentsError)
// Any errors or resources that are still unavailable returns an error at this point
if getWorkloadResourceError != "" {
errorMsg := fmt.Sprintf("Time out retrying after %s", getWorkloadResourceError)
utils.LogError(errorMsg)
return errors.New(errorMsg)
}
if len(unavailableDeployments) > 0 {
errorMsg := "error unavailable deployments, timeout has expired, please consider increasing the timeout using the chart-verifier --timeout flag"
if len(unavailableWorkloadResources) > 0 {
errorMsg := "error unavailable workload resources, timeout has expired, please consider increasing the timeout using the chart-verifier --timeout flag"
utils.LogError(errorMsg)
return errors.New(errorMsg)
}
Expand Down Expand Up @@ -182,6 +216,22 @@ func getDeploymentsList(k Kubectl, context context.Context, namespace string, se
return list.Items, err
}

// getStatefulSetsList returns the StatefulSets in the given namespace whose
// labels match the provided selector, using the Kubectl client's clientset.
// Returns a nil slice and the API error if the List call fails.
func getStatefulSetsList(k Kubectl, context context.Context, namespace string, selector string) ([]v1.StatefulSet, error) {
	list, err := k.clientset.AppsV1().StatefulSets(namespace).List(context, metav1.ListOptions{LabelSelector: selector})
	if err != nil {
		return nil, err
	}
	// err is known to be nil here; return an explicit nil rather than the
	// (nil) err variable for clarity.
	return list.Items, nil
}

// getDaemonSetsList returns the DaemonSets in the given namespace whose
// labels match the provided selector, using the Kubectl client's clientset.
// Returns a nil slice and the API error if the List call fails.
func getDaemonSetsList(k Kubectl, context context.Context, namespace string, selector string) ([]v1.DaemonSet, error) {
	list, err := k.clientset.AppsV1().DaemonSets(namespace).List(context, metav1.ListOptions{LabelSelector: selector})
	if err != nil {
		return nil, err
	}
	// err is known to be nil here; return an explicit nil rather than the
	// (nil) err variable for clarity.
	return list.Items, nil
}

// GetLatestKubeVersion returns the package-level latestKubeVersion rendered
// as a semver string.
// NOTE(review): assumes latestKubeVersion has been initialized elsewhere in
// this package (it is declared at package scope as *semver.Version); calling
// this before initialization would panic on the nil pointer — confirm the
// init path guarantees it is set.
func GetLatestKubeVersion() string {
return latestKubeVersion.String()
}
Loading