From 8183a8f25c25d9bafdfa410ec4fb87c87cefcfff Mon Sep 17 00:00:00 2001 From: Yash Mehrotra Date: Wed, 12 Jul 2023 16:09:16 +0530 Subject: [PATCH] fix: deletion logic for transformed checks --- checks/common.go | 1 + pkg/api.go | 1 + pkg/cache/postgres.go | 9 +++++++-- pkg/controllers/canary_controller.go | 30 +--------------------------- pkg/db/canary.go | 30 +++++++++++++++++++++++----- pkg/jobs/canary/canary_jobs.go | 25 +++++++++++++---------- pkg/metrics/metrics.go | 4 +++- 7 files changed, 53 insertions(+), 47 deletions(-) diff --git a/checks/common.go b/checks/common.go index 4d4708bcf..ac57adc6e 100644 --- a/checks/common.go +++ b/checks/common.go @@ -96,6 +96,7 @@ func transform(ctx *context.Context, in *pkg.CheckResult) ([]*pkg.CheckResult, e // We use this label to set the transformed column to true // This label is used and then removed in pkg.FromV1 function r.Canary.Labels["transformed"] = "true" //nolint:goconst + r.Transformed = true results = append(results, &r) } diff --git a/pkg/api.go b/pkg/api.go index 3a0b4aadb..95de1ee12 100644 --- a/pkg/api.go +++ b/pkg/api.go @@ -401,6 +401,7 @@ type CheckResult struct { Message string Error string Metrics []Metric + Transformed bool // Check is the configuration Check external.Check Canary v1.Canary diff --git a/pkg/cache/postgres.go b/pkg/cache/postgres.go index 3e1d9c0cb..7500e3108 100644 --- a/pkg/cache/postgres.go +++ b/pkg/cache/postgres.go @@ -7,6 +7,7 @@ import ( "github.com/flanksource/canary-checker/pkg" "github.com/flanksource/canary-checker/pkg/db" + "github.com/flanksource/commons/collections" "github.com/flanksource/commons/logger" "github.com/flanksource/duty" "github.com/flanksource/duty/models" @@ -15,7 +16,11 @@ import ( "gorm.io/gorm/clause" ) -var PostgresCache = &postgresCache{} +var ( + PostgresCache = &postgresCache{} + + CheckTypesToDeleteOnTransformation = []string{"alertmanager"} +) type postgresCache struct { *pgxpool.Pool @@ -38,7 +43,7 @@ func (c *postgresCache) Add(check pkg.Check, statii ...pkg.CheckStatus) []string checkID, err := c.AddCheckFromStatus(check, status) if err != nil { logger.Errorf("error persisting check with canary %s: %v", check.CanaryID, err) - } else { + } else if collections.Contains(CheckTypesToDeleteOnTransformation, check.Type) { checkIDs = append(checkIDs, checkID.String()) } c.AddCheckStatus(check, status) diff --git a/pkg/controllers/canary_controller.go b/pkg/controllers/canary_controller.go index 219570518..425ab1cad 100644 --- a/pkg/controllers/canary_controller.go +++ b/pkg/controllers/canary_controller.go @@ -21,9 +21,6 @@ import ( "time" "github.com/flanksource/canary-checker/pkg/db" - "github.com/flanksource/canary-checker/pkg/metrics" - "github.com/flanksource/canary-checker/pkg/utils" - "github.com/google/uuid" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -98,7 +95,7 @@ func (r *CanaryReconciler) Reconcile(ctx gocontext.Context, req ctrl.Request) (c return ctrl.Result{}, r.Update(ctx, canary) } - c, checks, changed, err := db.PersistCanary(*canary, "kubernetes/"+string(canary.ObjectMeta.UID)) + _, checks, changed, err := db.PersistCanary(*canary, "kubernetes/"+string(canary.ObjectMeta.UID)) if err != nil { return ctrl.Result{Requeue: true}, err } @@ -108,22 +105,6 @@ func (r *CanaryReconciler) Reconcile(ctx gocontext.Context, req ctrl.Request) (c checkIDs = append(checkIDs, id) } - dbCheckIds := getCheckIDsForCanary(c.ID) - // delete checks which are no longer in the canary - // fetching the checkIds present in the db but not present on the canary - toRemoveCheckIDs := utils.SetDifference(dbCheckIds, checkIDs) - // delete the check and update the cron for now - if len(toRemoveCheckIDs) > 0 { - logger.Info("removing checks from canary", "checkIDs", toRemoveCheckIDs) - if err := db.DeleteChecks(toRemoveCheckIDs); err != nil { - logger.Error(err, "failed to delete checks") - } - metrics.UnregisterGauge(toRemoveCheckIDs) - if err := canaryJobs.SyncCanaryJob(*canary); err != nil { - logger.Error(err, "failed to sync canary job") - } - } - // Sync jobs if canary is created or updated if canary.Generation == 1 || changed { if err := canaryJobs.SyncCanaryJob(*canary); err != nil { @@ -202,12 +183,3 @@ func (r *CanaryReconciler) includeNamespace(namespace string) bool { } return false } - -func getCheckIDsForCanary(canaryID uuid.UUID) []string { - checks, _ := db.GetAllActiveChecksForCanary(canaryID) - var checkIDs []string - for _, check := range checks { - checkIDs = append(checkIDs, check.ID.String()) - } - return checkIDs -} diff --git a/pkg/db/canary.go b/pkg/db/canary.go index 206b222a5..8334add0c 100644 --- a/pkg/db/canary.go +++ b/pkg/db/canary.go @@ -96,15 +96,16 @@ func PersistCheck(check pkg.Check, canaryID uuid.UUID) (uuid.UUID, error) { "deleted_at": nil, } + var _check pkg.Check tx := Gorm.Clauses(clause.OnConflict{ Columns: []clause.Column{{Name: "canary_id"}, {Name: "type"}, {Name: "name"}, {Name: "agent_id"}}, DoUpdates: clause.Assignments(assignments), - }).Create(&check) + }, clause.Returning{Columns: []clause.Column{{Name: "id"}}}).Create(&check).Scan(&_check) if tx.Error != nil { return uuid.Nil, tx.Error } - return check.ID, nil + return _check.ID, nil } func GetTransformedCheckIDs(canaryID string) ([]string, error) { @@ -170,8 +171,8 @@ func DeleteCheckComponentRelationshipsForCanary(id string, deleteTime time.Time) return Gorm.Table("check_component_relationships").Where("canary_id = ?", id).UpdateColumn("deleted_at", deleteTime).Error } -func DeleteChecks(id []string) error { - return Gorm.Table("checks").Where("id IN (?)", id).UpdateColumn("deleted_at", time.Now()).Error +func DeleteNonTransformedChecks(id []string) error { + return Gorm.Table("checks").Where("id IN (?) and transformed = false", id).UpdateColumn("deleted_at", time.Now()).Error } func GetCanary(id string) (pkg.Canary, error) { @@ -272,8 +273,16 @@ func PersistCanary(canary v1.Canary, source string) (*pkg.Canary, map[string]str } } - var checks = make(map[string]string) + var oldCheckIDs []string + err = Gorm. + Table("checks"). + Select("id"). + Where("canary_id = ? AND deleted_at IS NULL AND transformed = false", model.ID). + Scan(&oldCheckIDs). + Error + var checks = make(map[string]string) + var newCheckIDs []string for _, config := range canary.Spec.GetAllChecks() { check := pkg.FromExternalCheck(model, config) // not creating the new check if already exists in the status @@ -286,9 +295,20 @@ func PersistCanary(canary v1.Canary, source string) (*pkg.Canary, map[string]str if err != nil { logger.Errorf("error persisting check", err) } + newCheckIDs = append(newCheckIDs, id.String()) checks[config.GetName()] = id.String() } + // Delete non-transformed checks which are no longer in the canary + // fetching the checkIds present in the db but not present on the canary + checkIDsToRemove := utils.SetDifference(oldCheckIDs, newCheckIDs) + if len(checkIDsToRemove) > 0 { + logger.Infof("removing checks from canary:%s with ids %v", model.ID, checkIDsToRemove) + if err := DeleteNonTransformedChecks(checkIDsToRemove); err != nil { + logger.Errorf("failed to delete non transformed checks: %v", err) + } + metrics.UnregisterGauge(checkIDsToRemove) + } return &model, checks, changed, nil } diff --git a/pkg/jobs/canary/canary_jobs.go b/pkg/jobs/canary/canary_jobs.go index cb89107b1..dcdc445e5 100644 --- a/pkg/jobs/canary/canary_jobs.go +++ b/pkg/jobs/canary/canary_jobs.go @@ -53,9 +53,8 @@ type CanaryJob struct { Kubernetes kubernetes.Interface Canary v1.Canary DBCanary pkg.Canary - // model pkg.Canary - LogPass bool - LogFail bool + LogPass bool + LogFail bool } func (job CanaryJob) GetNamespacedName() types.NamespacedName { @@ -94,22 +93,24 @@ func (job CanaryJob) Run() { } // Get transformed checks before and after, and then delete the olds ones that are not in new set - existingTransformedChecks, _ := db.GetTransformedCheckIDs(job.Canary.GetPersistedID()) - var newChecksCreated []string + existingTransformedChecks, _ := db.GetTransformedCheckIDs(job.DBCanary.ID.String()) + var transformedChecksCreated []string results := checks.RunChecks(job.NewContext()) for _, result := range results { if job.LogPass && result.Pass || job.LogFail && !result.Pass { logger.Infof(result.String()) } - checkIDsAdded := cache.PostgresCache.Add(pkg.FromV1(result.Canary, result.Check), pkg.FromResult(*result)) - newChecksCreated = append(newChecksCreated, checkIDsAdded...) + transformedChecksAdded := cache.PostgresCache.Add(pkg.FromV1(result.Canary, result.Check), pkg.FromResult(*result)) + transformedChecksCreated = append(transformedChecksCreated, transformedChecksAdded...) } job.updateStatusAndEvent(results) // Checks which are not present now should be marked as healthy - checksToMarkHealthy := utils.SetDifference(existingTransformedChecks, newChecksCreated) - if err := db.UpdateChecksStatus(checksToMarkHealthy, models.CheckStatusHealthy); err != nil { - logger.Errorf("error deleting transformed checks for canary %s: %v", job.Canary.GetPersistedID(), err) + checksToMarkHealthy := utils.SetDifference(existingTransformedChecks, transformedChecksCreated) + if len(checksToMarkHealthy) > 0 && len(transformedChecksCreated) > 0 { + if err := db.UpdateChecksStatus(checksToMarkHealthy, models.CheckStatusHealthy); err != nil { + logger.Errorf("error deleting transformed checks for canary %s: %v", job.Canary.GetPersistedID(), err) + } } // Update last runtime map @@ -265,6 +266,10 @@ func SyncCanaryJob(canary v1.Canary) error { return err } + if dbCanary.Name != "kubernetes-bundle" { + return nil + } + job := CanaryJob{ Client: Kommons, Kubernetes: Kubernetes, diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 89db1bb9f..9f78977cd 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -140,7 +140,9 @@ func Record(canary v1.Canary, result *pkg.CheckResult) (_uptime pkg.Uptime, _lat return _uptime, _latency } if canary.GetCheckID(result.Check.GetName()) == "" { - logger.Warnf("%s/%s/%s returned a result for a check that does not exist", canary.Namespace, canary.Name, result.Check.GetName()) + if val := result.Canary.Labels["transformed"]; val != "true" { + logger.Warnf("%s/%s/%s returned a result for a check that does not exist", canary.Namespace, canary.Name, result.Check.GetName()) + } return _uptime, _latency } canaryNamespace := canary.Namespace