Skip to content

Commit

Permalink
fix: deletion logic for transformed checks
Browse files Browse the repository at this point in the history
  • Loading branch information
yashmehrotra committed Jul 12, 2023
1 parent 0c79e5e commit 8183a8f
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 47 deletions.
1 change: 1 addition & 0 deletions checks/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ func transform(ctx *context.Context, in *pkg.CheckResult) ([]*pkg.CheckResult, e
// We use this label to set the transformed column to true
// This label is used and then removed in pkg.FromV1 function
r.Canary.Labels["transformed"] = "true" //nolint:goconst
r.Transformed = true
results = append(results, &r)
}

Expand Down
1 change: 1 addition & 0 deletions pkg/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,7 @@ type CheckResult struct {
Message string
Error string
Metrics []Metric
Transformed bool
// Check is the configuration
Check external.Check
Canary v1.Canary
Expand Down
9 changes: 7 additions & 2 deletions pkg/cache/postgres.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

"github.com/flanksource/canary-checker/pkg"
"github.com/flanksource/canary-checker/pkg/db"
"github.com/flanksource/commons/collections"
"github.com/flanksource/commons/logger"
"github.com/flanksource/duty"
"github.com/flanksource/duty/models"
Expand All @@ -15,7 +16,11 @@ import (
"gorm.io/gorm/clause"
)

var PostgresCache = &postgresCache{}
var (
PostgresCache = &postgresCache{}

CheckTypesToDeleteOnTransformation = []string{"alertmanager"}
)

type postgresCache struct {
*pgxpool.Pool
Expand All @@ -38,7 +43,7 @@ func (c *postgresCache) Add(check pkg.Check, statii ...pkg.CheckStatus) []string
checkID, err := c.AddCheckFromStatus(check, status)
if err != nil {
logger.Errorf("error persisting check with canary %s: %v", check.CanaryID, err)
} else {
} else if collections.Contains(CheckTypesToDeleteOnTransformation, check.Type) {
checkIDs = append(checkIDs, checkID.String())
}
c.AddCheckStatus(check, status)
Expand Down
30 changes: 1 addition & 29 deletions pkg/controllers/canary_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ import (
"time"

"github.com/flanksource/canary-checker/pkg/db"
"github.com/flanksource/canary-checker/pkg/metrics"
"github.com/flanksource/canary-checker/pkg/utils"
"github.com/google/uuid"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

Expand Down Expand Up @@ -98,7 +95,7 @@ func (r *CanaryReconciler) Reconcile(ctx gocontext.Context, req ctrl.Request) (c
return ctrl.Result{}, r.Update(ctx, canary)
}

c, checks, changed, err := db.PersistCanary(*canary, "kubernetes/"+string(canary.ObjectMeta.UID))
_, checks, changed, err := db.PersistCanary(*canary, "kubernetes/"+string(canary.ObjectMeta.UID))
if err != nil {
return ctrl.Result{Requeue: true}, err
}
Expand All @@ -108,22 +105,6 @@ func (r *CanaryReconciler) Reconcile(ctx gocontext.Context, req ctrl.Request) (c
checkIDs = append(checkIDs, id)

Check failure on line 105 in pkg/controllers/canary_controller.go

View workflow job for this annotation

GitHub Actions / lint

SA4010: this result of append is never used, except maybe in other appends (staticcheck)
}

dbCheckIds := getCheckIDsForCanary(c.ID)
// delete checks which are no longer in the canary
// fetching the checkIds present in the db but not present on the canary
toRemoveCheckIDs := utils.SetDifference(dbCheckIds, checkIDs)
// delete the check and update the cron for now
if len(toRemoveCheckIDs) > 0 {
logger.Info("removing checks from canary", "checkIDs", toRemoveCheckIDs)
if err := db.DeleteChecks(toRemoveCheckIDs); err != nil {
logger.Error(err, "failed to delete checks")
}
metrics.UnregisterGauge(toRemoveCheckIDs)
if err := canaryJobs.SyncCanaryJob(*canary); err != nil {
logger.Error(err, "failed to sync canary job")
}
}

// Sync jobs if canary is created or updated
if canary.Generation == 1 || changed {
if err := canaryJobs.SyncCanaryJob(*canary); err != nil {
Expand Down Expand Up @@ -202,12 +183,3 @@ func (r *CanaryReconciler) includeNamespace(namespace string) bool {
}
return false
}

func getCheckIDsForCanary(canaryID uuid.UUID) []string {
checks, _ := db.GetAllActiveChecksForCanary(canaryID)
var checkIDs []string
for _, check := range checks {
checkIDs = append(checkIDs, check.ID.String())
}
return checkIDs
}
30 changes: 25 additions & 5 deletions pkg/db/canary.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,16 @@ func PersistCheck(check pkg.Check, canaryID uuid.UUID) (uuid.UUID, error) {
"deleted_at": nil,
}

var _check pkg.Check
tx := Gorm.Clauses(clause.OnConflict{
Columns: []clause.Column{{Name: "canary_id"}, {Name: "type"}, {Name: "name"}, {Name: "agent_id"}},
DoUpdates: clause.Assignments(assignments),
}).Create(&check)
}, clause.Returning{Columns: []clause.Column{{Name: "id"}}}).Create(&check).Scan(&_check)
if tx.Error != nil {
return uuid.Nil, tx.Error
}

return check.ID, nil
return _check.ID, nil
}

func GetTransformedCheckIDs(canaryID string) ([]string, error) {
Expand Down Expand Up @@ -170,8 +171,8 @@ func DeleteCheckComponentRelationshipsForCanary(id string, deleteTime time.Time)
return Gorm.Table("check_component_relationships").Where("canary_id = ?", id).UpdateColumn("deleted_at", deleteTime).Error
}

func DeleteChecks(id []string) error {
return Gorm.Table("checks").Where("id IN (?)", id).UpdateColumn("deleted_at", time.Now()).Error
func DeleteNonTransformedChecks(id []string) error {
return Gorm.Table("checks").Where("id IN (?) and transformed = false", id).UpdateColumn("deleted_at", time.Now()).Error
}

func GetCanary(id string) (pkg.Canary, error) {
Expand Down Expand Up @@ -272,8 +273,16 @@ func PersistCanary(canary v1.Canary, source string) (*pkg.Canary, map[string]str
}
}

var checks = make(map[string]string)
var oldCheckIDs []string
err = Gorm.

Check failure on line 277 in pkg/db/canary.go

View workflow job for this annotation

GitHub Actions / lint

ineffectual assignment to err (ineffassign)
Table("checks").
Select("id").
Where("canary_id = ? AND deleted_at IS NULL AND transformed = false", model.ID).
Scan(&oldCheckIDs).
Error

var checks = make(map[string]string)
var newCheckIDs []string
for _, config := range canary.Spec.GetAllChecks() {
check := pkg.FromExternalCheck(model, config)
// not creating the new check if already exists in the status
Expand All @@ -286,9 +295,20 @@ func PersistCanary(canary v1.Canary, source string) (*pkg.Canary, map[string]str
if err != nil {
logger.Errorf("error persisting check", err)
}
newCheckIDs = append(newCheckIDs, id.String())
checks[config.GetName()] = id.String()
}

// Delete non-transformed checks which are no longer in the canary
// fetching the checkIds present in the db but not present on the canary
checkIDsToRemove := utils.SetDifference(oldCheckIDs, newCheckIDs)
if len(checkIDsToRemove) > 0 {
logger.Infof("removing checks from canary:%s with ids %v", model.ID, checkIDsToRemove)
if err := DeleteNonTransformedChecks(checkIDsToRemove); err != nil {
logger.Errorf("failed to delete non transformed checks: %v", err)
}
metrics.UnregisterGauge(checkIDsToRemove)
}
return &model, checks, changed, nil
}

Expand Down
25 changes: 15 additions & 10 deletions pkg/jobs/canary/canary_jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,8 @@ type CanaryJob struct {
Kubernetes kubernetes.Interface
Canary v1.Canary
DBCanary pkg.Canary
// model pkg.Canary
LogPass bool
LogFail bool
LogPass bool
LogFail bool
}

func (job CanaryJob) GetNamespacedName() types.NamespacedName {
Expand Down Expand Up @@ -94,22 +93,24 @@ func (job CanaryJob) Run() {
}

// Get transformed checks before and after, and then delete the olds ones that are not in new set
existingTransformedChecks, _ := db.GetTransformedCheckIDs(job.Canary.GetPersistedID())
var newChecksCreated []string
existingTransformedChecks, _ := db.GetTransformedCheckIDs(job.DBCanary.ID.String())
var transformedChecksCreated []string
results := checks.RunChecks(job.NewContext())
for _, result := range results {
if job.LogPass && result.Pass || job.LogFail && !result.Pass {
logger.Infof(result.String())
}
checkIDsAdded := cache.PostgresCache.Add(pkg.FromV1(result.Canary, result.Check), pkg.FromResult(*result))
newChecksCreated = append(newChecksCreated, checkIDsAdded...)
transformedChecksAdded := cache.PostgresCache.Add(pkg.FromV1(result.Canary, result.Check), pkg.FromResult(*result))
transformedChecksCreated = append(transformedChecksCreated, transformedChecksAdded...)
}
job.updateStatusAndEvent(results)

// Checks which are not present now should be marked as healthy
checksToMarkHealthy := utils.SetDifference(existingTransformedChecks, newChecksCreated)
if err := db.UpdateChecksStatus(checksToMarkHealthy, models.CheckStatusHealthy); err != nil {
logger.Errorf("error deleting transformed checks for canary %s: %v", job.Canary.GetPersistedID(), err)
checksToMarkHealthy := utils.SetDifference(existingTransformedChecks, transformedChecksCreated)
if len(checksToMarkHealthy) > 0 && len(transformedChecksCreated) > 0 {
if err := db.UpdateChecksStatus(checksToMarkHealthy, models.CheckStatusHealthy); err != nil {
logger.Errorf("error deleting transformed checks for canary %s: %v", job.Canary.GetPersistedID(), err)
}
}

// Update last runtime map
Expand Down Expand Up @@ -265,6 +266,10 @@ func SyncCanaryJob(canary v1.Canary) error {
return err
}

if dbCanary.Name != "kubernetes-bundle" {
return nil
}

job := CanaryJob{
Client: Kommons,
Kubernetes: Kubernetes,
Expand Down
4 changes: 3 additions & 1 deletion pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,9 @@ func Record(canary v1.Canary, result *pkg.CheckResult) (_uptime pkg.Uptime, _lat
return _uptime, _latency
}
if canary.GetCheckID(result.Check.GetName()) == "" {
logger.Warnf("%s/%s/%s returned a result for a check that does not exist", canary.Namespace, canary.Name, result.Check.GetName())
if val := result.Canary.Labels["transformed"]; val != "true" {
logger.Warnf("%s/%s/%s returned a result for a check that does not exist", canary.Namespace, canary.Name, result.Check.GetName())
}
return _uptime, _latency
}
canaryNamespace := canary.Namespace
Expand Down

0 comments on commit 8183a8f

Please sign in to comment.