diff --git a/canary-checker.properties b/canary-checker.properties index 7554ec754..e8295057a 100644 --- a/canary-checker.properties +++ b/canary-checker.properties @@ -6,5 +6,6 @@ # topology.runNow=true log.level.db=warn +# check.concurrency=100 # jobs.ComponentRelationshipSync.runNow=true diff --git a/cmd/operator.go b/cmd/operator.go index e9091f9a2..6e4a14c01 100644 --- a/cmd/operator.go +++ b/cmd/operator.go @@ -8,6 +8,7 @@ import ( apicontext "github.com/flanksource/canary-checker/api/context" "github.com/flanksource/canary-checker/pkg/cache" "github.com/flanksource/canary-checker/pkg/jobs" + "github.com/flanksource/canary-checker/pkg/jobs/canary" canaryJobs "github.com/flanksource/canary-checker/pkg/jobs/canary" "github.com/flanksource/canary-checker/pkg/runner" "github.com/flanksource/canary-checker/pkg/utils" @@ -96,6 +97,12 @@ func run() error { // so we use a goroutine to unblock server start // to prevent health check from failing go jobs.Start() + + // TODO: stop the cron scheduler so that no more checks are scheduled + + shutdown.AddHookWithPriority("check jobs", shutdown.PriorityJobs, func() { + canary.AcquireAllCheckLocks(ctx) + }) } go serve() diff --git a/cmd/serve.go b/cmd/serve.go index e6d94194f..f8b4e2b8f 100644 --- a/cmd/serve.go +++ b/cmd/serve.go @@ -16,6 +16,7 @@ import ( "github.com/flanksource/canary-checker/pkg/db" "github.com/flanksource/canary-checker/pkg/echo" "github.com/flanksource/canary-checker/pkg/jobs" + "github.com/flanksource/canary-checker/pkg/jobs/canary" canaryJobs "github.com/flanksource/canary-checker/pkg/jobs/canary" echov4 "github.com/labstack/echo/v4" @@ -49,6 +50,12 @@ var Serve = &cobra.Command{ canaryJobs.StartScanCanaryConfigs(apicontext.DefaultContext, dataFile, configFiles) if executor { jobs.Start() + + // TODO: stop the cron scheduler so that no more checks are scheduled + + shutdown.AddHookWithPriority("check jobs", shutdown.PriorityJobs, func() { + canary.AcquireAllCheckLocks(apicontext.DefaultContext) + }) } serve() diff --git a/go.mod b/go.mod index 4c064f6b9..3fa75d75d 100644 --- a/go.mod +++ b/go.mod @@ -119,11 +119,9 @@ require ( github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.5 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.30.5 // indirect github.com/aws/smithy-go v1.20.4 // indirect - github.com/bahlo/generic-list-go v0.2.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d // indirect github.com/bmatcuk/doublestar/v4 v4.6.1 // indirect - github.com/buger/jsonparser v1.1.1 // indirect github.com/cenkalti/backoff/v4 v4.2.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cloudflare/circl v1.3.7 // indirect @@ -200,7 +198,6 @@ require ( github.com/hirochachacha/go-smb2 v1.1.0 // indirect github.com/imdario/mergo v0.3.16 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/invopop/jsonschema v0.12.0 // indirect github.com/itchyny/gojq v0.12.16 // indirect github.com/itchyny/timefmt-go v0.1.6 // indirect github.com/jackc/pgerrcode v0.0.0-20240316143900-6e2875d9b438 // indirect @@ -275,14 +272,10 @@ require ( github.com/valyala/fasttemplate v1.2.2 // indirect github.com/vmihailenco/msgpack/v5 v5.4.1 // indirect github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect - github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect github.com/xdg-go/pbkdf2 v1.0.0 // indirect github.com/xdg-go/scram v1.1.2 // indirect github.com/xdg-go/stringprep v1.0.4 // indirect - github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect - github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect - github.com/xeipuuv/gojsonschema v1.2.0 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect github.com/xlab/treeprint v1.2.0 // indirect github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a // indirect @@ -334,7 +327,7 @@ require ( sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) -// replace github.com/flanksource/duty => ../duty +replace github.com/flanksource/duty => ../duty // replace github.com/flanksource/artifacts => ../artifacts diff --git a/go.sum b/go.sum index 504bbed3c..151249778 100644 --- a/go.sum +++ b/go.sum @@ -748,8 +748,6 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.30.5/go.mod h1:vmSqFK+BVIwVpDAGZB3Co github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= github.com/aws/smithy-go v1.20.4 h1:2HK1zBdPgRbjFOHlfeQZfpC4r72MOb9bZkiFwggKO+4= github.com/aws/smithy-go v1.20.4/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= -github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk= -github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -761,8 +759,6 @@ github.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwN github.com/bmatcuk/doublestar/v4 v4.6.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= github.com/boombuler/barcode v1.0.1/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= -github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs= -github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= github.com/bwesterb/go-ristretto v1.2.3/go.mod h1:fUIoIZaG73pV5biE2Blr2xEzDoMj7NFEuV9ekS419A0= github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 h1:6lhrsTEnloDPXyeZBvSYvQf8u86jbKehZPVDDlkgDl4= github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M= @@ -865,8 +861,6 @@ github.com/flanksource/artifacts v1.0.15 h1:3ImJr2y0ZCXw/QrMhfJJktAT7pYD3sMZR5ix github.com/flanksource/artifacts v1.0.15/go.mod h1:qHVCnQu5k50aWNJ5UhpcAKEl7pAzqUrFFKGSm147G70= github.com/flanksource/commons v1.30.5 h1:p8PXGiNt7SurBBh9K3ea8/ZrDvacXSYHJSs/cqJLDK8= github.com/flanksource/commons v1.30.5/go.mod h1:26zdVkmMPsGpvfcsvst5WgsqcyRL8KqFNxkumagBN+A= -github.com/flanksource/duty v1.0.727 h1:5f7mntZjtg4bvVCzLdIe2cwT8DnFtKYToq1H74T7tfo= -github.com/flanksource/duty v1.0.727/go.mod h1:sZY2NytdenrkqXoMD6Gn2C8xH6dm5HsqOeE0p74Z2VE= github.com/flanksource/gomplate/v3 v3.20.4/go.mod h1:27BNWhzzSjDed1z8YShO6W+z6G9oZXuxfNFGd/iGSdc= github.com/flanksource/gomplate/v3 v3.24.39 h1:O763lnNIcTELSMYeIO0dNDfcb3LoZvzU1fr62I4Yxqg= github.com/flanksource/gomplate/v3 v3.24.39/go.mod h1:0wY/+UPvd7CxmiTBNmzZdWIEOUZAsRkpGY1j5R711O8= @@ -1176,8 +1170,6 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/invopop/jsonschema v0.12.0 h1:6ovsNSuvn9wEQVOyc72aycBMVQFKz7cPdMJn10CvzRI= -github.com/invopop/jsonschema v0.12.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0= github.com/itchyny/gojq v0.12.13/go.mod h1:JzwzAqenfhrPUuwbmEz3nu3JQmFLlQTQMUcOdnu/Sf4= github.com/itchyny/gojq v0.12.16 h1:yLfgLxhIr/6sJNVmYfQjTIv0jGctu6/DgDoivmxTr7g= github.com/itchyny/gojq v0.12.16/go.mod h1:6abHbdC2uB9ogMS38XsErnfqJ94UlngIJGlRAIj4jTM= @@ -1580,8 +1572,6 @@ github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IU github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok= github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= -github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc= -github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw= github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= @@ -1590,12 +1580,6 @@ github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= -github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c= -github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= -github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= -github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= -github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74= -github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= github.com/xhit/go-str2duration v1.2.0/go.mod h1:3cPSlfZlUHVlneIVfePFWcJZsuwf+P1v2SRTV4cUmp4= github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo= diff --git a/pkg/jobs/canary/sync.go b/pkg/jobs/canary/sync.go index 77f53ad6d..46a2e2bdd 100644 --- a/pkg/jobs/canary/sync.go +++ b/pkg/jobs/canary/sync.go @@ -19,8 +19,31 @@ import ( "github.com/flanksource/duty/job" "github.com/flanksource/duty/models" "github.com/robfig/cron/v3" + "golang.org/x/sync/semaphore" ) +const ( + propertyCheckConcurrency = "check.concurrency" +) + +var ( + // The maximum number of checks that can run concurrently + defaultCheckConcurrency = 50 + + // Holds in the lock for every running check. + // Can be overwritten by 'check.concurrency' property. + globalCheckSemaphore *semaphore.Weighted +) + +// AcquireAllCheckLocks blocks until the global check sempahore is fully acquired. +// +// This helps to ensure that no checks are currently running. +func AcquireAllCheckLocks(ctx context.Context) { + ctx.Logger.V(6).Infof("acquiring all check locks") + globalCheckSemaphore.Acquire(ctx, int64(ctx.Properties().Int(propertyCheckConcurrency, defaultCheckConcurrency))) + ctx.Logger.V(6).Infof("acquired all check locks") +} + var canaryJobs sync.Map const DefaultCanarySchedule = "@every 5m" @@ -140,6 +163,7 @@ func newCanaryJob(c CanaryJob) { IgnoreSuccessHistory: true, Retention: job.RetentionBalanced, ResourceID: c.DBCanary.ID.String(), + Semaphores: []*semaphore.Weighted{globalCheckSemaphore}, ResourceType: "canary", ID: fmt.Sprintf("%s/%s", c.Canary.Namespace, c.Canary.Name), Fn: c.Run, @@ -159,6 +183,10 @@ var SyncCanaryJobs = &job.Job{ Schedule: "@every 5m", Retention: job.RetentionFew, Fn: func(ctx job.JobRuntime) error { + if globalCheckSemaphore == nil { + globalCheckSemaphore = semaphore.NewWeighted(int64(ctx.Properties().Int("check.concurrency", defaultCheckConcurrency))) + } + canaries, err := db.GetAllCanariesForSync(ctx.Context, runner.WatchNamespace) if err != nil { return err