@@ -2,28 +2,45 @@ package healthcheck
22
33import (
44 "context"
5+ "encoding/json"
56 "net/http"
67 "sync/atomic"
78 "time"
89
9- "github.com/numberly/vault-db-injector/pkg/leadership"
10+ "github.com/numberly/vault-db-injector/pkg/config"
11+ "github.com/numberly/vault-db-injector/pkg/k8s"
1012 "github.com/numberly/vault-db-injector/pkg/logger"
13+ "github.com/numberly/vault-db-injector/pkg/vault"
1114)
1215
16+ type HealthStatus struct {
17+ Status string `json:"status"`
18+ Kubernetes * ServiceHealth `json:"kubernetes,omitempty"`
19+ Vault * ServiceHealth `json:"vault,omitempty"`
20+ Timestamp string `json:"timestamp"`
21+ }
22+
// ServiceHealth is the health of a single dependency as reported inside a
// HealthStatus.
type ServiceHealth struct {
	// Status is "healthy" or "unhealthy".
	Status string `json:"status"`
	// Message carries the failure detail for an unhealthy dependency. It is
	// omitted from the JSON output when empty.
	Message string `json:"message,omitempty"`
}
27+
// HealthChecker is implemented by services that expose health check
// endpoints over HTTP.
type HealthChecker interface {
	// RegisterHandlers installs the HTTP handlers for the health endpoints.
	RegisterHandlers()
	// Start runs the health check server. It takes a context used to request
	// shutdown and a channel used to signal that the server has stopped.
	Start(ctx context.Context, doneCh chan struct{}) error
}
1732
1833type Service struct {
19- isReady * atomic.Value
20- server * http.Server
21- log logger.Logger
34+ isReady * atomic.Value
35+ server * http.Server
36+ log logger.Logger
37+ cfg * config.Config
38+ k8sClient k8s.ClientInterface
2239}
2340
24- func NewService () * Service {
41+ func NewService (cfg * config. Config ) * Service {
2542 isReady := & atomic.Value {}
26- isReady .Store (true ) // Initialize as ready
43+ isReady .Store (true )
2744
2845 return & Service {
2946 isReady : isReady ,
@@ -32,62 +49,139 @@ func NewService() *Service {
3249 ReadTimeout : 10 * time .Second ,
3350 WriteTimeout : 10 * time .Second ,
3451 },
35- log : logger .GetLogger (),
52+ log : logger .GetLogger (),
53+ cfg : cfg ,
54+ k8sClient : k8s .NewClient (),
3655 }
3756}
3857
39- // RegisterHandlers sets up the HTTP endpoints for the health check service.
4058func (s * Service ) RegisterHandlers () {
41- http .HandleFunc ("/healthz" , s .healthzHandler )
59+ http .HandleFunc ("/healthz" , s .healthHandler )
4260 http .HandleFunc ("/readyz" , s .readyzHandler ())
43- hcle := leadership .NewHealthChecker ()
44- hcle .SetupLivenessEndpoint ()
4561}
4662
47- // Start begins listening for health check requests.
63+ func (s * Service ) checkKubernetesHealth () * ServiceHealth {
64+ _ , err := s .k8sClient .GetKubernetesClient ()
65+ if err != nil {
66+ return & ServiceHealth {
67+ Status : "unhealthy" ,
68+ Message : "Failed to connect to Kubernetes: " + err .Error (),
69+ }
70+ }
71+ return & ServiceHealth {
72+ Status : "healthy" ,
73+ }
74+ }
75+
76+ func (s * Service ) checkVaultHealth (ctx context.Context ) * ServiceHealth {
77+ k8sClient := k8s .NewClient ()
78+ tok , err := k8sClient .GetServiceAccountToken ()
79+ if err != nil {
80+ return & ServiceHealth {
81+ Status : "unhealthy" ,
82+ Message : "Failed to get ServiceAccount token: " + err .Error (),
83+ }
84+ }
85+
86+ vaultConn := vault .NewConnector (s .cfg .VaultAddress , s .cfg .VaultAuthPath , s .cfg .KubeRole , "random" , "random" , tok , s .cfg .VaultRateLimit )
87+
88+ if err := vaultConn .CheckHealth (ctx ); err != nil {
89+ return & ServiceHealth {
90+ Status : "unhealthy" ,
91+ Message : err .Error (),
92+ }
93+ }
94+
95+ return & ServiceHealth {
96+ Status : "healthy" ,
97+ }
98+ }
99+
100+ func (s * Service ) healthHandler (w http.ResponseWriter , r * http.Request ) {
101+ ctx := r .Context ()
102+ health := HealthStatus {
103+ Timestamp : time .Now ().UTC ().Format (time .RFC3339 ),
104+ }
105+
106+ // Check both services
107+ k8sHealth := s .checkKubernetesHealth ()
108+ vaultHealth := s .checkVaultHealth (ctx )
109+
110+ health .Kubernetes = k8sHealth
111+ health .Vault = vaultHealth
112+
113+ w .Header ().Set ("Content-Type" , "application/json" )
114+
115+ if k8sHealth .Status == "healthy" && vaultHealth .Status == "healthy" {
116+ health .Status = "healthy"
117+ w .WriteHeader (http .StatusOK )
118+ } else {
119+ health .Status = "unhealthy"
120+ var statusCode int
121+
122+ switch {
123+ case k8sHealth .Status != "healthy" && vaultHealth .Status != "healthy" :
124+ statusCode = http .StatusServiceUnavailable
125+ case k8sHealth .Status != "healthy" :
126+ statusCode = http .StatusBadGateway
127+ case vaultHealth .Status != "healthy" :
128+ statusCode = http .StatusFailedDependency
129+ }
130+
131+ w .WriteHeader (statusCode )
132+ }
133+
134+ if err := json .NewEncoder (w ).Encode (health ); err != nil {
135+ s .log .Errorf ("Failed to encode health status: %v" , err )
136+ }
137+ }
138+
139+ func (s * Service ) readyzHandler () http.HandlerFunc {
140+ return func (w http.ResponseWriter , _ * http.Request ) {
141+ w .Header ().Set ("Content-Type" , "application/json" )
142+
143+ response := HealthStatus {
144+ Timestamp : time .Now ().UTC ().Format (time .RFC3339 ),
145+ }
146+
147+ if s .isReady == nil || ! s .isReady .Load ().(bool ) {
148+ response .Status = "not ready"
149+ w .WriteHeader (http .StatusServiceUnavailable )
150+ json .NewEncoder (w ).Encode (response )
151+ return
152+ }
153+
154+ response .Status = "ready"
155+ w .WriteHeader (http .StatusOK )
156+ json .NewEncoder (w ).Encode (response )
157+ }
158+ }
159+
48160func (s * Service ) Start (ctx context.Context , doneCh chan struct {}) error {
49- // Start the server in a separate goroutine.
50161 go func () {
51162 s .log .Info ("Listening for health checks on :8888" )
52163 if err := s .server .ListenAndServe (); err != http .ErrServerClosed {
53- // Log the error if it's not ErrServerClosed, as we expect this error on shutdown.
54164 s .log .Errorf ("Error serving health check: %v" , err )
55165 }
56- close (doneCh ) // Signal that the server has stopped.
166+ close (doneCh )
57167 }()
58168
59- // Wait for context cancellation or server stop signal.
60169 select {
61170 case <- ctx .Done ():
62- // Context was canceled, shut down the server.
63171 shutdownCtx , cancel := context .WithTimeout (context .Background (), 5 * time .Second )
64172 defer cancel ()
65173
66174 s .log .Info ("Context canceled, shutting down health check server" )
67175 if err := s .server .Shutdown (shutdownCtx ); err != nil {
68176 s .log .Errorf ("Error shutting down health check server: %v" , err )
69- return err // Return error if shutdown fails.
177+ return err
70178 }
71179 case <- doneCh :
72180 if err := s .server .Shutdown (ctx ); err != nil {
73181 s .log .Errorf ("Error shutting down health check server: %v" , err )
74- return err // Return error if shutdown fails.
182+ return err
75183 }
76184 s .log .Info ("Health check server has stopped" )
77185 }
78- return nil // Return nil as the service stopped cleanly or was shutdown on context cancel.
79- }
80-
81- func (s * Service ) healthzHandler (w http.ResponseWriter , _ * http.Request ) {
82- w .WriteHeader (http .StatusNoContent )
83- }
84-
85- func (s * Service ) readyzHandler () http.HandlerFunc {
86- return func (w http.ResponseWriter , _ * http.Request ) {
87- if s .isReady == nil || ! s .isReady .Load ().(bool ) {
88- http .Error (w , http .StatusText (http .StatusServiceUnavailable ), http .StatusServiceUnavailable )
89- return
90- }
91- w .WriteHeader (http .StatusNoContent )
92- }
186+ return nil
93187}
0 commit comments