Skip to content

Commit 3da0053

Browse files
authored
CP-32687: detect missing tables during update/insert and treat as fatal (#452)
We have seen a couple of instances where the SQLite tables do not appear to have been created during initialization, leading to an endless stream of errors whenever we attempt to write data to them. This patch detects these errors and raises the log event from a warning to a fatal, which causes the process to exit. That gives Kubernetes a chance to restart the pod and initialize a new database, this time hopefully *with* the missing table(s).
1 parent ada3383 commit 3da0053

File tree

5 files changed

+87
-6
lines changed

5 files changed

+87
-6
lines changed

app/storage/core/errors.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@ package core
55

66
import (
77
"errors"
8+
"strings"
89

10+
"github.com/mattn/go-sqlite3"
911
"gorm.io/gorm"
1012

1113
"github.com/cloudzero/cloudzero-agent/app/types"
1214
)
1315

14-
// TranslateError maps GORM errors to application-specific errors.
16+
// TranslateError maps GORM / SQLite errors to application-specific errors.
1517
// If the error does not match any known GORM or SQLite error, it returns the original error.
1618
func TranslateError(err error) error {
1719
if errors.Is(err, gorm.ErrRecordNotFound) {
@@ -62,5 +64,18 @@ func TranslateError(err error) error {
6264
case errors.Is(err, gorm.ErrCheckConstraintViolated):
6365
return types.ErrCheckConstraintViolated
6466
}
67+
68+
// Check for SQLite-specific "no such table" errors
69+
// This is a fatal error that should cause the application to exit
70+
var sqliteErr sqlite3.Error
71+
if errors.As(err, &sqliteErr) {
72+
// SQLITE_ERROR (1) with "no such table" message indicates missing table
73+
if sqliteErr.Code == sqlite3.ErrError {
74+
if strings.HasPrefix(sqliteErr.Error(), "no such table: ") {
75+
return types.ErrTableMissing
76+
}
77+
}
78+
}
79+
6580
return err
6681
}

app/storage/core/errors_test.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2016-2025, CloudZero, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package core_test
5+
6+
import (
7+
"testing"
8+
9+
"github.com/cloudzero/cloudzero-agent/app/storage/core"
10+
"github.com/cloudzero/cloudzero-agent/app/storage/sqlite"
11+
"github.com/cloudzero/cloudzero-agent/app/types"
12+
"github.com/stretchr/testify/assert"
13+
)
14+
15+
func TestTranslateError_SQLiteTableMissing(t *testing.T) {
16+
// Create an in-memory SQLite database
17+
db, err := sqlite.NewSQLiteDriver(sqlite.MemorySharedCached)
18+
if err != nil {
19+
t.Fatalf("Failed to create database: %v", err)
20+
}
21+
22+
// Try to query a non-existent table to get the real SQLite error
23+
var results []struct {
24+
ID uint `gorm:"primaryKey"`
25+
Name string
26+
}
27+
err = db.Table("resource_tags").Find(&results).Error
28+
29+
// Verify we got the expected SQLite error
30+
assert.Error(t, err)
31+
assert.Contains(t, err.Error(), "no such table: resource_tags")
32+
33+
// Test our TranslateError function
34+
translatedErr := core.TranslateError(err)
35+
assert.Error(t, translatedErr)
36+
assert.Equal(t, types.ErrTableMissing, translatedErr)
37+
}
38+
39+
func TestTranslateError_UnknownError(t *testing.T) {
40+
// Test that unknown errors are passed through unchanged
41+
unknownErr := assert.AnError
42+
result := core.TranslateError(unknownErr)
43+
assert.Equal(t, unknownErr, result)
44+
}

app/storage/repo/resource_store_impl.go

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ package repo
1010
import (
1111
"context"
1212
"encoding/json"
13+
"errors"
1314
"fmt"
1415
"sync"
1516

@@ -20,6 +21,7 @@ import (
2021
"github.com/cloudzero/cloudzero-agent/app/storage/sqlite"
2122
"github.com/cloudzero/cloudzero-agent/app/types"
2223
"github.com/prometheus/client_golang/prometheus"
24+
"github.com/rs/zerolog"
2325
"github.com/rs/zerolog/log"
2426
)
2527

@@ -37,6 +39,15 @@ var (
3739
)
3840
)
3941

42+
// createDBErrorEvent returns a logger event with the appropriate level based on the error type.
43+
// Fatal errors (like missing tables) will cause the application to exit.
44+
func createDBErrorEvent(logger *zerolog.Logger, translatedErr error) *zerolog.Event {
45+
if errors.Is(translatedErr, types.ErrTableMissing) {
46+
return logger.Fatal() //nolint:zerologlint // Caller will dispatch the event
47+
}
48+
return logger.Warn() //nolint:zerologlint // Caller will dispatch the event
49+
}
50+
4051
// NewInMemoryResourceRepository creates a new in-memory resource repository.
4152
func NewInMemoryResourceRepository(clock types.TimeProvider) (types.ResourceStore, error) {
4253
remoteWriteStatsOnce.Do(func() {
@@ -100,14 +111,18 @@ func (r *resourceRepoImpl) Create(ctx context.Context, it *types.ResourceTags) e
100111
DoNothing: true,
101112
}).Create(it).Error
102113
if err != nil {
103-
log.Ctx(ctx).Warn().Err(err).Msg("storage write create failure")
114+
translatedErr := core.TranslateError(err)
115+
116+
// Create log entry with appropriate level
117+
createDBErrorEvent(log.Ctx(ctx), translatedErr).Msg("storage write create failure")
118+
104119
StorageWriteFailures.With(prometheus.Labels{
105120
"action": "create",
106121
"resource_type": fmt.Sprintf("%d", it.Type),
107122
"namespace": *it.Namespace,
108123
"resource_name": it.Name,
109124
}).Inc()
110-
return core.TranslateError(err)
125+
return translatedErr
111126
}
112127
return nil
113128
}
@@ -179,14 +194,18 @@ func (r *resourceRepoImpl) Update(ctx context.Context, it *types.ResourceTags) e
179194
if err := r.DB(ctx).Model(it).
180195
Where("id = ?", it.ID).
181196
Updates(updates).Error; err != nil {
182-
log.Ctx(ctx).Warn().Err(err).Msg("storage write update failure")
197+
translatedErr := core.TranslateError(err)
198+
199+
// Create log entry with appropriate level
200+
createDBErrorEvent(log.Ctx(ctx), translatedErr).Msg("storage write update failure")
201+
183202
StorageWriteFailures.With(prometheus.Labels{
184203
"action": "update",
185204
"resource_type": fmt.Sprintf("%d", it.Type),
186205
"namespace": *it.Namespace,
187206
"resource_name": it.Name,
188207
}).Inc()
189-
return core.TranslateError(err)
208+
return translatedErr
190209
}
191210
return nil
192211
}

app/types/errors.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,4 +108,7 @@ var (
108108

109109
// ErrCheckConstraintViolated is returned when a check constraint is violated.
110110
ErrCheckConstraintViolated = errors.New("check constraint violated")
111+
112+
// ErrTableMissing is returned when a required database table does not exist.
113+
ErrTableMissing = errors.New("required database table missing")
111114
)

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ require (
208208
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect
209209
github.com/jinzhu/inflection v1.0.0 // indirect
210210
github.com/jinzhu/now v1.1.5 // indirect
211-
github.com/mattn/go-sqlite3 v1.14.22 // indirect
211+
github.com/mattn/go-sqlite3 v1.14.22
212212
github.com/microcosm-cc/bluemonday v1.0.27
213213
github.com/prometheus/client_model v0.6.2 // indirect
214214
github.com/prometheus/common v0.66.1

0 commit comments

Comments
 (0)