Skip to content

Commit

Permalink
[release-16.0] OnlineDDL: fix nil 'completed_timestamp' for cancelled…
Browse files Browse the repository at this point in the history
… migrations (vitessio#13928) (vitessio#13936)

Signed-off-by: Shlomi Noach <[email protected]>
Co-authored-by: vitess-bot[bot] <108069721+vitess-bot[bot]@users.noreply.github.com>
Co-authored-by: Shlomi Noach <[email protected]>
  • Loading branch information
vitess-bot[bot] and shlomi-noach authored Sep 7, 2023
1 parent a682c1e commit 1f2de68
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,9 @@ func TestSchemaChange(t *testing.T) {
t.Run("summary: validate sequential migration IDs", func(t *testing.T) {
onlineddl.ValidateSequentialMigrationIDs(t, &vtParams, shards)
})
t.Run("summary: validate completed_timestamp", func(t *testing.T) {
onlineddl.ValidateCompletedTimestamp(t, &vtParams)
})
}

func testScheduler(t *testing.T) {
Expand Down
13 changes: 8 additions & 5 deletions go/test/endtoend/onlineddl/vrepl/onlineddl_vrepl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ func TestMain(m *testing.M) {
if err != nil {
fmt.Printf("%v\n", err)
os.Exit(1)
} else {
} else { // nolint:revive
os.Exit(exitcode)
}

Expand Down Expand Up @@ -537,11 +537,11 @@ func TestSchemaChange(t *testing.T) {
onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusRunning)
})
t.Run("wait for vreplication to run on shard -80", func(t *testing.T) {
vreplStatus := onlineddl.WaitForVReplicationStatus(t, &vtParams, currentPrimaryTablet, uuid, normalMigrationWait, "Copying", "Running")
vreplStatus := onlineddl.WaitForVReplicationStatus(t, currentPrimaryTablet, uuid, normalMigrationWait, "Copying", "Running")
require.Contains(t, []string{"Copying", "Running"}, vreplStatus)
})
t.Run("wait for vreplication to run on shard 80-", func(t *testing.T) {
vreplStatus := onlineddl.WaitForVReplicationStatus(t, &vtParams, shards[1].Vttablets[0], uuid, normalMigrationWait, "Copying", "Running")
vreplStatus := onlineddl.WaitForVReplicationStatus(t, shards[1].Vttablets[0], uuid, normalMigrationWait, "Copying", "Running")
require.Contains(t, []string{"Copying", "Running"}, vreplStatus)
})
t.Run("check status again", func(t *testing.T) {
Expand Down Expand Up @@ -646,11 +646,11 @@ func TestSchemaChange(t *testing.T) {
onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusRunning)
})
t.Run("wait for vreplication to run on shard -80", func(t *testing.T) {
vreplStatus := onlineddl.WaitForVReplicationStatus(t, &vtParams, currentPrimaryTablet, uuid, normalMigrationWait, "Copying", "Running")
vreplStatus := onlineddl.WaitForVReplicationStatus(t, currentPrimaryTablet, uuid, normalMigrationWait, "Copying", "Running")
require.Contains(t, []string{"Copying", "Running"}, vreplStatus)
})
t.Run("wait for vreplication to run on shard 80-", func(t *testing.T) {
vreplStatus := onlineddl.WaitForVReplicationStatus(t, &vtParams, shards[1].Vttablets[0], uuid, normalMigrationWait, "Copying", "Running")
vreplStatus := onlineddl.WaitForVReplicationStatus(t, shards[1].Vttablets[0], uuid, normalMigrationWait, "Copying", "Running")
require.Contains(t, []string{"Copying", "Running"}, vreplStatus)
})
t.Run("check status again", func(t *testing.T) {
Expand Down Expand Up @@ -888,6 +888,9 @@ func TestSchemaChange(t *testing.T) {
t.Run("summary: validate sequential migration IDs", func(t *testing.T) {
onlineddl.ValidateSequentialMigrationIDs(t, &vtParams, shards)
})
t.Run("summary: validate completed_timestamp", func(t *testing.T) {
onlineddl.ValidateCompletedTimestamp(t, &vtParams)
})
}

func insertRow(t *testing.T) {
Expand Down
38 changes: 37 additions & 1 deletion go/test/endtoend/onlineddl/vtgate_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ import (
"github.com/stretchr/testify/require"
)

var (
testsStartupTime time.Time
)

func init() {
testsStartupTime = time.Now()
}

// VtgateExecQuery runs a query on VTGate using given query params
func VtgateExecQuery(t *testing.T, vtParams *mysql.ConnParams, query string, expectError string) *sqltypes.Result {
t.Helper()
Expand Down Expand Up @@ -354,7 +362,7 @@ func WaitForThrottlerStatusEnabled(t *testing.T, tablet *cluster.Vttablet, timeo
jsonPath := "IsEnabled"
url := fmt.Sprintf("http://localhost:%d/throttler/status", tablet.HTTPPort)

ctx, cancel := context.WithTimeout(context.Background(), throttlerConfigTimeout)
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()

ticker := time.NewTicker(time.Second)
Expand Down Expand Up @@ -428,3 +436,31 @@ func ValidateSequentialMigrationIDs(t *testing.T, vtParams *mysql.ConnParams, sh
assert.Equalf(t, count, shardMax[shard]-shardMin[shard]+1, "mismatch: shared=%v, count=%v, min=%v, max=%v", shard, count, shardMin[shard], shardMax[shard])
}
}

// ValidateCompletedTimestamp ensures that any migration in `cancelled`, `completed`, `failed` statuses
// has a non-nil and valid `completed_timestamp` value.
func ValidateCompletedTimestamp(t *testing.T, vtParams *mysql.ConnParams) {
require.False(t, testsStartupTime.IsZero())
r := VtgateExecQuery(t, vtParams, "show vitess_migrations", "")

completedTimestampNumValidations := 0
for _, row := range r.Named().Rows {
migrationStatus := row.AsString("migration_status", "")
require.NotEmpty(t, migrationStatus)
switch migrationStatus {
case string(schema.OnlineDDLStatusComplete),
string(schema.OnlineDDLStatusFailed),
string(schema.OnlineDDLStatusCancelled):
{
assert.False(t, row["completed_timestamp"].IsNull())
// Also make sure the timestamp is "real", and that it is recent.
timestamp := row.AsString("completed_timestamp", "")
completedTime, err := time.Parse(sqltypes.TimestampFormat, timestamp)
assert.NoError(t, err)
assert.Greater(t, completedTime.Unix(), testsStartupTime.Unix())
completedTimestampNumValidations++
}
}
}
assert.NotZero(t, completedTimestampNumValidations)
}
3 changes: 1 addition & 2 deletions go/test/endtoend/onlineddl/vttablet_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"testing"
"time"

"vitess.io/vitess/go/mysql"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/sqlparser"

Expand All @@ -31,7 +30,7 @@ import (
)

// WaitForVReplicationStatus waits for a vreplication stream to be in one of given states, or timeout
func WaitForVReplicationStatus(t *testing.T, vtParams *mysql.ConnParams, tablet *cluster.Vttablet, uuid string, timeout time.Duration, expectStatuses ...string) (status string) {
func WaitForVReplicationStatus(t *testing.T, tablet *cluster.Vttablet, uuid string, timeout time.Duration, expectStatuses ...string) (status string) {

query, err := sqlparser.ParseAndBind("select state from _vt.vreplication where workflow=%a",
sqltypes.StringBindVariable(uuid),
Expand Down
23 changes: 17 additions & 6 deletions go/vt/vttablet/onlineddl/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,13 @@ var vexecInsertTemplates = []string{
'val1', 'val2', 'val3', 'val4', 'val5', 'val6', 'val7', 'val8', 'val9', FROM_UNIXTIME(0), 'vala', 'valb'
)`,
}
var (
// fixCompletedTimestampDone fixes a nil `completed_tiemstamp` columns, see
// https://github.com/vitessio/vitess/issues/13927
// The fix is in release-18.0
// TODO: remove in release-19.0
fixCompletedTimestampDone bool
)

var emptyResult = &sqltypes.Result{}
var acceptableDropTableIfExistsErrorCodes = []int{mysql.ERCantFindFile, mysql.ERNoSuchTable}
Expand Down Expand Up @@ -3852,13 +3859,17 @@ func (e *Executor) gcArtifacts(ctx context.Context) error {
e.migrationMutex.Lock()
defer e.migrationMutex.Unlock()

if _, err := e.execQuery(ctx, sqlFixCompletedTimestamp); err != nil {
// This query fixes a bug where stale migrations were marked as 'failed' without updating 'completed_timestamp'
// see https://github.com/vitessio/vitess/issues/8499
// Running this query retroactively sets completed_timestamp
// This 'if' clause can be removed in version v13
return err
// v18 fix (backported to 16). Remove in v19
if !fixCompletedTimestampDone {
if _, err := e.execQuery(ctx, sqlFixCompletedTimestamp); err != nil {
// This query fixes a bug where stale migrations were marked as 'cancelled' or 'failed' without updating 'completed_timestamp'
// Running this query retroactively sets completed_timestamp
// This fix is created in v18 and can be removed in v19
return err
}
fixCompletedTimestampDone = true
}

query, err := sqlparser.ParseAndBind(sqlSelectUncollectedArtifacts,
sqltypes.Int64BindVariable(int64((retainOnlineDDLTables).Seconds())),
)
Expand Down
5 changes: 3 additions & 2 deletions go/vt/vttablet/onlineddl/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ const (
migration_uuid=%a
`
sqlUpdateMigrationStatusFailedOrCancelled = `UPDATE _vt.schema_migrations
SET migration_status=IF(cancelled_timestamp IS NULL, 'failed', 'cancelled')
SET migration_status=IF(cancelled_timestamp IS NULL, 'failed', 'cancelled'),
completed_timestamp=NOW(6)
WHERE
migration_uuid=%a
`
Expand Down Expand Up @@ -349,7 +350,7 @@ const (
SET
completed_timestamp=NOW(6)
WHERE
migration_status='failed'
migration_status IN ('cancelled', 'failed')
AND cleanup_timestamp IS NULL
AND completed_timestamp IS NULL
`
Expand Down

0 comments on commit 1f2de68

Please sign in to comment.