Skip to content

Commit

Permalink
[release-15.0] OnlineDDL: fix nil 'completed_timestamp' for cancelled…
Browse files Browse the repository at this point in the history
… migrations (vitessio#13928) (vitessio#13935)

Signed-off-by: Shlomi Noach <[email protected]>
Co-authored-by: vitess-bot[bot] <108069721+vitess-bot[bot]@users.noreply.github.com>
Co-authored-by: Shlomi Noach <[email protected]>
  • Loading branch information
vitess-bot[bot] and shlomi-noach authored Sep 7, 2023
1 parent a1e0045 commit 1373805
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,9 @@ func TestSchemaChange(t *testing.T) {
}
})
})
t.Run("summary: validate completed_timestamp", func(t *testing.T) {
onlineddl.ValidateCompletedTimestamp(t, &vtParams)
})
}

// testOnlineDDLStatement runs an online DDL, ALTER statement
Expand Down
3 changes: 3 additions & 0 deletions go/test/endtoend/onlineddl/vrepl/onlineddl_vrepl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,9 @@ func TestSchemaChange(t *testing.T) {
}
})
})
t.Run("summary: validate completed_timestamp", func(t *testing.T) {
onlineddl.ValidateCompletedTimestamp(t, &vtParams)
})
}

func insertRow(t *testing.T) {
Expand Down
73 changes: 73 additions & 0 deletions go/test/endtoend/onlineddl/vtgate_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ import (
"github.com/stretchr/testify/require"
)

var (
testsStartupTime time.Time
)

func init() {
testsStartupTime = time.Now()
}

// VtgateExecQuery runs a query on VTGate using given query params
func VtgateExecQuery(t *testing.T, vtParams *mysql.ConnParams, query string, expectError string) *sqltypes.Result {
t.Helper()
Expand Down Expand Up @@ -344,3 +352,68 @@ func WaitForThrottledTimestamp(t *testing.T, vtParams *mysql.ConnParams, uuid st
t.Error("timeout waiting for last_throttled_timestamp to have nonempty value")
return
}

// ValidateSequentialMigrationIDs validates that schem_migrations.id column, which is an AUTO_INCREMENT, does
// not have gaps
func ValidateSequentialMigrationIDs(t *testing.T, vtParams *mysql.ConnParams, shards []cluster.Shard) {
r := VtgateExecQuery(t, vtParams, "show vitess_migrations", "")
shardMin := map[string]uint64{}
shardMax := map[string]uint64{}
shardCount := map[string]uint64{}

for _, row := range r.Named().Rows {
id := row.AsUint64("id", 0)
require.NotZero(t, id)

shard := row.AsString("shard", "")
require.NotEmpty(t, shard)

if _, ok := shardMin[shard]; !ok {
shardMin[shard] = id
shardMax[shard] = id
}
if id < shardMin[shard] {
shardMin[shard] = id
}
if id > shardMax[shard] {
shardMax[shard] = id
}
shardCount[shard]++
}
require.NotEmpty(t, shards)
assert.Equal(t, len(shards), len(shardMin))
assert.Equal(t, len(shards), len(shardMax))
assert.Equal(t, len(shards), len(shardCount))
for shard, count := range shardCount {
assert.NotZero(t, count)
assert.Equalf(t, count, shardMax[shard]-shardMin[shard]+1, "mismatch: shared=%v, count=%v, min=%v, max=%v", shard, count, shardMin[shard], shardMax[shard])
}
}

// ValidateCompletedTimestamp ensures that any migration in `cancelled`, `completed`, `failed` statuses
// has a non-nil and valid `completed_timestamp` value.
func ValidateCompletedTimestamp(t *testing.T, vtParams *mysql.ConnParams) {
require.False(t, testsStartupTime.IsZero())
r := VtgateExecQuery(t, vtParams, "show vitess_migrations", "")

completedTimestampNumValidations := 0
for _, row := range r.Named().Rows {
migrationStatus := row.AsString("migration_status", "")
require.NotEmpty(t, migrationStatus)
switch migrationStatus {
case string(schema.OnlineDDLStatusComplete),
string(schema.OnlineDDLStatusFailed),
string(schema.OnlineDDLStatusCancelled):
{
assert.False(t, row["completed_timestamp"].IsNull())
// Also make sure the timestamp is "real", and that it is recent.
timestamp := row.AsString("completed_timestamp", "")
completedTime, err := time.Parse(sqltypes.TimestampFormat, timestamp)
assert.NoError(t, err)
assert.Greater(t, completedTime.Unix(), testsStartupTime.Unix())
completedTimestampNumValidations++
}
}
}
assert.NotZero(t, completedTimestampNumValidations)
}
2 changes: 1 addition & 1 deletion go/test/endtoend/onlineddl/vttablet_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import (
)

// WaitForVReplicationStatus waits for a vreplication stream to be in one of given states, or timeout
func WaitForVReplicationStatus(t *testing.T, vtParams *mysql.ConnParams, shards []cluster.Shard, uuid string, timeout time.Duration, expectStatuses ...string) (status string) {
func WaitForVReplicationStatus(t *testing.T, vtParams *mysql.ConnParams, shards []cluster.Shard, uuid string, timeout time.Duration, expectStatuses ...string) (status string) { // nolint:revive

query, err := sqlparser.ParseAndBind("select workflow, state from _vt.vreplication where workflow=%a",
sqltypes.StringBindVariable(uuid),
Expand Down
23 changes: 17 additions & 6 deletions go/vt/vttablet/onlineddl/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@ var vexecInsertTemplates = []string{
'val1', 'val2', 'val3', 'val4', 'val5', 'val6', 'val7', 'val8', 'val9', FROM_UNIXTIME(0), 'vala', 'valb'
)`,
}
var (
// fixCompletedTimestampDone fixes a nil `completed_tiemstamp` columns, see
// https://github.com/vitessio/vitess/issues/13927
// The fix is in release-18.0
// TODO: remove in release-19.0
fixCompletedTimestampDone bool
)

var emptyResult = &sqltypes.Result{}
var acceptableDropTableIfExistsErrorCodes = []int{mysql.ERCantFindFile, mysql.ERNoSuchTable}
Expand Down Expand Up @@ -3754,13 +3761,17 @@ func (e *Executor) gcArtifacts(ctx context.Context) error {
e.migrationMutex.Lock()
defer e.migrationMutex.Unlock()

if _, err := e.execQuery(ctx, sqlFixCompletedTimestamp); err != nil {
// This query fixes a bug where stale migrations were marked as 'failed' without updating 'completed_timestamp'
// see https://github.com/vitessio/vitess/issues/8499
// Running this query retroactively sets completed_timestamp
// This 'if' clause can be removed in version v13
return err
// v18 fix. Remove in v19
if !fixCompletedTimestampDone {
if _, err := e.execQuery(ctx, sqlFixCompletedTimestamp); err != nil {
// This query fixes a bug where stale migrations were marked as 'cancelled' or 'failed' without updating 'completed_timestamp'
// Running this query retroactively sets completed_timestamp
// This fix is created in v18 and can be removed in v19
return err
}
fixCompletedTimestampDone = true
}

query, err := sqlparser.ParseAndBind(sqlSelectUncollectedArtifacts,
sqltypes.Int64BindVariable(int64((retainOnlineDDLTables).Seconds())),
)
Expand Down
5 changes: 3 additions & 2 deletions go/vt/vttablet/onlineddl/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ const (
migration_uuid=%a
`
sqlUpdateMigrationStatusFailedOrCancelled = `UPDATE _vt.schema_migrations
SET migration_status=IF(cancelled_timestamp IS NULL, 'failed', 'cancelled')
SET migration_status=IF(cancelled_timestamp IS NULL, 'failed', 'cancelled'),
completed_timestamp=NOW(6)
WHERE
migration_uuid=%a
`
Expand Down Expand Up @@ -414,7 +415,7 @@ const (
SET
completed_timestamp=NOW()
WHERE
migration_status='failed'
migration_status IN ('cancelled', 'failed')
AND cleanup_timestamp IS NULL
AND completed_timestamp IS NULL
`
Expand Down

0 comments on commit 1373805

Please sign in to comment.