Skip to content

Commit

Permalink
Merge pull request #77 from github/cut-over-lock-table-names
Browse files Browse the repository at this point in the history
Solved cut-over stall; change of table names
  • Loading branch information
Shlomi Noach authored Jun 21, 2016
2 parents dc8d274 + 96e8419 commit b6d88dd
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 15 deletions.
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash
#
#
RELEASE_VERSION="0.9.6"
RELEASE_VERSION="0.9.7"

buildpath=/tmp/gh-ost
target=gh-ost
Expand Down
15 changes: 9 additions & 6 deletions go/base/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,20 +164,23 @@ func GetMigrationContext() *MigrationContext {

// GetGhostTableName generates the name of ghost table, based on original table name
func (this *MigrationContext) GetGhostTableName() string {
return fmt.Sprintf("_%s_gst", this.OriginalTableName)
return fmt.Sprintf("_%s_gho", this.OriginalTableName)
}

// GetOldTableName generates the name of the "old" table, into which the original table is renamed.
func (this *MigrationContext) GetOldTableName() string {
// if this.TestOnReplica {
// return fmt.Sprintf("_%s_tst", this.OriginalTableName)
// }
return fmt.Sprintf("_%s_old", this.OriginalTableName)
if this.TestOnReplica {
return fmt.Sprintf("_%s_ght", this.OriginalTableName)
}
if this.MigrateOnReplica {
return fmt.Sprintf("_%s_ghr", this.OriginalTableName)
}
return fmt.Sprintf("_%s_del", this.OriginalTableName)
}

// GetChangelogTableName generates the name of changelog table, based on original table name
func (this *MigrationContext) GetChangelogTableName() string {
return fmt.Sprintf("_%s_osc", this.OriginalTableName)
return fmt.Sprintf("_%s_ghc", this.OriginalTableName)
}

// GetVoluntaryLockName returns a name of a voluntary lock to be used throughout
Expand Down
13 changes: 12 additions & 1 deletion go/logic/applier.go
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,7 @@ func (this *Applier) LockOriginalTableAndWait(sessionIdChan chan int64, tableLoc

var sessionId int64
if err := tx.QueryRow(`select connection_id()`).Scan(&sessionId); err != nil {
tableLocked <- err
return err
}
sessionIdChan <- sessionId
Expand All @@ -616,7 +617,17 @@ func (this *Applier) LockOriginalTableAndWait(sessionIdChan chan int64, tableLoc
lockName := this.GetSessionLockName(sessionId)
log.Infof("Grabbing voluntary lock: %s", lockName)
if err := tx.QueryRow(query, lockName).Scan(&lockResult); err != nil || lockResult != 1 {
return fmt.Errorf("Unable to acquire lock %s", lockName)
err := fmt.Errorf("Unable to acquire lock %s", lockName)
tableLocked <- err
return err
}

tableLockTimeoutSeconds := this.migrationContext.SwapTablesTimeoutSeconds * 2
log.Infof("Setting LOCK timeout as %d seconds", tableLockTimeoutSeconds)
query = fmt.Sprintf(`set session lock_wait_timeout:=%d`, tableLockTimeoutSeconds)
if _, err := tx.Exec(query); err != nil {
tableLocked <- err
return err
}

query = fmt.Sprintf(`lock /* gh-ost */ tables %s.%s write`,
Expand Down
32 changes: 25 additions & 7 deletions go/logic/migrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,6 @@ func (this *Migrator) Migrate() (err error) {
this.consumeRowCopyComplete()
log.Infof("Row copy complete")
this.printStatus(ForcePrintStatusRule)
this.migrationContext.MarkPointOfInterest()

if err := this.cutOver(); err != nil {
return err
Expand All @@ -435,10 +434,12 @@ func (this *Migrator) cutOver() (err error) {
log.Debugf("Noop operation; not really swapping tables")
return nil
}
this.migrationContext.MarkPointOfInterest()
this.throttle(func() {
log.Debugf("throttling before swapping tables")
})

this.migrationContext.MarkPointOfInterest()
this.sleepWhileTrue(
func() (bool, error) {
if this.migrationContext.PostponeCutOverFlagFile == "" {
Expand All @@ -454,6 +455,7 @@ func (this *Migrator) cutOver() (err error) {
},
)
atomic.StoreInt64(&this.migrationContext.IsPostponingCutOver, 0)
this.migrationContext.MarkPointOfInterest()

if this.migrationContext.TestOnReplica {
// With `--test-on-replica` we stop replication thread, and then proceed to use
Expand All @@ -478,15 +480,20 @@ func (this *Migrator) cutOver() (err error) {
return err
}
if this.migrationContext.CutOverType == base.CutOverTwoStep {
err := this.retryOperation(this.cutOverTwoStep)
err := this.retryOperation(
func() error {
return this.executeAndThrottleOnError(this.cutOverTwoStep)
},
)
return err
}
return nil
return log.Fatalf("Unknown cut-over type: %d; should never get here!", this.migrationContext.CutOverType)
}

// Inject the "AllEventsUpToLockProcessed" state hint, wait for it to appear in the binary logs,
// make sure the queue is drained.
func (this *Migrator) waitForEventsUpToLock() (err error) {
this.migrationContext.MarkPointOfInterest()
waitForEventsUpToLockStartTime := time.Now()

log.Infof("Writing changelog state: %+v", AllEventsUpToLockProcessed)
Expand Down Expand Up @@ -541,19 +548,28 @@ func (this *Migrator) safeCutOver() (err error) {

okToUnlockTable := make(chan bool, 2)
originalTableRenamed := make(chan error, 1)
var originalTableRenameIntended int64
defer func() {
log.Infof("Checking to see if we need to roll back")
// The following is to make sure we unlock the table no-matter-what!
// There's enough buffer in the channel to support a redundant write here.
okToUnlockTable <- true
// We need to make sure we wait for the original-rename, successful or not,
// so as to be able to rollback in case the ghost-rename fails.
<-originalTableRenamed

if atomic.LoadInt64(&originalTableRenameIntended) == 1 {
log.Infof("Waiting for original table rename result")
// We need to make sure we wait for the original-rename, successful or not,
// so as to be able to rollback in case the ghost-rename fails.
// But we only wait on this queue if there's actually going to be a rename.
// As an example, what happens should the initial `lock tables` fail? We would
// never proceed to rename the table, hence this queue is never written to.
<-originalTableRenamed
}
// Rollback operation
if !this.applier.tableExists(this.migrationContext.OriginalTableName) {
log.Infof("Cannot find %s, rolling back", this.migrationContext.OriginalTableName)
err := this.applier.RenameTable(this.migrationContext.GetOldTableName(), this.migrationContext.OriginalTableName)
log.Errore(err)
} else {
log.Info("No need for rollback")
}
}()
lockOriginalSessionIdChan := make(chan int64, 1)
Expand All @@ -577,6 +593,8 @@ func (this *Migrator) safeCutOver() (err error) {
// We now attempt a RENAME on the original table, and expect it to block
renameOriginalSessionIdChan := make(chan int64, 1)
this.migrationContext.RenameTablesStartTime = time.Now()
atomic.StoreInt64(&originalTableRenameIntended, 1)

go func() {
this.applier.RenameOriginalTable(renameOriginalSessionIdChan, originalTableRenamed)
}()
Expand Down

0 comments on commit b6d88dd

Please sign in to comment.