diff --git a/go/app/cli.go b/go/app/cli.go
index e537ce43..3333182c 100644
--- a/go/app/cli.go
+++ b/go/app/cli.go
@@ -218,7 +218,7 @@ func Cli(command string, strict bool, instance string, destination string, owner
 	// begin commands
 	switch command {
 	// smart mode
-	case registerCliCommand("relocate", "Smart relocation", `Relocate a slave beneath another instance`), registerCliCommand("relocate-below", "Smart relocation", `Synonym to 'relocate', will be deprecated`):
+	case registerCliCommand("relocate", "Smart relocation", `Relocate a subordinate beneath another instance`), registerCliCommand("relocate-below", "Smart relocation", `Synonym to 'relocate', will be deprecated`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if destinationKey == nil {
@@ -230,25 +230,25 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			}
 			fmt.Println(fmt.Sprintf("%s<%s", instanceKey.DisplayString(), destinationKey.DisplayString()))
 		}
-	case registerCliCommand("relocate-slaves", "Smart relocation", `Relocates all or part of the slaves of a given instance under another instance`):
+	case registerCliCommand("relocate-subordinates", "Smart relocation", `Relocates all or part of the subordinates of a given instance under another instance`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if destinationKey == nil {
 				log.Fatal("Cannot deduce destination:", destination)
 			}
-			slaves, _, err, errs := inst.RelocateSlaves(instanceKey, destinationKey, pattern)
+			subordinates, _, err, errs := inst.RelocateSubordinates(instanceKey, destinationKey, pattern)
 			if err != nil {
 				log.Fatale(err)
 			} else {
 				for _, e := range errs {
 					log.Errore(e)
 				}
-				for _, slave := range slaves {
-					fmt.Println(slave.Key.DisplayString())
+				for _, subordinate := range subordinates {
+					fmt.Println(subordinate.Key.DisplayString())
 				}
 			}
 		}
-	case registerCliCommand("regroup-slaves", "Smart relocation", `Given an instance, pick one of its slave and make it local master of its siblings`):
+	case registerCliCommand("regroup-subordinates", "Smart relocation", `Given an instance, pick one of its subordinate and make it local main of its siblings`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
@@ -256,50 +256,50 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			}
 			validateInstanceIsFound(instanceKey)
-			lostSlaves, equalSlaves, aheadSlaves, cannotReplicateSlaves, promotedSlave, err := inst.RegroupSlaves(instanceKey, false, func(candidateSlave *inst.Instance) { fmt.Println(candidateSlave.Key.DisplayString()) }, postponedFunctionsContainer)
-			lostSlaves = append(lostSlaves, cannotReplicateSlaves...)
+			lostSubordinates, equalSubordinates, aheadSubordinates, cannotReplicateSubordinates, promotedSubordinate, err := inst.RegroupSubordinates(instanceKey, false, func(candidateSubordinate *inst.Instance) { fmt.Println(candidateSubordinate.Key.DisplayString()) }, postponedFunctionsContainer)
+			lostSubordinates = append(lostSubordinates, cannotReplicateSubordinates...)
 			postponedFunctionsContainer.InvokePostponed()
-			if promotedSlave == nil {
-				log.Fatalf("Could not regroup slaves of %+v; error: %+v", *instanceKey, err)
+			if promotedSubordinate == nil {
+				log.Fatalf("Could not regroup subordinates of %+v; error: %+v", *instanceKey, err)
 			}
 			fmt.Println(fmt.Sprintf("%s lost: %d, trivial: %d, pseudo-gtid: %d",
-				promotedSlave.Key.DisplayString(), len(lostSlaves), len(equalSlaves), len(aheadSlaves)))
+				promotedSubordinate.Key.DisplayString(), len(lostSubordinates), len(equalSubordinates), len(aheadSubordinates)))
 			if err != nil {
 				log.Fatale(err)
 			}
 		}
 	// General replication commands
 	// move, binlog file:pos
-	case registerCliCommand("move-up", "Classic file:pos relocation", `Move a slave one level up the topology`):
+	case registerCliCommand("move-up", "Classic file:pos relocation", `Move a subordinate one level up the topology`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			instance, err := inst.MoveUp(instanceKey)
 			if err != nil {
 				log.Fatale(err)
 			}
-			fmt.Println(fmt.Sprintf("%s<%s", instanceKey.DisplayString(), instance.MasterKey.DisplayString()))
+			fmt.Println(fmt.Sprintf("%s<%s", instanceKey.DisplayString(), instance.MainKey.DisplayString()))
 		}
-	case registerCliCommand("move-up-slaves", "Classic file:pos relocation", `Moves slaves of the given instance one level up the topology`):
+	case registerCliCommand("move-up-subordinates", "Classic file:pos relocation", `Moves subordinates of the given instance one level up the topology`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
 				log.Fatal("Cannot deduce instance:", instance)
 			}
-			movedSlaves, _, err, errs := inst.MoveUpSlaves(instanceKey, pattern)
+			movedSubordinates, _, err, errs := inst.MoveUpSubordinates(instanceKey, pattern)
 			if err != nil {
 				log.Fatale(err)
 			} else {
 				for _, e := range errs {
 					log.Errore(e)
 				}
-				for _, slave := range movedSlaves {
-					fmt.Println(slave.Key.DisplayString())
+				for _, subordinate := range movedSubordinates {
+					fmt.Println(subordinate.Key.DisplayString())
 				}
 			}
 		}
-	case registerCliCommand("move-below", "Classic file:pos relocation", `Moves a slave beneath its sibling. Both slaves must be actively replicating from same master.`):
+	case registerCliCommand("move-below", "Classic file:pos relocation", `Moves a subordinate beneath its sibling. Both subordinates must be actively replicating from same main.`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if destinationKey == nil {
@@ -311,7 +311,7 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			}
 			fmt.Println(fmt.Sprintf("%s<%s", instanceKey.DisplayString(), destinationKey.DisplayString()))
 		}
-	case registerCliCommand("move-equivalent", "Classic file:pos relocation", `Moves a slave beneath another server, based on previously recorded "equivalence coordinates"`):
+	case registerCliCommand("move-equivalent", "Classic file:pos relocation", `Moves a subordinate beneath another server, based on previously recorded "equivalence coordinates"`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if destinationKey == nil {
@@ -326,76 +326,76 @@ func Cli(command string, strict bool, instance string, destination string, owner
 	case registerCliCommand("repoint", "Classic file:pos relocation", `Make the given instance replicate from another instance without changing the binglog coordinates. Use with care`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
-			// destinationKey can be null, in which case the instance repoints to its existing master
+			// destinationKey can be null, in which case the instance repoints to its existing main
 			instance, err := inst.Repoint(instanceKey, destinationKey, inst.GTIDHintNeutral)
 			if err != nil {
 				log.Fatale(err)
 			}
-			fmt.Println(fmt.Sprintf("%s<%s", instanceKey.DisplayString(), instance.MasterKey.DisplayString()))
+			fmt.Println(fmt.Sprintf("%s<%s", instanceKey.DisplayString(), instance.MainKey.DisplayString()))
 		}
-	case registerCliCommand("repoint-slaves", "Classic file:pos relocation", `Repoint all slaves of given instance to replicate back from the instance. Use with care`):
+	case registerCliCommand("repoint-subordinates", "Classic file:pos relocation", `Repoint all subordinates of given instance to replicate back from the instance. Use with care`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
-			repointedSlaves, err, errs := inst.RepointSlavesTo(instanceKey, pattern, destinationKey)
+			repointedSubordinates, err, errs := inst.RepointSubordinatesTo(instanceKey, pattern, destinationKey)
 			if err != nil {
 				log.Fatale(err)
 			} else {
 				for _, e := range errs {
 					log.Errore(e)
 				}
-				for _, slave := range repointedSlaves {
-					fmt.Println(fmt.Sprintf("%s<%s", slave.Key.DisplayString(), instanceKey.DisplayString()))
+				for _, subordinate := range repointedSubordinates {
+					fmt.Println(fmt.Sprintf("%s<%s", subordinate.Key.DisplayString(), instanceKey.DisplayString()))
 				}
 			}
 		}
-	case registerCliCommand("enslave-siblings", "Classic file:pos relocation", `Turn all siblings of a slave into its sub-slaves.`):
+	case registerCliCommand("ensubordinate-siblings", "Classic file:pos relocation", `Turn all siblings of a subordinate into its sub-subordinates.`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
 				log.Fatal("Cannot deduce instance:", instance)
 			}
-			_, _, err := inst.EnslaveSiblings(instanceKey)
+			_, _, err := inst.EnsubordinateSiblings(instanceKey)
 			if err != nil {
 				log.Fatale(err)
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("enslave-master", "Classic file:pos relocation", `Turn an instance into a master of its own master; essentially switch the two.`):
+	case registerCliCommand("ensubordinate-main", "Classic file:pos relocation", `Turn an instance into a main of its own main; essentially switch the two.`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
 				log.Fatal("Cannot deduce instance:", instance)
 			}
-			_, err := inst.EnslaveMaster(instanceKey)
+			_, err := inst.EnsubordinateMain(instanceKey)
 			if err != nil {
 				log.Fatale(err)
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("make-co-master", "Classic file:pos relocation", `Create a master-master replication. Given instance is a slave which replicates directly from a master.`):
+	case registerCliCommand("make-co-main", "Classic file:pos relocation", `Create a main-main replication. Given instance is a subordinate which replicates directly from a main.`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
-			_, err := inst.MakeCoMaster(instanceKey)
+			_, err := inst.MakeCoMain(instanceKey)
 			if err != nil {
 				log.Fatale(err)
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("get-candidate-slave", "Classic file:pos relocation", `Information command suggesting the most up-to-date slave of a given instance that is good for promotion`):
+	case registerCliCommand("get-candidate-subordinate", "Classic file:pos relocation", `Information command suggesting the most up-to-date subordinate of a given instance that is good for promotion`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
 				log.Fatal("Cannot deduce instance:", instance)
 			}
-			instance, _, _, _, _, err := inst.GetCandidateSlave(instanceKey, false)
+			instance, _, _, _, _, err := inst.GetCandidateSubordinate(instanceKey, false)
 			if err != nil {
 				log.Fatale(err)
 			} else {
 				fmt.Println(instance.Key.DisplayString())
 			}
 		}
-	case registerCliCommand("regroup-slaves-bls", "Binlog server relocation", `Regroup Binlog Server slaves of a given instance`):
+	case registerCliCommand("regroup-subordinates-bls", "Binlog server relocation", `Regroup Binlog Server subordinates of a given instance`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
@@ -403,9 +403,9 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			}
 			validateInstanceIsFound(instanceKey)
-			_, promotedBinlogServer, err := inst.RegroupSlavesBinlogServers(instanceKey, false)
+			_, promotedBinlogServer, err := inst.RegroupSubordinatesBinlogServers(instanceKey, false)
 			if promotedBinlogServer == nil {
-				log.Fatalf("Could not regroup binlog server slaves of %+v; error: %+v", *instanceKey, err)
+				log.Fatalf("Could not regroup binlog server subordinates of %+v; error: %+v", *instanceKey, err)
 			}
 			fmt.Println(promotedBinlogServer.Key.DisplayString())
 			if err != nil {
@@ -413,7 +413,7 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			}
 		}
 	// move, GTID
-	case registerCliCommand("move-gtid", "GTID relocation", `Move a slave beneath another instance.`):
+	case registerCliCommand("move-gtid", "GTID relocation", `Move a subordinate beneath another instance.`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if destinationKey == nil {
@@ -425,25 +425,25 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			}
 			fmt.Println(fmt.Sprintf("%s<%s", instanceKey.DisplayString(), destinationKey.DisplayString()))
 		}
-	case registerCliCommand("move-slaves-gtid", "GTID relocation", `Moves all slaves of a given instance under another (destination) instance using GTID`):
+	case registerCliCommand("move-subordinates-gtid", "GTID relocation", `Moves all subordinates of a given instance under another (destination) instance using GTID`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if destinationKey == nil {
 				log.Fatal("Cannot deduce destination:", destination)
 			}
-			movedSlaves, _, err, errs := inst.MoveSlavesGTID(instanceKey, destinationKey, pattern)
+			movedSubordinates, _, err, errs := inst.MoveSubordinatesGTID(instanceKey, destinationKey, pattern)
 			if err != nil {
 				log.Fatale(err)
 			} else {
 				for _, e := range errs {
 					log.Errore(e)
 				}
-				for _, slave := range movedSlaves {
-					fmt.Println(slave.Key.DisplayString())
+				for _, subordinate := range movedSubordinates {
+					fmt.Println(subordinate.Key.DisplayString())
 				}
 			}
 		}
-	case registerCliCommand("regroup-slaves-gtid", "GTID relocation", `Given an instance, pick one of its slave and make it local master of its siblings, using GTID.`):
+	case registerCliCommand("regroup-subordinates-gtid", "GTID relocation", `Given an instance, pick one of its subordinate and make it local main of its siblings, using GTID.`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
@@ -451,20 +451,20 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			}
 			validateInstanceIsFound(instanceKey)
-			lostSlaves, movedSlaves, cannotReplicateSlaves, promotedSlave, err := inst.RegroupSlavesGTID(instanceKey, false, func(candidateSlave *inst.Instance) { fmt.Println(candidateSlave.Key.DisplayString()) })
-			lostSlaves = append(lostSlaves, cannotReplicateSlaves...)
+			lostSubordinates, movedSubordinates, cannotReplicateSubordinates, promotedSubordinate, err := inst.RegroupSubordinatesGTID(instanceKey, false, func(candidateSubordinate *inst.Instance) { fmt.Println(candidateSubordinate.Key.DisplayString()) })
+			lostSubordinates = append(lostSubordinates, cannotReplicateSubordinates...)
-			if promotedSlave == nil {
-				log.Fatalf("Could not regroup slaves of %+v; error: %+v", *instanceKey, err)
+			if promotedSubordinate == nil {
+				log.Fatalf("Could not regroup subordinates of %+v; error: %+v", *instanceKey, err)
 			}
 			fmt.Println(fmt.Sprintf("%s lost: %d, moved: %d",
-				promotedSlave.Key.DisplayString(), len(lostSlaves), len(movedSlaves)))
+				promotedSubordinate.Key.DisplayString(), len(lostSubordinates), len(movedSubordinates)))
 			if err != nil {
 				log.Fatale(err)
 			}
 		}
 	// Pseudo-GTID
-	case registerCliCommand("match", "Pseudo-GTID relocation", `Matches a slave beneath another (destination) instance using Pseudo-GTID`),
+	case registerCliCommand("match", "Pseudo-GTID relocation", `Matches a subordinate beneath another (destination) instance using Pseudo-GTID`),
 		registerCliCommand("match-below", "Pseudo-GTID relocation", `Synonym to 'match', will be deprecated`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
@@ -477,28 +477,28 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			}
 			fmt.Println(fmt.Sprintf("%s<%s", instanceKey.DisplayString(), destinationKey.DisplayString()))
 		}
-	case registerCliCommand("match-up", "Pseudo-GTID relocation", `Transport the slave one level up the hierarchy, making it child of its grandparent, using Pseudo-GTID`):
+	case registerCliCommand("match-up", "Pseudo-GTID relocation", `Transport the subordinate one level up the hierarchy, making it child of its grandparent, using Pseudo-GTID`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			instance, _, err := inst.MatchUp(instanceKey, true)
 			if err != nil {
 				log.Fatale(err)
 			}
-			fmt.Println(fmt.Sprintf("%s<%s", instanceKey.DisplayString(), instance.MasterKey.DisplayString()))
+			fmt.Println(fmt.Sprintf("%s<%s", instanceKey.DisplayString(), instance.MainKey.DisplayString()))
 		}
-	case registerCliCommand("rematch", "Pseudo-GTID relocation", `Reconnect a slave onto its master, via PSeudo-GTID.`):
+	case registerCliCommand("rematch", "Pseudo-GTID relocation", `Reconnect a subordinate onto its main, via PSeudo-GTID.`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
-			instance, _, err := inst.RematchSlave(instanceKey, true)
+			instance, _, err := inst.RematchSubordinate(instanceKey, true)
 			if err != nil {
 				log.Fatale(err)
 			}
-			fmt.Println(fmt.Sprintf("%s<%s", instanceKey.DisplayString(), instance.MasterKey.DisplayString()))
+			fmt.Println(fmt.Sprintf("%s<%s", instanceKey.DisplayString(), instance.MainKey.DisplayString()))
 		}
-	case registerCliCommand("match-slaves", "Pseudo-GTID relocation", `Matches all slaves of a given instance under another (destination) instance using Pseudo-GTID`),
-		registerCliCommand("multi-match-slaves", "Pseudo-GTID relocation", `Synonym to 'match-slaves', will be deprecated`):
+	case registerCliCommand("match-subordinates", "Pseudo-GTID relocation", `Matches all subordinates of a given instance under another (destination) instance using Pseudo-GTID`),
+		registerCliCommand("multi-match-subordinates", "Pseudo-GTID relocation", `Synonym to 'match-subordinates', will be deprecated`):
 		{
-			// Move all slaves of "instance" beneath "destination"
+			// Move all subordinates of "instance" beneath "destination"
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
 				log.Fatal("Cannot deduce instance:", instance)
@@ -507,38 +507,38 @@ func Cli(command string, strict bool, instance string, destination string, owner
 				log.Fatal("Cannot deduce destination:", destination)
 			}
-			matchedSlaves, _, err, errs := inst.MultiMatchSlaves(instanceKey, destinationKey, pattern)
+			matchedSubordinates, _, err, errs := inst.MultiMatchSubordinates(instanceKey, destinationKey, pattern)
 			if err != nil {
 				log.Fatale(err)
 			} else {
 				for _, e := range errs {
 					log.Errore(e)
 				}
-				for _, slave := range matchedSlaves {
-					fmt.Println(slave.Key.DisplayString())
+				for _, subordinate := range matchedSubordinates {
+					fmt.Println(subordinate.Key.DisplayString())
 				}
 			}
 		}
-	case registerCliCommand("match-up-slaves", "Pseudo-GTID relocation", `Matches slaves of the given instance one level up the topology, making them siblings of given instance, using Pseudo-GTID`):
+	case registerCliCommand("match-up-subordinates", "Pseudo-GTID relocation", `Matches subordinates of the given instance one level up the topology, making them siblings of given instance, using Pseudo-GTID`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
 				log.Fatal("Cannot deduce instance:", instance)
 			}
-			matchedSlaves, _, err, errs := inst.MatchUpSlaves(instanceKey, pattern)
+			matchedSubordinates, _, err, errs := inst.MatchUpSubordinates(instanceKey, pattern)
 			if err != nil {
 				log.Fatale(err)
 			} else {
 				for _, e := range errs {
 					log.Errore(e)
 				}
-				for _, slave := range matchedSlaves {
-					fmt.Println(slave.Key.DisplayString())
+				for _, subordinate := range matchedSubordinates {
+					fmt.Println(subordinate.Key.DisplayString())
 				}
 			}
 		}
-	case registerCliCommand("regroup-slaves-pgtid", "Pseudo-GTID relocation", `Given an instance, pick one of its slave and make it local master of its siblings, using Pseudo-GTID.`):
+	case registerCliCommand("regroup-subordinates-pgtid", "Pseudo-GTID relocation", `Given an instance, pick one of its subordinate and make it local main of its siblings, using Pseudo-GTID.`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
@@ -546,14 +546,14 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			}
 			validateInstanceIsFound(instanceKey)
-			lostSlaves, equalSlaves, aheadSlaves, cannotReplicateSlaves, promotedSlave, err := inst.RegroupSlavesPseudoGTID(instanceKey, false, func(candidateSlave *inst.Instance) { fmt.Println(candidateSlave.Key.DisplayString()) }, postponedFunctionsContainer)
-			lostSlaves = append(lostSlaves, cannotReplicateSlaves...)
+			lostSubordinates, equalSubordinates, aheadSubordinates, cannotReplicateSubordinates, promotedSubordinate, err := inst.RegroupSubordinatesPseudoGTID(instanceKey, false, func(candidateSubordinate *inst.Instance) { fmt.Println(candidateSubordinate.Key.DisplayString()) }, postponedFunctionsContainer)
+			lostSubordinates = append(lostSubordinates, cannotReplicateSubordinates...)
 			postponedFunctionsContainer.InvokePostponed()
-			if promotedSlave == nil {
-				log.Fatalf("Could not regroup slaves of %+v; error: %+v", *instanceKey, err)
+			if promotedSubordinate == nil {
+				log.Fatalf("Could not regroup subordinates of %+v; error: %+v", *instanceKey, err)
 			}
 			fmt.Println(fmt.Sprintf("%s lost: %d, trivial: %d, pseudo-gtid: %d",
-				promotedSlave.Key.DisplayString(), len(lostSlaves), len(equalSlaves), len(aheadSlaves)))
+				promotedSubordinate.Key.DisplayString(), len(lostSubordinates), len(equalSubordinates), len(aheadSubordinates)))
 			if err != nil {
 				log.Fatale(err)
 			}
@@ -577,16 +577,16 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("reset-master-gtid-remove-own-uuid", "Replication, general", `Reset master on instance, remove GTID entries generated by instance`):
+	case registerCliCommand("reset-main-gtid-remove-own-uuid", "Replication, general", `Reset main on instance, remove GTID entries generated by instance`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
-			_, err := inst.ResetMasterGTIDOperation(instanceKey, true, "")
+			_, err := inst.ResetMainGTIDOperation(instanceKey, true, "")
 			if err != nil {
 				log.Fatale(err)
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("skip-query", "Replication, general", `Skip a single statement on a slave; either when running with GTID or without`):
+	case registerCliCommand("skip-query", "Replication, general", `Skip a single statement on a subordinate; either when running with GTID or without`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			_, err := inst.SkipQuery(instanceKey)
@@ -595,85 +595,85 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("stop-slave", "Replication, general", `Issue a STOP SLAVE on an instance`):
+	case registerCliCommand("stop-subordinate", "Replication, general", `Issue a STOP SLAVE on an instance`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
-			_, err := inst.StopSlave(instanceKey)
+			_, err := inst.StopSubordinate(instanceKey)
 			if err != nil {
 				log.Fatale(err)
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("start-slave", "Replication, general", `Issue a START SLAVE on an instance`):
+	case registerCliCommand("start-subordinate", "Replication, general", `Issue a START SLAVE on an instance`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
-			_, err := inst.StartSlave(instanceKey)
+			_, err := inst.StartSubordinate(instanceKey)
 			if err != nil {
 				log.Fatale(err)
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("restart-slave", "Replication, general", `STOP and START SLAVE on an instance`):
+	case registerCliCommand("restart-subordinate", "Replication, general", `STOP and START SLAVE on an instance`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
-			_, err := inst.RestartSlave(instanceKey)
+			_, err := inst.RestartSubordinate(instanceKey)
 			if err != nil {
 				log.Fatale(err)
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("reset-slave", "Replication, general", `Issues a RESET SLAVE command; use with care`):
+	case registerCliCommand("reset-subordinate", "Replication, general", `Issues a RESET SLAVE command; use with care`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
-			_, err := inst.ResetSlaveOperation(instanceKey)
+			_, err := inst.ResetSubordinateOperation(instanceKey)
 			if err != nil {
 				log.Fatale(err)
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("detach-slave", "Replication, general", `Stops replication and modifies binlog position into an impossible, yet reversible, value.`):
+	case registerCliCommand("detach-subordinate", "Replication, general", `Stops replication and modifies binlog position into an impossible, yet reversible, value.`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
-			_, err := inst.DetachSlaveOperation(instanceKey)
+			_, err := inst.DetachSubordinateOperation(instanceKey)
 			if err != nil {
 				log.Fatale(err)
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("reattach-slave", "Replication, general", `Undo a detach-slave operation`):
+	case registerCliCommand("reattach-subordinate", "Replication, general", `Undo a detach-subordinate operation`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
-			_, err := inst.ReattachSlaveOperation(instanceKey)
+			_, err := inst.ReattachSubordinateOperation(instanceKey)
 			if err != nil {
 				log.Fatale(err)
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("detach-slave-master-host", "Replication, general", `Stops replication and modifies Master_Host into an impossible, yet reversible, value.`):
+	case registerCliCommand("detach-subordinate-main-host", "Replication, general", `Stops replication and modifies Main_Host into an impossible, yet reversible, value.`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
 				log.Fatal("Cannot deduce instance:", instance)
 			}
-			_, err := inst.DetachSlaveMasterHost(instanceKey)
+			_, err := inst.DetachSubordinateMainHost(instanceKey)
 			if err != nil {
 				log.Fatale(err)
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("reattach-slave-master-host", "Replication, general", `Undo a detach-slave-master-host operation`):
+	case registerCliCommand("reattach-subordinate-main-host", "Replication, general", `Undo a detach-subordinate-main-host operation`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
 				log.Fatal("Cannot deduce instance:", instance)
 			}
-			_, err := inst.ReattachSlaveMasterHost(instanceKey)
+			_, err := inst.ReattachSubordinateMainHost(instanceKey)
 			if err != nil {
 				log.Fatale(err)
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("master-pos-wait", "Replication, general", `Wait until slave reaches given replication coordinates (--binlog=file:pos)`):
+	case registerCliCommand("main-pos-wait", "Replication, general", `Wait until subordinate reaches given replication coordinates (--binlog=file:pos)`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
@@ -691,19 +691,19 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			if binlogCoordinates, err = inst.ParseBinlogCoordinates(*config.RuntimeCLIFlags.BinlogFile); err != nil {
 				log.Fatalf("Expecing --binlog argument as file:pos")
argument as file:pos") } - _, err = inst.MasterPosWait(instanceKey, binlogCoordinates) + _, err = inst.MainPosWait(instanceKey, binlogCoordinates) if err != nil { log.Fatale(err) } fmt.Println(instanceKey.DisplayString()) } - case registerCliCommand("restart-slave-statements", "Replication, general", `Get a list of statements to execute to stop then restore slave to same execution state. Provide --statement for injected statement`): + case registerCliCommand("restart-subordinate-statements", "Replication, general", `Get a list of statements to execute to stop then restore subordinate to same execution state. Provide --statement for injected statement`): { instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true) if instanceKey == nil { log.Fatalf("Unresolved instance") } - statements, err := inst.GetSlaveRestartPreserveStatements(instanceKey, *config.RuntimeCLIFlags.Statement) + statements, err := inst.GetSubordinateRestartPreserveStatements(instanceKey, *config.RuntimeCLIFlags.Statement) if err != nil { log.Fatale(err) } @@ -899,9 +899,9 @@ func Cli(command string, strict bool, instance string, destination string, owner fmt.Println(strings.Join(clusters, "\n")) } } - case registerCliCommand("all-clusters-masters", "Information", `List of writeable masters, one per cluster`): + case registerCliCommand("all-clusters-mains", "Information", `List of writeable mains, one per cluster`): { - instances, err := inst.ReadWriteableClustersMasters() + instances, err := inst.ReadWriteableClustersMains() if err != nil { log.Fatale(err) } else { @@ -934,7 +934,7 @@ func Cli(command string, strict bool, instance string, destination string, owner { instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true) if instanceKey == nil { - log.Fatalf("Unable to get master: unresolved instance") + log.Fatalf("Unable to get main: unresolved instance") } instance := validateInstanceIsFound(instanceKey) fmt.Println(instance.Key.DisplayString()) @@ -962,17 +962,17 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(instanceKey.DisplayString()) } - case registerCliCommand("which-cluster-master", "Information", `Output the name of the master in a given cluster`): + case registerCliCommand("which-cluster-main", "Information", `Output the name of the main in a given cluster`): { clusterName := getClusterName(clusterAlias, instanceKey) - masters, err := inst.ReadClusterWriteableMaster(clusterName) + mains, err := inst.ReadClusterWriteableMain(clusterName) if err != nil { log.Fatale(err) } - if len(masters) == 0 { - log.Fatalf("No writeable masters found for cluster %+v", clusterName) + if len(mains) == 0 { + log.Fatalf("No writeable mains found for cluster %+v", clusterName) } - fmt.Println(masters[0].Key.DisplayString()) + fmt.Println(mains[0].Key.DisplayString()) } case registerCliCommand("which-cluster-instances", "Information", `Output the list of instances participating in same cluster as given instance`): { @@ -985,10 +985,10 @@ func Cli(command string, strict bool, instance string, destination string, owner fmt.Println(clusterInstance.Key.DisplayString()) } } - case registerCliCommand("which-cluster-osc-slaves", "Information", `Output a list of slaves in a cluster, that could serve as a pt-online-schema-change operation control slaves`): + case registerCliCommand("which-cluster-osc-subordinates", "Information", `Output a list of subordinates in a cluster, that could serve as a pt-online-schema-change operation control subordinates`): { clusterName := 
-			instances, err := inst.GetClusterOSCSlaves(clusterName)
+			instances, err := inst.GetClusterOSCSubordinates(clusterName)
 			if err != nil {
 				log.Fatale(err)
 			}
@@ -996,10 +996,10 @@ func Cli(command string, strict bool, instance string, destination string, owner
 				fmt.Println(clusterInstance.Key.DisplayString())
 			}
 		}
-	case registerCliCommand("which-cluster-gh-ost-slaves", "Information", `Output a list of slaves in a cluster, that could serve as a gh-ost working server`):
+	case registerCliCommand("which-cluster-gh-ost-subordinates", "Information", `Output a list of subordinates in a cluster, that could serve as a gh-ost working server`):
 		{
 			clusterName := getClusterName(clusterAlias, instanceKey)
-			instances, err := inst.GetClusterGhostSlaves(clusterName)
+			instances, err := inst.GetClusterGhostSubordinates(clusterName)
 			if err != nil {
 				log.Fatale(err)
 			}
@@ -1007,29 +1007,29 @@ func Cli(command string, strict bool, instance string, destination string, owner
 				fmt.Println(clusterInstance.Key.DisplayString())
 			}
 		}
-	case registerCliCommand("which-master", "Information", `Output the fully-qualified hostname:port representation of a given instance's master`):
+	case registerCliCommand("which-main", "Information", `Output the fully-qualified hostname:port representation of a given instance's main`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
-				log.Fatalf("Unable to get master: unresolved instance")
+				log.Fatalf("Unable to get main: unresolved instance")
 			}
 			instance := validateInstanceIsFound(instanceKey)
-			if instance.MasterKey.IsValid() {
-				fmt.Println(instance.MasterKey.DisplayString())
+			if instance.MainKey.IsValid() {
+				fmt.Println(instance.MainKey.DisplayString())
 			}
 		}
-	case registerCliCommand("which-slaves", "Information", `Output the fully-qualified hostname:port list of slaves of a given instance`):
+	case registerCliCommand("which-subordinates", "Information", `Output the fully-qualified hostname:port list of subordinates of a given instance`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			if instanceKey == nil {
-				log.Fatalf("Unable to get slaves: unresolved instance")
+				log.Fatalf("Unable to get subordinates: unresolved instance")
 			}
-			slaves, err := inst.ReadSlaveInstances(instanceKey)
+			subordinates, err := inst.ReadSubordinateInstances(instanceKey)
 			if err != nil {
 				log.Fatale(err)
 			}
-			for _, slave := range slaves {
-				fmt.Println(slave.Key.DisplayString())
+			for _, subordinate := range subordinates {
+				fmt.Println(subordinate.Key.DisplayString())
 			}
 		}
 	case registerCliCommand("which-lost-in-recovery", "Information", `List instances marked as downtimed for being lost in a recovery process`):
@@ -1164,34 +1164,34 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			}
 			if recoveryAttempted {
 				if promotedInstanceKey == nil {
-					log.Fatalf("Recovery attempted yet no slave promoted")
+					log.Fatalf("Recovery attempted yet no subordinate promoted")
 				}
 				fmt.Println(promotedInstanceKey.DisplayString())
 			}
 		}
-	case registerCliCommand("force-master-takeover", "Recovery", `Forcibly discard master and promote another (direct child) instance instead, even if everything is running well`):
+	case registerCliCommand("force-main-takeover", "Recovery", `Forcibly discard main and promote another (direct child) instance instead, even if everything is running well`):
 		{
 			clusterName := getClusterName(clusterAlias, instanceKey)
 			if destinationKey == nil {
-				log.Fatal("Cannot deduce destination, the instance to promote in place of the master. Please provide with -d")
+				log.Fatal("Cannot deduce destination, the instance to promote in place of the main. Please provide with -d")
 			}
 			destination := validateInstanceIsFound(destinationKey)
-			topologyRecovery, err := logic.ForceMasterTakeover(clusterName, destination)
+			topologyRecovery, err := logic.ForceMainTakeover(clusterName, destination)
 			if err != nil {
 				log.Fatale(err)
 			}
 			fmt.Println(topologyRecovery.SuccessorKey.DisplayString())
 		}
-	case registerCliCommand("graceful-master-takeover", "Recovery", `Gracefully discard master and promote another (direct child) instance instead, even if everything is running well`):
+	case registerCliCommand("graceful-main-takeover", "Recovery", `Gracefully discard main and promote another (direct child) instance instead, even if everything is running well`):
 		{
 			clusterName := getClusterName(clusterAlias, instanceKey)
-			topologyRecovery, promotedMasterCoordinates, err := logic.GracefulMasterTakeover(clusterName)
+			topologyRecovery, promotedMainCoordinates, err := logic.GracefulMainTakeover(clusterName)
 			if err != nil {
 				log.Fatale(err)
 			}
 			fmt.Println(topologyRecovery.SuccessorKey.DisplayString())
-			fmt.Println(*promotedMasterCoordinates)
-			log.Debugf("Promoted %+v as new master. Binlog coordinates at time of promotion: %+v", topologyRecovery.SuccessorKey, *promotedMasterCoordinates)
+			fmt.Println(*promotedMainCoordinates)
+			log.Debugf("Promoted %+v as new main. Binlog coordinates at time of promotion: %+v", topologyRecovery.SuccessorKey, *promotedMainCoordinates)
 		}
 	case registerCliCommand("replication-analysis", "Recovery", `Request an analysis of potential crash incidents in all known topologies`):
 		{
@@ -1229,7 +1229,7 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			fmt.Println(fmt.Sprintf("%d recoveries acknowldged", countRecoveries))
 		}
 	// Instance meta
-	case registerCliCommand("register-candidate", "Instance, meta", `Indicate that a specific instance is a preferred candidate for master promotion`):
+	case registerCliCommand("register-candidate", "Instance, meta", `Indicate that a specific instance is a preferred candidate for main promotion`):
 		{
 			instanceKey = deduceInstanceKeyIfNeeded(instance, instanceKey, true)
 			promotionRule, err := inst.ParseCandidatePromotionRule(*config.RuntimeCLIFlags.PromotionRule)
@@ -1260,7 +1260,7 @@ func Cli(command string, strict bool, instance string, destination string, owner
 			}
 			fmt.Println(instanceKey.DisplayString())
 		}
-	case registerCliCommand("set-heuristic-domain-instance", "Instance, meta", `Associate domain name of given cluster with what seems to be the writer master for that cluster`):
+	case registerCliCommand("set-heuristic-domain-instance", "Instance, meta", `Associate domain name of given cluster with what seems to be the writer main for that cluster`):
 		{
 			clusterName := getClusterName(clusterAlias, instanceKey)
 			instanceKey, err := inst.HeuristicallyApplyClusterDomainInstanceAttribute(clusterName)
diff --git a/go/cmd/orchestrator/main.go b/go/cmd/orchestrator/main.go
index ed19ddd8..e9b89052 100644
--- a/go/cmd/orchestrator/main.go
+++ b/go/cmd/orchestrator/main.go
@@ -60,148 +60,148 @@ Cheatsheet:
 	Different flags are required for different commands; see specific documentation per commmand.
 	Topology refactoring, generic aka "smart" commands
-		These operations let orchestrator pick the best course of action for relocating slaves. It may choose to use
+		These operations let orchestrator pick the best course of action for relocating subordinates. It may choose to use
 		standard binlog file:pos math, GTID, Pseudo-GTID, or take advantage of binlog servers, or combine two or more methods in a multi-step operation.
-		In case a of a multi-step operation, failure may result in slaves only moving halfway to destination point. Nonetheless
+		In case a of a multi-step operation, failure may result in subordinates only moving halfway to destination point. Nonetheless
 		they will be in a valid position.
 	relocate
-		Relocate a slave beneath another (destination) instance. The choice of destination is almost arbitrary;
-		it must not be a child/descendant of the instance, but otherwise it can be anywhere, and can be a normal slave
-		or a binlog server. Orchestrator will choose the best course of action to relocate the slave.
-		No action taken when destination instance cannot act as master (e.g. has no binary logs, is of incompatible version, incompatible binlog format etc.)
+		Relocate a subordinate beneath another (destination) instance. The choice of destination is almost arbitrary;
+		it must not be a child/descendant of the instance, but otherwise it can be anywhere, and can be a normal subordinate
+		or a binlog server. Orchestrator will choose the best course of action to relocate the subordinate.
+		No action taken when destination instance cannot act as main (e.g. has no binary logs, is of incompatible version, incompatible binlog format etc.)
 		Examples:
-		orchestrator -c relocate -i slave.to.relocate.com -d instance.that.becomes.its.master
+		orchestrator -c relocate -i subordinate.to.relocate.com -d instance.that.becomes.its.main
-		orchestrator -c relocate -d destination.instance.that.becomes.its.master
+		orchestrator -c relocate -d destination.instance.that.becomes.its.main
 			-i not given, implicitly assumed local hostname
 		(this command was previously named "relocate-below")
-	relocate-slaves
-		Relocates all or part of the slaves of a given instance under another (destination) instance. This is
-		typically much faster than relocating slaves one by one.
-		Orchestrator chooses the best course of action to relocation the slaves. It may choose a multi-step operations.
-		Some slaves may succeed and some may fail the operation.
-		The instance (slaves' master) itself may be crashed or inaccessible. It is not contacted throughout the operation.
+	relocate-subordinates
+		Relocates all or part of the subordinates of a given instance under another (destination) instance. This is
+		typically much faster than relocating subordinates one by one.
+		Orchestrator chooses the best course of action to relocation the subordinates. It may choose a multi-step operations.
+		Some subordinates may succeed and some may fail the operation.
+		The instance (subordinates' main) itself may be crashed or inaccessible. It is not contacted throughout the operation.
 		Examples:
-		orchestrator -c relocate-slaves -i instance.whose.slaves.will.relocate -d instance.that.becomes.their.master
+		orchestrator -c relocate-subordinates -i instance.whose.subordinates.will.relocate -d instance.that.becomes.their.main
-		orchestrator -c relocate-slaves -i instance.whose.slaves.will.relocate -d instance.that.becomes.their.master --pattern=regexp.filter
+		orchestrator -c relocate-subordinates -i instance.whose.subordinates.will.relocate -d instance.that.becomes.their.main --pattern=regexp.filter
 			only apply to those instances that match given regex
 	Topology refactoring using classic MySQL replication commands (ie STOP SLAVE; START SLAVE UNTIL; CHANGE MASTER TO; ...)
-		These commands require connected topology: slaves that are up and running; a lagging, stopped or
-		failed slave will disable use of most these commands. At least one, and typically two or more slaves
+		These commands require connected topology: subordinates that are up and running; a lagging, stopped or
+		failed subordinate will disable use of most these commands. At least one, and typically two or more subordinates
 		will be stopped for a short time during these operations.
 	move-up
-		Move a slave one level up the topology; makes it replicate from its grandparent and become sibling of
-		its parent. It is OK if the instance's master is not replicating. Examples:
+		Move a subordinate one level up the topology; makes it replicate from its grandparent and become sibling of
+		its parent. It is OK if the instance's main is not replicating. Examples:
-		orchestrator -c move-up -i slave.to.move.up.com:3306
+		orchestrator -c move-up -i subordinate.to.move.up.com:3306
 		orchestrator -c move-up
 			-i not given, implicitly assumed local hostname
-	move-up-slaves
-		Moves slaves of the given instance one level up the topology, making them siblings of given instance.
-		This is a (faster) shortcut to executing move-up on all slaves of given instance.
+	move-up-subordinates
+		Moves subordinates of the given instance one level up the topology, making them siblings of given instance.
+		This is a (faster) shortcut to executing move-up on all subordinates of given instance.
 		Examples:
-		orchestrator -c move-up-slaves -i slave.whose.subslaves.will.move.up.com[:3306]
+		orchestrator -c move-up-subordinates -i subordinate.whose.subsubordinates.will.move.up.com[:3306]
-		orchestrator -c move-up-slaves -i slave.whose.subslaves.will.move.up.com[:3306] --pattern=regexp.filter
+		orchestrator -c move-up-subordinates -i subordinate.whose.subsubordinates.will.move.up.com[:3306] --pattern=regexp.filter
 			only apply to those instances that match given regex
 	move-below
-		Moves a slave beneath its sibling. Both slaves must be actively replicating from same master.
-		The sibling will become instance's master. No action taken when sibling cannot act as master
+		Moves a subordinate beneath its sibling. Both subordinates must be actively replicating from same main.
+		The sibling will become instance's main. No action taken when sibling cannot act as main
 		(e.g. has no binary logs, is of incompatible version, incompatible binlog format etc.)
 		Example:
-		orchestrator -c move-below -i slave.to.move.com -d sibling.slave.under.which.to.move.com
+		orchestrator -c move-below -i subordinate.to.move.com -d sibling.subordinate.under.which.to.move.com
-		orchestrator -c move-below -d sibling.slave.under.which.to.move.com
+		orchestrator -c move-below -d sibling.subordinate.under.which.to.move.com
 			-i not given, implicitly assumed local hostname
 	move-equivalent
-		Moves a slave beneath another server, based on previously recorded "equivalence coordinates". Such coordinates
-		are obtained whenever orchestrator issues a CHANGE MASTER TO. The "before" and "after" masters coordinates are
-		persisted. In such cases where the newly relocated slave is unable to replicate (e.g. firewall issues) it is then
+		Moves a subordinate beneath another server, based on previously recorded "equivalence coordinates". Such coordinates
+		are obtained whenever orchestrator issues a CHANGE MASTER TO. The "before" and "after" mains coordinates are
+		persisted. In such cases where the newly relocated subordinate is unable to replicate (e.g. firewall issues) it is then
 		easy to revert the relocation via "move-equivalent".
-		The command works if and only if orchestrator has an exact mapping between the slave's current replication coordinates
+		The command works if and only if orchestrator has an exact mapping between the subordinate's current replication coordinates
 		and some other coordinates.
 		Example:
-		orchestrator -c move-equivalent -i slave.to.revert.master.position.com -d master.to.move.to.com
+		orchestrator -c move-equivalent -i subordinate.to.revert.main.position.com -d main.to.move.to.com
-	enslave-siblings
-		Turn all siblings of a slave into its sub-slaves. No action taken for siblings that cannot become
-		slaves of given instance (e.g. incompatible versions, binlog format etc.). This is a (faster) shortcut
+	ensubordinate-siblings
+		Turn all siblings of a subordinate into its sub-subordinates. No action taken for siblings that cannot become
+		subordinates of given instance (e.g. incompatible versions, binlog format etc.). This is a (faster) shortcut
 		to executing move-below for all siblings of the given instance.
 		Example:
-		orchestrator -c enslave-siblings -i slave.whose.siblings.will.move.below.com
+		orchestrator -c ensubordinate-siblings -i subordinate.whose.siblings.will.move.below.com
-	enslave-master
-		Turn an instance into a master of its own master; essentially switch the two. Slaves of each of the two
+	ensubordinate-main
+		Turn an instance into a main of its own main; essentially switch the two. Subordinates of each of the two
 		involved instances are unaffected, and continue to replicate as they were.
-		The instance's master must itself be a slave. It does not necessarily have to be actively replicating.
+		The instance's main must itself be a subordinate. It does not necessarily have to be actively replicating.
-		orchestrator -c enslave-master -i slave.that.will.switch.places.with.its.master.com
+		orchestrator -c ensubordinate-main -i subordinate.that.will.switch.places.with.its.main.com
 	repoint
 		Make the given instance replicate from another instance without changing the binglog coordinates. There
-		are little sanity checks to this and this is a risky operation. Use cases are: a rename of the master's
+		are little sanity checks to this and this is a risky operation. Use cases are: a rename of the main's
 		host, a corruption in relay-logs, move from beneath MaxScale & Binlog-server.
 		Examples:
-		orchestrator -c repoint -i slave.to.operate.on.com -d new.master.com
+		orchestrator -c repoint -i subordinate.to.operate.on.com -d new.main.com
-		orchestrator -c repoint -i slave.to.operate.on.com
-			The above will repoint the slave back to its existing master without change
+		orchestrator -c repoint -i subordinate.to.operate.on.com
+			The above will repoint the subordinate back to its existing main without change
 		orchestrator -c repoint
 			-i not given, implicitly assumed local hostname
-	repoint-slaves
-		Repoint all slaves of given instance to replicate back from the instance. This is a convenience method
-		which implies a one-by-one "repoint" command on each slave.
+	repoint-subordinates
+		Repoint all subordinates of given instance to replicate back from the instance. This is a convenience method
+		which implies a one-by-one "repoint" command on each subordinate.
-		orchestrator -c repoint-slaves -i instance.whose.slaves.will.be.repointed.com
+		orchestrator -c repoint-subordinates -i instance.whose.subordinates.will.be.repointed.com
-		orchestrator -c repoint-slaves
+		orchestrator -c repoint-subordinates
 			-i not given, implicitly assumed local hostname
-	make-co-master
-		Create a master-master replication. Given instance is a slave which replicates directly from a master.
-		The master is then turned to be a slave of the instance. The master is expected to not be a slave.
+	make-co-main
+		Create a main-main replication. Given instance is a subordinate which replicates directly from a main.
+		The main is then turned to be a subordinate of the instance. The main is expected to not be a subordinate.
 		The read_only property of the slve is unaffected by this operation. Examples:
-		orchestrator -c make-co-master -i slave.to.turn.into.co.master.com
+		orchestrator -c make-co-main -i subordinate.to.turn.into.co.main.com
-		orchestrator -c make-co-master
+		orchestrator -c make-co-main
 			-i not given, implicitly assumed local hostname
-	get-candidate-slave
-		Information command suggesting the most up-to-date slave of a given instance, which can be promoted
-		as local master to its siblings. If replication is up and running, this command merely gives an
-		estimate, since slaves advance and progress continuously in different pace. If all slaves of given
+	get-candidate-subordinate
+		Information command suggesting the most up-to-date subordinate of a given instance, which can be promoted
+		as local main to its siblings. If replication is up and running, this command merely gives an
+		estimate, since subordinates advance and progress continuously in different pace. If all subordinates of given
 		instance have broken replication (e.g. because given instance is dead), then this command provides
-		with a definitve candidate, which could act as a replace master. See also regroup-slaves. Example:
+		with a definitve candidate, which could act as a replace main. See also regroup-subordinates. Example:
-		orchestrator -c get-candidate-slave -i instance.with.slaves.one.of.which.may.be.candidate.com
+		orchestrator -c get-candidate-subordinate -i instance.with.subordinates.one.of.which.may.be.candidate.com
-	regroup-slaves-bls
-		Given an instance that has Binlog Servers for slaves, promote one such Binlog Server over its other
+	regroup-subordinates-bls
+		Given an instance that has Binlog Servers for subordinates, promote one such Binlog Server over its other
 		Binlog Server siblings.
 		Example:
-		orchestrator -c regroup-slaves-bls -i instance.with.binlog.server.slaves.com
+		orchestrator -c regroup-subordinates-bls -i instance.with.binlog.server.subordinates.com
 		--debug is your friend.
@@ -210,120 +210,120 @@ Cheatsheet:
 		These operations only work if GTID (either Oracle or MariaDB variants) is enabled on your servers.
 	move-gtid
-		Move a slave beneath another (destination) instance. Orchestrator will reject the operation if GTID is
-		not enabled on the slave, or is not supported by the would-be master.
-		You may try and move the slave under any other instance; there are no constraints on the family ties the
+		Move a subordinate beneath another (destination) instance. Orchestrator will reject the operation if GTID is
+		not enabled on the subordinate, or is not supported by the would-be main.
+		You may try and move the subordinate under any other instance; there are no constraints on the family ties the
 		two may have, though you should be careful as not to try and replicate from a descendant (making an impossible loop).
 		Examples:
-		orchestrator -c move-gtid -i slave.to.move.com -d instance.that.becomes.its.master
+		orchestrator -c move-gtid -i subordinate.to.move.com -d instance.that.becomes.its.main
-		orchestrator -c match -d destination.instance.that.becomes.its.master
+		orchestrator -c match -d destination.instance.that.becomes.its.main
 			-i not given, implicitly assumed local hostname
-	move-slaves-gtid
-		Moves all slaves of a given instance under another (destination) instance using GTID. This is a (faster)
-		shortcut to moving each slave via "move-gtid".
-		Orchestrator will only move those slaves configured with GTID (either Oracle or MariaDB variants) and under the
-		condition the would-be master supports GTID.
+	move-subordinates-gtid
+		Moves all subordinates of a given instance under another (destination) instance using GTID. This is a (faster)
+		shortcut to moving each subordinate via "move-gtid".
+		Orchestrator will only move those subordinates configured with GTID (either Oracle or MariaDB variants) and under the
+		condition the would-be main supports GTID.
 		Examples:
-		orchestrator -c move-slaves-gtid -i instance.whose.slaves.will.relocate -d instance.that.becomes.their.master
+		orchestrator -c move-subordinates-gtid -i instance.whose.subordinates.will.relocate -d instance.that.becomes.their.main
-		orchestrator -c move-slaves-gtid -i instance.whose.slaves.will.relocate -d instance.that.becomes.their.master --pattern=regexp.filter
+		orchestrator -c move-subordinates-gtid -i instance.whose.subordinates.will.relocate -d instance.that.becomes.their.main --pattern=regexp.filter
 			only apply to those instances that match given regex
-	regroup-slaves-gtid
-		Given an instance (possibly a crashed one; it is never being accessed), pick one of its slave and make it
-		local master of its siblings, using GTID. The rules are similar to those in the "regroup-slaves" command.
+	regroup-subordinates-gtid
+		Given an instance (possibly a crashed one; it is never being accessed), pick one of its subordinate and make it
+		local main of its siblings, using GTID. The rules are similar to those in the "regroup-subordinates" command.
 		Example:
-		orchestrator -c regroup-slaves-gtid -i instance.with.gtid.and.slaves.one.of.which.will.turn.local.master.if.possible
+		orchestrator -c regroup-subordinates-gtid -i instance.with.gtid.and.subordinates.one.of.which.will.turn.local.main.if.possible
 		--debug is your friend.
 	Topology refactoring using Pseudo-GTID
-		These operations require that the topology's master is periodically injected with pseudo-GTID,
+		These operations require that the topology's main is periodically injected with pseudo-GTID,
 		and that the PseudoGTIDPattern configuration is setup accordingly. Also consider setting DetectPseudoGTIDQuery.
 		Operations via Pseudo-GTID are typically slower, since they involve scanning of binary/relay logs.
 		They impose less constraints on topology locations and affect less servers. Only servers that
-		are being relocateed have their replication stopped. Their masters or destinations are unaffected.
+		are being relocateed have their replication stopped. Their mains or destinations are unaffected.
 	match
-		Matches a slave beneath another (destination) instance. The choice of destination is almost arbitrary;
+		Matches a subordinate beneath another (destination) instance. The choice of destination is almost arbitrary;
 		it must not be a child/descendant of the instance. But otherwise they don't have to be direct siblings,
 		and in fact (if you know what you're doing), they don't actually have to belong to the same topology.
 		The operation expects the relocated instance to be "behind" the destination instance. It only finds out
 		whether this is the case by the end; the operation is cancelled in the event this is not the case.
-		No action taken when destination instance cannot act as master (e.g. has no binary logs, is of incompatible version, incompatible binlog format etc.)
+		No action taken when destination instance cannot act as main (e.g. has no binary logs, is of incompatible version, incompatible binlog format etc.)
 		Examples:
-		orchestrator -c match -i slave.to.relocate.com -d instance.that.becomes.its.master
+		orchestrator -c match -i subordinate.to.relocate.com -d instance.that.becomes.its.main
-		orchestrator -c match -d destination.instance.that.becomes.its.master
+		orchestrator -c match -d destination.instance.that.becomes.its.main
 			-i not given, implicitly assumed local hostname
 		(this command was previously named "match-below")
-	match-slaves
-		Matches all slaves of a given instance under another (destination) instance. This is a (faster) shortcut
-		to matching said slaves one by one under the destination instance. In fact, this bulk operation is highly
-		optimized and can execute in orders of magnitue faster, depeding on the nu,ber of slaves involved and their
-		respective position behind the instance (the more slaves, the more savings).
+	match-subordinates
+		Matches all subordinates of a given instance under another (destination) instance. This is a (faster) shortcut
+		to matching said subordinates one by one under the destination instance. In fact, this bulk operation is highly
+		optimized and can execute in orders of magnitue faster, depeding on the nu,ber of subordinates involved and their
+		respective position behind the instance (the more subordinates, the more savings).
 		The instance itself may be crashed or inaccessible. It is not contacted throughout the operation.
Examples: - orchestrator -c match-slaves -i instance.whose.slaves.will.relocate -d instance.that.becomes.their.master + orchestrator -c match-subordinates -i instance.whose.subordinates.will.relocate -d instance.that.becomes.their.main - orchestrator -c match-slaves -i instance.whose.slaves.will.relocate -d instance.that.becomes.their.master --pattern=regexp.filter + orchestrator -c match-subordinates -i instance.whose.subordinates.will.relocate -d instance.that.becomes.their.main --pattern=regexp.filter only apply to those instances that match given regex - (this command was previously named "multi-match-slaves") + (this command was previously named "multi-match-subordinates") match-up - Transport the slave one level up the hierarchy, making it child of its grandparent. This is - similar in essence to move-up, only based on Pseudo-GTID. The master of the given instance + Transport the subordinate one level up the hierarchy, making it child of its grandparent. This is + similar in essence to move-up, only based on Pseudo-GTID. The main of the given instance does not need to be alive or connected (and could in fact be crashed). It is never contacted. Grandparent instance must be alive and accessible. Examples: - orchestrator -c match-up -i slave.to.match.up.com:3306 + orchestrator -c match-up -i subordinate.to.match.up.com:3306 orchestrator -c match-up -i not given, implicitly assumed local hostname - match-up-slaves - Matches slaves of the given instance one level up the topology, making them siblings of given instance. - This is a (faster) shortcut to executing match-up on all slaves of given instance. The instance need + match-up-subordinates + Matches subordinates of the given instance one level up the topology, making them siblings of given instance. + This is a (faster) shortcut to executing match-up on all subordinates of given instance. The instance need not be alive / accessib;e / functional. It can be crashed. Example: - orchestrator -c match-up-slaves -i slave.whose.subslaves.will.match.up.com + orchestrator -c match-up-subordinates -i subordinate.whose.subsubordinates.will.match.up.com - orchestrator -c match-up-slaves -i slave.whose.subslaves.will.match.up.com[:3306] --pattern=regexp.filter + orchestrator -c match-up-subordinates -i subordinate.whose.subsubordinates.will.match.up.com[:3306] --pattern=regexp.filter only apply to those instances that match given regex rematch - Reconnect a slave onto its master, via PSeudo-GTID. The use case for this operation is a non-crash-safe - replication configuration (e.g. MySQL 5.5) with sync_binlog=1 and log_slave_updates. This operation - implies crash-safe-replication and makes it possible for the slave to reconnect. Example: + Reconnect a subordinate onto its main, via PSeudo-GTID. The use case for this operation is a non-crash-safe + replication configuration (e.g. MySQL 5.5) with sync_binlog=1 and log_subordinate_updates. This operation + implies crash-safe-replication and makes it possible for the subordinate to reconnect. Example: - orchestrator -c rematch -i slave.to.rematch.under.its.master + orchestrator -c rematch -i subordinate.to.rematch.under.its.main - regroup-slaves - Given an instance (possibly a crashed one; it is never being accessed), pick one of its slave and make it - local master of its siblings, using Pseudo-GTID. It is uncertain that there *is* a slave that will be able to - become master to all its siblings. But if there is one, orchestrator will pick such one. 
There are many - constraints, most notably the replication positions of all slaves, whether they use log_slave_updates, and + regroup-subordinates + Given an instance (possibly a crashed one; it is never being accessed), pick one of its subordinates and make it + local main of its siblings, using Pseudo-GTID. It is uncertain that there *is* a subordinate that will be able to + become main to all its siblings. But if there is one, orchestrator will pick that one. There are many + constraints, most notably the replication positions of all subordinates, whether they use log_subordinate_updates, and otherwise version compatabilities etc. - As many slaves that can be regrouped under promoted slves are operated on. The rest are untouched. - This command is useful in the event of a crash. For example, in the event that a master dies, this operation + As many subordinates as can be regrouped under the promoted subordinate are operated on. The rest are untouched. + This command is useful in the event of a crash. For example, in the event that a main dies, this operation can promote a candidate replacement and set up the remaining topology to correctly replicate from that - replacement slave. Example: + replacement subordinate. Example: - orchestrator -c regroup-slaves -i instance.with.slaves.one.of.which.will.turn.local.master.if.possible + orchestrator -c regroup-subordinates -i instance.with.subordinates.one.of.which.will.turn.local.main.if.possible --debug is your friend. @@ -335,89 +335,89 @@ Cheatsheet: Replication is stopped for a short duration so as to reconfigure as GTID. In case of error replication remains stopped. Example: - orchestrator -c enable-gtid -i slave.compatible.with.gtid.com + orchestrator -c enable-gtid -i subordinate.compatible.with.gtid.com disable-gtid - Assuming slave replicates via GTID, disable GTID replication and resume standard file:pos replication. Example: + Assuming subordinate replicates via GTID, disable GTID replication and resume standard file:pos replication. Example: - orchestrator -c disable-gtid -i slave.replicating.via.gtid.com + orchestrator -c disable-gtid -i subordinate.replicating.via.gtid.com - reset-master-gtid-remove-own-uuid - Assuming GTID is enabled, Reset master on instance, remove GTID entries generated by the instance. - This operation is only allowed on Oracle-GTID enabled servers that have no slaves. - Is is used for cleaning up the GTID mess incurred by mistakenly issuing queries on the slave (even such + reset-main-gtid-remove-own-uuid + Assuming GTID is enabled, issue RESET MASTER on the instance and remove GTID entries generated by the instance. + This operation is only allowed on Oracle-GTID enabled servers that have no subordinates. + It is used for cleaning up the GTID mess incurred by mistakenly issuing queries on the subordinate (even such queries as "FLUSH ENGINE LOGS" that happen to write to binary logs). Example: - orchestrator -c reset-master-gtid-remove-own-uuid -i slave.running.with.gtid.com + orchestrator -c reset-main-gtid-remove-own-uuid -i subordinate.running.with.gtid.com - stop-slave + stop-subordinate Issues a STOP SLAVE; command. Example: - orchestrator -c stop-slave -i slave.to.be.stopped.com + orchestrator -c stop-subordinate -i subordinate.to.be.stopped.com - start-slave + start-subordinate Issues a START SLAVE; command.
Example: - orchestrator -c start-slave -i slave.to.be.started.com + orchestrator -c start-subordinate -i subordinate.to.be.started.com - restart-slave + restart-subordinate Issues STOP SLAVE + START SLAVE; Example: - orchestrator -c restart-slave -i slave.to.be.started.com + orchestrator -c restart-subordinate -i subordinate.to.be.started.com skip-query - On a failed replicating slave, skips a single query and attempts to resume replication. + On a failed replicating subordinate, skips a single query and attempts to resume replication. Only applies when the replication seems to be broken on SQL thread (e.g. on duplicate key error). Also works in GTID mode. Example: - orchestrator -c skip-query -i slave.with.broken.sql.thread.com + orchestrator -c skip-query -i subordinate.with.broken.sql.thread.com - reset-slave + reset-subordinate Issues a RESET SLAVE command. Destructive to replication. Example: - orchestrator -c reset-slave -i slave.to.reset.com + orchestrator -c reset-subordinate -i subordinate.to.reset.com - detach-slave + detach-subordinate Stops replication and modifies binlog position into an impossible, yet reversible, value. - This effectively means the replication becomes broken. See reattach-slave. Example: + This effectively means the replication becomes broken. See reattach-subordinate. Example: - orchestrator -c detach-slave -i slave.whose.replication.will.break.com + orchestrator -c detach-subordinate -i subordinate.whose.replication.will.break.com - Issuing this on an already detached slave will do nothing. + Issuing this on an already detached subordinate will do nothing. - reattach-slave - Undo a detach-slave operation. Reverses the binlog change into the original values, and + reattach-subordinate + Undo a detach-subordinate operation. Reverses the binlog change into the original values, and resumes replication. Example: - orchestrator -c reattach-slave -i detahced.slave.whose.replication.will.amend.com + orchestrator -c reattach-subordinate -i detahced.subordinate.whose.replication.will.amend.com - Issuing this on an attached (i.e. normal) slave will do nothing. + Issuing this on an attached (i.e. normal) subordinate will do nothing. - detach-slave-master-host - Stops replication and modifies Master_Host into an impossible, yet reversible, value. - This effectively means the replication becomes broken. See reattach-slave-master-host. Example: + detach-subordinate-main-host + Stops replication and modifies Main_Host into an impossible, yet reversible, value. + This effectively means the replication becomes broken. See reattach-subordinate-main-host. Example: - orchestrator -c detach-slave-master-host -i slave.whose.replication.will.break.com + orchestrator -c detach-subordinate-main-host -i subordinate.whose.replication.will.break.com - Issuing this on an already detached slave will do nothing. + Issuing this on an already detached subordinate will do nothing. - reattach-slave-master-host - Undo a detach-slave-master-host operation. Reverses the hostname change into the original value, and + reattach-subordinate-main-host + Undo a detach-subordinate-main-host operation. Reverses the hostname change into the original value, and resumes replication. Example: - orchestrator -c reattach-slave-master-host -i detahced.slave.whose.replication.will.amend.com + orchestrator -c reattach-subordinate-main-host -i detahced.subordinate.whose.replication.will.amend.com - Issuing this on an attached (i.e. normal) slave will do nothing. + Issuing this on an attached (i.e. 
normal) subordinate will do nothing. - restart-slave-statements - Prints a list of statements to execute to stop then restore slave to same execution state. + restart-subordinate-statements + Prints a list of statements to execute to stop and then restore the subordinate to the same execution state. Provide --statement for injected statement. - This is useful for issuing a command that can only be executed whiel slave is stopped. Such + This is useful for issuing a command that can only be executed while the subordinate is stopped. Such commands are any of CHANGE MASTER TO. Orchestrator will not execute given commands, only print them as courtesy. It may not have the privileges to execute them in the first place. Example: - orchestrator -c restart-slave-statements -i some.slave.com -statement="change master to master_heartbeat_period=5" + orchestrator -c restart-subordinate-statements -i some.subordinate.com -statement="change master to master_heartbeat_period=5" General instance commands Applying general instance configuration and state @@ -533,20 +533,20 @@ Cheatsheet: clusters List all clusters known to orchestrator. A cluster (aka topology, aka chain) is identified by its - master (or one of its master if more than one exists). Example: + main (or one of its mains if more than one exists). Example: orchesrtator -c clusters -i not given, implicitly assumed local hostname - all-clusters-masters - List of writeable masters, one per cluster. - For most single-master topologies, this is trivially the master. - For active-active master-master topologies, this ensures only one of - the masters is returned. + all-clusters-mains + List of writeable mains, one per cluster. + For most single-main topologies, this is trivially the main. + For active-active main-main topologies, this ensures only one of + the mains is returned. Example: - orchestrator -c all-clusters-masters + orchestrator -c all-clusters-mains topology Show an ascii-graph of a replication topology, given a member of that topology. Example: @@ -619,38 +619,38 @@ Cheatsheet: Detects the domain name for given cluster, reads from key-value store the writer host associated with the domain name. orchestrator -c which-heuristic-domain-instance -i instance.of.some.cluster - Cluster is inferred by a member instance (the instance is not necessarily the master) + Cluster is inferred by a member instance (the instance is not necessarily the main) - which-cluster-master - Output the name of the active master in a given cluster, indicated by instance or alias. - An "active" master is one that is writable and is not marked as downtimed due to a topology recovery. + which-cluster-main + Output the name of the active main in a given cluster, indicated by instance or alias. + An "active" main is one that is writable and is not marked as downtimed due to a topology recovery.
Examples: - orchestrator -c which-cluster-master -i instance.to.check.com + orchestrator -c which-cluster-main -i instance.to.check.com - orchestrator -c which-cluster-master + orchestrator -c which-cluster-main -i not given, implicitly assumed local hostname - orchestrator -c which-cluster-master -alias some_alias + orchestrator -c which-cluster-main -alias some_alias assuming some_alias is a known cluster alias (see ClusterNameToAlias or DetectClusterAliasQuery configuration) - which-cluster-osc-slaves - Output a list of slaves in same cluster as given instance, that would server as good candidates as control slaves + which-cluster-osc-subordinates + Output a list of subordinates in same cluster as given instance, that would serve as good candidates for control subordinates for a pt-online-schema-change operation. - Those slaves would be used for replication delay so as to throtthe osc operation. Selected slaves will include, - where possible: intermediate masters, their slaves, 3rd level slaves, direct non-intermediate-master slaves. + Those subordinates would be used to measure replication delay so as to throttle the osc operation. Selected subordinates will include, + where possible: intermediate mains, their subordinates, 3rd level subordinates, direct non-intermediate-main subordinates. - orchestrator -c which-cluster-osc-slaves -i instance.to.check.com + orchestrator -c which-cluster-osc-subordinates -i instance.to.check.com - orchestrator -c which-cluster-osc-slaves + orchestrator -c which-cluster-osc-subordinates -i not given, implicitly assumed local hostname - orchestrator -c which-cluster-osc-slaves -alias some_alias + orchestrator -c which-cluster-osc-subordinates -alias some_alias assuming some_alias is a known cluster alias (see ClusterNameToAlias or DetectClusterAliasQuery configuration) which-lost-in-recovery List instances marked as downtimed for being lost in a recovery process. This depends on the configuration - of MasterFailoverLostInstancesDowntimeMinutes. The output of this command lists heuristically recent + of MainFailoverLostInstancesDowntimeMinutes. The output of this command lists heuristically recent "lost" instances that probabaly should be recycled. Note that when the 'downtime' flag expires (or is reset by '-c end-downtime') an instance no longer appears on this list. The topology recovery process injects a magic hint when downtiming lost instances, that is picked up @@ -659,29 +659,29 @@ Cheatsheet: orchestrator -c which-lost-in-recovery Lists all heuristically-recent known lost instances - which-master - Output the fully-qualified hostname:port representation of a given instance's master. Examples: + which-main + Output the fully-qualified hostname:port representation of a given instance's main. Examples: - orchestrator -c which-master -i a.known.slave.com + orchestrator -c which-main -i a.known.subordinate.com - orchestrator -c which-master + orchestrator -c which-main -i not given, implicitly assumed local hostname - which-slaves - Output the fully-qualified hostname:port list of slaves (one per line) of a given instance (or empty - list if instance is not a master to anyone). Examples: + which-subordinates + Output the fully-qualified hostname:port list of subordinates (one per line) of a given instance (or empty + list if instance is not a main to anyone).
Examples: - orchestrator -c which-slaves -i a.known.instance.com + orchestrator -c which-subordinates -i a.known.instance.com - orchestrator -c which-slaves + orchestrator -c which-subordinates -i not given, implicitly assumed local hostname get-cluster-heuristic-lag For a given cluster (indicated by an instance or alias), output a heuristic "representative" lag of that cluster. - The output is obtained by examining the slaves that are member of "which-cluster-osc-slaves"-command, and - getting the maximum slave lag of those slaves. Recall that those slaves are a subset of the entire cluster, + The output is obtained by examining the subordinates that are member of "which-cluster-osc-subordinates"-command, and + getting the maximum subordinate lag of those subordinates. Recall that those subordinates are a subset of the entire cluster, and that they are ebing polled periodically. Hence the output of this command is not necessarily up-to-date - and does not represent all slaves in cluster. Examples: + and does not represent all subordinates in cluster. Examples: orchestrator -c get-cluster-heuristic-lag -i instance.that.is.part.of.cluster.com @@ -701,7 +701,7 @@ Cheatsheet: snapshot-topologies Take a snapshot of existing topologies. This will record minimal replication topology data: the identity - of an instance, its master and its cluster. + of an instance, its main and its cluster. Taking a snapshot later allows for reviewing changes in topologies. One might wish to invoke this command on a daily basis, and later be able to solve questions like 'where was this instacne replicating from before we moved it?', 'which instances were replication from this instance a week ago?' etc. Example: @@ -786,7 +786,7 @@ Cheatsheet: recover Do auto-recovery given a dead instance. Orchestrator chooses the best course of action. - The given instance must be acknowledged as dead and have slaves, or else there's nothing to do. + The given instance must be acknowledged as dead and have subordinates, or else there's nothing to do. See "replication-analysis" command. Orchestrator executes external processes as configured by *Processes variables. --debug is your friend. Example: @@ -799,49 +799,49 @@ Cheatsheet: orchestrator -c recover-lite -i dead.instance.com --debug - force-master-takeover - Forcibly discard master and promote another (direct child) instance instead, even if everything is running well. + force-main-takeover + Forcibly discard main and promote another (direct child) instance instead, even if everything is running well. This allows for planned switchover. NOTE: - You must specify the instance to promote via "-d" - - Promoted instance must be a direct child of the existing master - - This will not work in a master-master configuration - - Orchestrator just treats this command as a DeadMaster failover scenario + - Promoted instance must be a direct child of the existing main + - This will not work in a main-main configuration + - Orchestrator just treats this command as a DeadMain failover scenario - It is STRONGLY suggested that you first relocate everything below your chosen instance-to-promote. It *is* a planned failover thing. - Otherwise orchestrator will do its thing in moving instances around, hopefully promoting your requested server on top. - Orchestrator will issue all relevant pre-failover and post-failover external processes. - - In this command orchestrator will not issue 'SET GLOBAL read_only=1' on the existing master, nor will - it issue a 'FLUSH TABLES WITH READ LOCK'. 
Please see the 'graceful-master-takeover' command. + - In this command orchestrator will not issue 'SET GLOBAL read_only=1' on the existing main, nor will + it issue a 'FLUSH TABLES WITH READ LOCK'. Please see the 'graceful-main-takeover' command. Examples: - orchestrator -c force-master-takeover -alias mycluster -d immediate.child.of.master.com - Indicate cluster by alias. Orchestrator automatically figures out the master + orchestrator -c force-main-takeover -alias mycluster -d immediate.child.of.main.com - Indicate cluster by alias. Orchestrator automatically figures out the main - orchestrator -c force-master-takeover -i instance.in.relevant.cluster.com -d immediate.child.of.master.com - Indicate cluster by an instance. You don't structly need to specify the master, orchestrator - will infer the master's identify. + orchestrator -c force-main-takeover -i instance.in.relevant.cluster.com -d immediate.child.of.main.com + Indicate cluster by an instance. You don't strictly need to specify the main, orchestrator + will infer the main's identity. - graceful-master-takeover - Gracefully discard master and promote another (direct child) instance instead, even if everything is running well. + graceful-main-takeover + Gracefully discard main and promote another (direct child) instance instead, even if everything is running well. This allows for planned switchover. NOTE: - - Promoted instance must be a direct child of the existing master - - Promoted instance must be the *only* direct child of the existing master. It *is* a planned failover thing. - - Orchestrator will first issue a "set global read_only=1" on existing master - - It will promote candidate master to the binlog positions of the existing master after issuing the above - - There _could_ still be statements issued and executed on the existing master by SUPER users, but those are ignored. - - Orchestrator then proceeds to handle a DeadMaster failover scenario + - Promoted instance must be a direct child of the existing main + - Promoted instance must be the *only* direct child of the existing main. It *is* a planned failover thing. + - Orchestrator will first issue a "set global read_only=1" on existing main + - It will promote candidate main to the binlog positions of the existing main after issuing the above + - There _could_ still be statements issued and executed on the existing main by SUPER users, but those are ignored. + - Orchestrator then proceeds to handle a DeadMain failover scenario - Orchestrator will issue all relevant pre-failover and post-failover external processes. Examples: - orchestrator -c graceful-master-takeover -alias mycluster - Indicate cluster by alias. Orchestrator automatically figures out the master and verifies it has a single direct replica + orchestrator -c graceful-main-takeover -alias mycluster - Indicate cluster by alias. Orchestrator automatically figures out the main and verifies it has a single direct replica - orchestrator -c force-master-takeover -i instance.in.relevant.cluster.com - Indicate cluster by an instance. You don't structly need to specify the master, orchestrator - will infer the master's identify. + orchestrator -c graceful-main-takeover -i instance.in.relevant.cluster.com - Indicate cluster by an instance. You don't strictly need to specify the main, orchestrator + will infer the main's identity. replication-analysis Request an analysis of potential crash incidents in all known topologies.
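Both takeover commands above state that orchestrator will issue the relevant pre-failover and post-failover external processes; those hooks are plain command-line templates containing placeholders such as {failedHost}, {failedPort}, {successorHost} and {successorPort} (see the *Processes configuration options further below). As a minimal, self-contained Go sketch of how such a template could be expanded into a concrete command line (the helper name and the hook path are invented for illustration; this is not orchestrator's actual code):

package main

import (
	"fmt"
	"strings"
)

// expandPlaceholders substitutes {name} tokens in a hook command template
// with concrete values. The placeholder names follow the ones documented
// for the *Processes configuration options; the function itself is only
// an illustration, not part of the patch.
func expandPlaceholders(template string, values map[string]string) string {
	for name, value := range values {
		template = strings.Replace(template, "{"+name+"}", value, -1)
	}
	return template
}

func main() {
	// Hypothetical hook command, as it might appear in a PostFailoverProcesses entry.
	hook := "/usr/local/bin/notify-failover --failed {failedHost}:{failedPort} --successor {successorHost}:{successorPort}"
	fmt.Println(expandPlaceholders(hook, map[string]string{
		"failedHost":    "old-main.example.com",
		"failedPort":    "3306",
		"successorHost": "promoted-subordinate.example.com",
		"successorPort": "3306",
	}))
}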
@@ -875,15 +875,15 @@ Cheatsheet: Instance meta commands register-candidate - Indicate that a specific instance is a preferred candidate for master promotion. Upon a dead master + Indicate that a specific instance is a preferred candidate for main promotion. Upon a dead main recovery, orchestrator will do its best to promote instances that are marked as candidates. However orchestrator cannot guarantee this will always work. Issues like version compatabilities, binlog format etc. are limiting factors. - You will want to mark an instance as a candidate when: it is replicating directly from the master, has - binary logs and log_slave_updates is enabled, uses same binlog_format as its siblings, compatible version + You will want to mark an instance as a candidate when: it is replicating directly from the main, has + binary logs and log_subordinate_updates is enabled, uses same binlog_format as its siblings, compatible version as its siblings. If you're using DataCenterPattern & PhysicalEnvironmentPattern (see configuration), you would further wish to make sure you have a candidate in each data center. - Orchestrator first promotes the best-possible slave, and only then replaces it with your candidate, + Orchestrator first promotes the best-possible subordinate, and only then replaces it with your candidate, and only if both in same datcenter and physical enviroment. An instance needs to continuously be marked as candidate, so as to make sure orchestrator is not wasting time with stale instances. Orchestrator periodically clears candidate-registration for instances that have @@ -896,12 +896,12 @@ Cheatsheet: -i not given, implicitly assumed local hostname register-hostname-unresolve - Assigns the given instance a virtual (aka "unresolved") name. When moving slaves under an instance with assigned + Assigns the given instance a virtual (aka "unresolved") name. When moving subordinates under an instance with assigned "unresolve" name, orchestrator issues a CHANGE MASTER TO MASTER_HOST='' ... - This is useful in cases where your master is behind virtual IP (e.g. active/passive masters with shared storage or DRBD, + This is useful in cases where your main is behind virtual IP (e.g. active/passive mains with shared storage or DRBD, e.g. binlog servers sharing common VIP). - A "repoint" command is useful after "register-hostname-unresolve": you can repoint slaves of the instance to their exact - same location, and orchestrator will swap the fqdn of their master with the unresolved name. + A "repoint" command is useful after "register-hostname-unresolve": you can repoint subordinates of the instance to their exact + same location, and orchestrator will swap the fqdn of their main with the unresolved name. Such registration must be periodic. Orchestrator automatically expires such registration after ExpiryHostnameResolvesMinutes. Example: @@ -909,25 +909,25 @@ Cheatsheet: deregister-hostname-unresolve Explicitly deregister/dosassociate a hostname with an "unresolved" name. Orchestrator merely remvoes the association, but does - not touch any slave at this point. A "repoint" command can be useful right after calling this command to change slave's master host - name (assumed to be an "unresolved" name, such as a VIP) with the real fqdn of the master host. + not touch any subordinate at this point. A "repoint" command can be useful right after calling this command to change subordinate's main host + name (assumed to be an "unresolved" name, such as a VIP) with the real fqdn of the main host. 
Example: orchestrator -c deregister-hostname-unresolve -i instance.fqdn.com set-heuristic-domain-instance This is a temporary (sync your watches, watch for next ice age) command which registers the cluster domain name of a given cluster - with the master/writer host for that cluster. It is a one-time-master-discovery operation. + with the main/writer host for that cluster. It is a one-time-main-discovery operation. At this time orchestrator may also act as a small & simple key-value store (recall the "temporary" indication). - Master failover operations will overwrite the domain instance identity. Orchestrator so turns into a mini master-discovery + Main failover operations will overwrite the domain instance identity. Orchestrator thus turns into a mini main-discovery service (I said "TEMPORARY"). Really there are other tools for the job. See also: which-heuristic-domain-instance Example: orchestrator -c set-heuristic-domain-instance --alias some_alias - Detects the domain name for given cluster, identifies the writer master of the cluster, associates the two in key-value store + Detects the domain name for given cluster, identifies the writer main of the cluster, associates the two in key-value store orchestrator -c set-heuristic-domain-instance -i instance.of.some.cluster - Cluster is inferred by a member instance (the instance is not necessarily the master) + Cluster is inferred by a member instance (the instance is not necessarily the main) Misc commands diff --git a/go/config/config.go b/go/config/config.go index 67859634..668c0b83 100644 --- a/go/config/config.go +++ b/go/config/config.go @@ -68,9 +68,9 @@ type Configuration struct { MySQLTopologyReadTimeoutSeconds int // Number of seconds before topology mysql read operation is aborted (driver-side). Used for all but discovery queries. MySQLInterpolateParams bool // Do not use sql prepare statement if true DefaultInstancePort int // In case port was not specified on command line - SlaveLagQuery string // custom query to check on slave lg (e.g. heartbeat table) - SlaveStartPostWaitMilliseconds int // Time to wait after START SLAVE before re-readong instance (give slave chance to connect to master) - DiscoverByShowSlaveHosts bool // Attempt SHOW SLAVE HOSTS before PROCESSLIST + SubordinateLagQuery string // custom query to check on subordinate lag (e.g. heartbeat table) + SubordinateStartPostWaitMilliseconds int // Time to wait after START SLAVE before re-reading instance (give subordinate chance to connect to main) + DiscoverByShowSubordinateHosts bool // Attempt SHOW SLAVE HOSTS before PROCESSLIST InstancePollSeconds uint // Number of seconds between instance reads BufferInstanceWrites bool // Discovery process saves instances in bulk updates. This optimises backend DB load. InstanceWriteBufferSize int // Instance write buffer size (max number of instances to flush in one INSERT ODKU) @@ -96,7 +96,7 @@ type Configuration struct { ReasonableMaintenanceReplicationLagSeconds int // Above this value move-up and move-below are blocked MaintenanceExpireMinutes uint // Minutes after which a maintenance flag is considered stale and is cleared MaintenancePurgeDays uint // Days after which maintenance entries are purged from the database - CandidateInstanceExpireMinutes uint // Minutes after which a suggestion to use an instance as a candidate slave (to be preferably promoted on master failover) is expired.
+ CandidateInstanceExpireMinutes uint // Minutes after which a suggestion to use an instance as a candidate subordinate (to be preferably promoted on main failover) is expired. AuditLogFile string // Name of log file for audit operations. Disabled when empty. AuditToSyslog bool // If true, audit messages are written to syslog AuditPageSize int @@ -114,18 +114,18 @@ type Configuration struct { AccessTokenUseExpirySeconds uint // Time by which an issued token must be used AccessTokenExpiryMinutes uint // Time after which HTTP access token expires ClusterNameToAlias map[string]string // map between regex matching cluster name to a human friendly alias - DetectClusterAliasQuery string // Optional query (executed on topology instance) that returns the alias of a cluster. Query will only be executed on cluster master (though until the topology's master is resovled it may execute on other/all slaves). If provided, must return one row, one column - DetectClusterDomainQuery string // Optional query (executed on topology instance) that returns the VIP/CNAME/Alias/whatever domain name for the master of this cluster. Query will only be executed on cluster master (though until the topology's master is resovled it may execute on other/all slaves). If provided, must return one row, one column + DetectClusterAliasQuery string // Optional query (executed on topology instance) that returns the alias of a cluster. Query will only be executed on cluster main (though until the topology's main is resolved it may execute on other/all subordinates). If provided, must return one row, one column + DetectClusterDomainQuery string // Optional query (executed on topology instance) that returns the VIP/CNAME/Alias/whatever domain name for the main of this cluster. Query will only be executed on cluster main (though until the topology's main is resolved it may execute on other/all subordinates). If provided, must return one row, one column DetectInstanceAliasQuery string // Optional query (executed on topology instance) that returns the alias of an instance. If provided, must return one row, one column DetectPromotionRuleQuery string // Optional query (executed on topology instance) that returns the promotion rule of an instance. If provided, must return one row, one column. DataCenterPattern string // Regexp pattern with one group, extracting the datacenter name from the hostname PhysicalEnvironmentPattern string // Regexp pattern with one group, extracting physical environment info from hostname (e.g. combination of datacenter & prod/dev env) DetectDataCenterQuery string // Optional query (executed on topology instance) that returns the data center of an instance. If provided, must return one row, one column. Overrides DataCenterPattern and useful for installments where DC cannot be inferred by hostname DetectPhysicalEnvironmentQuery string // Optional query (executed on topology instance) that returns the physical environment of an instance. If provided, must return one row, one column. Overrides PhysicalEnvironmentPattern and useful for installments where env cannot be inferred by hostname - DetectSemiSyncEnforcedQuery string // Optional query (executed on topology instance) to determine whether semi-sync is fully enforced for master writes (async fallback is not allowed under any circumstance). If provided, must return one row, one column, value 0 or 1.
+ DetectSemiSyncEnforcedQuery string // Optional query (executed on topology instance) to determine whether semi-sync is fully enforced for main writes (async fallback is not allowed under any circumstance). If provided, must return one row, one column, value 0 or 1. SupportFuzzyPoolHostnames bool // Should "submit-pool-instances" command be able to pass list of fuzzy instances (fuzzy means non-fqdn, but unique enough to recognize). Defaults 'true', implies more queries on backend db InstancePoolExpiryMinutes uint // Time after which entries in database_instance_pool are expired (resubmit via `submit-pool-instances`) - PromotionIgnoreHostnameFilters []string // Orchestrator will not promote slaves with hostname matching pattern (via -c recovery; for example, avoid promoting dev-dedicated machines) + PromotionIgnoreHostnameFilters []string // Orchestrator will not promote subordinates with hostname matching pattern (via -c recovery; for example, avoid promoting dev-dedicated machines) ServeAgentsHttp bool // Spawn another HTTP interface dedicated for orchestrator-agent AgentsUseSSL bool // When "true" orchestrator will listen on agents port with SSL as well as connect to agents via SSL AgentsUseMutualTLS bool // When "true" Use mutual TLS for the server to agent communication @@ -164,23 +164,23 @@ type Configuration struct { RecoveryPeriodBlockMinutes int // (supported for backwards compatibility but please use newer `RecoveryPeriodBlockSeconds` instead) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on smae instance as well as flapping RecoveryPeriodBlockSeconds int // (overrides `RecoveryPeriodBlockMinutes`) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on smae instance as well as flapping RecoveryIgnoreHostnameFilters []string // Recovery analysis will completely ignore hosts matching given patterns - RecoverMasterClusterFilters []string // Only do master recovery on clusters matching these regexp patterns (of course the ".*" pattern matches everything) - RecoverIntermediateMasterClusterFilters []string // Only do IM recovery on clusters matching these regexp patterns (of course the ".*" pattern matches everything) + RecoverMainClusterFilters []string // Only do main recovery on clusters matching these regexp patterns (of course the ".*" pattern matches everything) + RecoverIntermediateMainClusterFilters []string // Only do IM recovery on clusters matching these regexp patterns (of course the ".*" pattern matches everything) ProcessesShellCommand string // Shell that executes command scripts - OnFailureDetectionProcesses []string // Processes to execute when detecting a failover scenario (before making a decision whether to failover or not). May and should use some of these placeholders: {failureType}, {failureDescription}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countSlaves}, {slaveHosts}, {isDowntimed}, {autoMasterRecovery}, {autoIntermediateMasterRecovery} - PreFailoverProcesses []string // Processes to execute before doing a failover (aborting operation should any once of them exits with non-zero code; order of execution undefined). 
May and should use some of these placeholders: {failureType}, {failureDescription}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countSlaves}, {slaveHosts}, {isDowntimed} - PostFailoverProcesses []string // Processes to execute after doing a failover (order of execution undefined). May and should use some of these placeholders: {failureType}, {failureDescription}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countSlaves}, {slaveHosts}, {isDowntimed}, {isSuccessful}, {lostSlaves} - PostUnsuccessfulFailoverProcesses []string // Processes to execute after a not-completely-successful failover (order of execution undefined). May and should use some of these placeholders: {failureType}, {failureDescription}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countSlaves}, {slaveHosts}, {isDowntimed}, {isSuccessful}, {lostSlaves} - PostMasterFailoverProcesses []string // Processes to execute after doing a master failover (order of execution undefined). Uses same placeholders as PostFailoverProcesses - PostIntermediateMasterFailoverProcesses []string // Processes to execute after doing a master failover (order of execution undefined). Uses same placeholders as PostFailoverProcesses - UnreachableMasterWithStaleSlavesProcesses []string // Processes to execute when detecting an UnreachableMasterWithStaleSlaves scenario. - CoMasterRecoveryMustPromoteOtherCoMaster bool // When 'false', anything can get promoted (and candidates are prefered over others). When 'true', orchestrator will promote the other co-master or else fail - DetachLostSlavesAfterMasterFailover bool // Should slaves that are not to be lost in master recovery (i.e. were more up-to-date than promoted slave) be forcibly detached - ApplyMySQLPromotionAfterMasterFailover bool // Should orchestrator take upon itself to apply MySQL master promotion: set read_only=0, detach replication, etc. - MasterFailoverLostInstancesDowntimeMinutes uint // Number of minutes to downtime any server that was lost after a master failover (including failed master & lost slaves). 0 to disable - MasterFailoverDetachSlaveMasterHost bool // Should orchestrator issue a detach-slave-master-host on newly promoted master (this makes sure the new master will not attempt to replicate old master if that comes back to life). Defaults 'false'. Meaningless if ApplyMySQLPromotionAfterMasterFailover is 'true'. - PostponeSlaveRecoveryOnLagMinutes uint // On crash recovery, slaves that are lagging more than given minutes are only resurrected late in the recovery process, after master/IM has been elected and processes executed. Value of 0 disables this feature - OSCIgnoreHostnameFilters []string // OSC slaves recommendation will ignore slave hostnames matching given patterns + OnFailureDetectionProcesses []string // Processes to execute when detecting a failover scenario (before making a decision whether to failover or not). 
May and should use some of these placeholders: {failureType}, {failureDescription}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countSubordinates}, {subordinateHosts}, {isDowntimed}, {autoMainRecovery}, {autoIntermediateMainRecovery} + PreFailoverProcesses []string // Processes to execute before doing a failover (aborting operation should any once of them exits with non-zero code; order of execution undefined). May and should use some of these placeholders: {failureType}, {failureDescription}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countSubordinates}, {subordinateHosts}, {isDowntimed} + PostFailoverProcesses []string // Processes to execute after doing a failover (order of execution undefined). May and should use some of these placeholders: {failureType}, {failureDescription}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countSubordinates}, {subordinateHosts}, {isDowntimed}, {isSuccessful}, {lostSubordinates} + PostUnsuccessfulFailoverProcesses []string // Processes to execute after a not-completely-successful failover (order of execution undefined). May and should use some of these placeholders: {failureType}, {failureDescription}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countSubordinates}, {subordinateHosts}, {isDowntimed}, {isSuccessful}, {lostSubordinates} + PostMainFailoverProcesses []string // Processes to execute after doing a main failover (order of execution undefined). Uses same placeholders as PostFailoverProcesses + PostIntermediateMainFailoverProcesses []string // Processes to execute after doing a main failover (order of execution undefined). Uses same placeholders as PostFailoverProcesses + UnreachableMainWithStaleSubordinatesProcesses []string // Processes to execute when detecting an UnreachableMainWithStaleSubordinates scenario. + CoMainRecoveryMustPromoteOtherCoMain bool // When 'false', anything can get promoted (and candidates are prefered over others). When 'true', orchestrator will promote the other co-main or else fail + DetachLostSubordinatesAfterMainFailover bool // Should subordinates that are not to be lost in main recovery (i.e. were more up-to-date than promoted subordinate) be forcibly detached + ApplyMySQLPromotionAfterMainFailover bool // Should orchestrator take upon itself to apply MySQL main promotion: set read_only=0, detach replication, etc. + MainFailoverLostInstancesDowntimeMinutes uint // Number of minutes to downtime any server that was lost after a main failover (including failed main & lost subordinates). 0 to disable + MainFailoverDetachSubordinateMainHost bool // Should orchestrator issue a detach-subordinate-main-host on newly promoted main (this makes sure the new main will not attempt to replicate old main if that comes back to life). Defaults 'false'. Meaningless if ApplyMySQLPromotionAfterMainFailover is 'true'. + PostponeSubordinateRecoveryOnLagMinutes uint // On crash recovery, subordinates that are lagging more than given minutes are only resurrected late in the recovery process, after main/IM has been elected and processes executed. 
Value of 0 disables this feature + OSCIgnoreHostnameFilters []string // OSC subordinates recommendation will ignore subordinate hostnames matching given patterns GraphiteAddr string // Optional; address of graphite port. If supplied, metrics will be written here GraphitePath string // Prefix for graphite path. May include {hostname} magic placeholder GraphiteConvertHostnameDotsToUnderscores bool // If true, then hostname's dots are converted to underscores before being used in graphite path @@ -236,8 +236,8 @@ func newConfiguration() *Configuration { BinlogFileHistoryDays: 0, UnseenInstanceForgetHours: 240, SnapshotTopologiesIntervalHours: 0, - SlaveStartPostWaitMilliseconds: 1000, - DiscoverByShowSlaveHosts: false, + SubordinateStartPostWaitMilliseconds: 1000, + DiscoverByShowSubordinateHosts: false, DiscoveryMaxConcurrency: 300, DiscoveryQueueCapacity: 100000, InstanceBulkOperationsWaitTimeoutSeconds: 10, @@ -317,22 +317,22 @@ func newConfiguration() *Configuration { RecoveryPeriodBlockMinutes: 60, RecoveryPeriodBlockSeconds: 3600, RecoveryIgnoreHostnameFilters: []string{}, - RecoverMasterClusterFilters: []string{}, - RecoverIntermediateMasterClusterFilters: []string{}, + RecoverMainClusterFilters: []string{}, + RecoverIntermediateMainClusterFilters: []string{}, ProcessesShellCommand: "bash", OnFailureDetectionProcesses: []string{}, PreFailoverProcesses: []string{}, - PostMasterFailoverProcesses: []string{}, - PostIntermediateMasterFailoverProcesses: []string{}, + PostMainFailoverProcesses: []string{}, + PostIntermediateMainFailoverProcesses: []string{}, PostFailoverProcesses: []string{}, PostUnsuccessfulFailoverProcesses: []string{}, - UnreachableMasterWithStaleSlavesProcesses: []string{}, - CoMasterRecoveryMustPromoteOtherCoMaster: true, - DetachLostSlavesAfterMasterFailover: true, - ApplyMySQLPromotionAfterMasterFailover: false, - MasterFailoverLostInstancesDowntimeMinutes: 0, - MasterFailoverDetachSlaveMasterHost: false, - PostponeSlaveRecoveryOnLagMinutes: 0, + UnreachableMainWithStaleSubordinatesProcesses: []string{}, + CoMainRecoveryMustPromoteOtherCoMain: true, + DetachLostSubordinatesAfterMainFailover: true, + ApplyMySQLPromotionAfterMainFailover: false, + MainFailoverLostInstancesDowntimeMinutes: 0, + MainFailoverDetachSubordinateMainHost: false, + PostponeSubordinateRecoveryOnLagMinutes: 0, OSCIgnoreHostnameFilters: []string{}, GraphiteAddr: "", GraphitePath: "", diff --git a/go/db/db.go b/go/db/db.go index b4cad14e..aa0102ee 100644 --- a/go/db/db.go +++ b/go/db/db.go @@ -55,21 +55,21 @@ var generateSQLBase = []string{ version varchar(128) CHARACTER SET ascii NOT NULL, binlog_format varchar(16) CHARACTER SET ascii NOT NULL, log_bin tinyint(3) unsigned NOT NULL, - log_slave_updates tinyint(3) unsigned NOT NULL, + log_subordinate_updates tinyint(3) unsigned NOT NULL, binary_log_file varchar(128) CHARACTER SET ascii NOT NULL, binary_log_pos bigint(20) unsigned NOT NULL, - master_host varchar(128) CHARACTER SET ascii NOT NULL, - master_port smallint(5) unsigned NOT NULL, - slave_sql_running tinyint(3) unsigned NOT NULL, - slave_io_running tinyint(3) unsigned NOT NULL, - master_log_file varchar(128) CHARACTER SET ascii NOT NULL, - read_master_log_pos bigint(20) unsigned NOT NULL, - relay_master_log_file varchar(128) CHARACTER SET ascii NOT NULL, - exec_master_log_pos bigint(20) unsigned NOT NULL, - seconds_behind_master bigint(20) unsigned DEFAULT NULL, - slave_lag_seconds bigint(20) unsigned DEFAULT NULL, - num_slave_hosts int(10) unsigned NOT NULL, - slave_hosts text CHARACTER 
SET ascii NOT NULL, + main_host varchar(128) CHARACTER SET ascii NOT NULL, + main_port smallint(5) unsigned NOT NULL, + subordinate_sql_running tinyint(3) unsigned NOT NULL, + subordinate_io_running tinyint(3) unsigned NOT NULL, + main_log_file varchar(128) CHARACTER SET ascii NOT NULL, + read_main_log_pos bigint(20) unsigned NOT NULL, + relay_main_log_file varchar(128) CHARACTER SET ascii NOT NULL, + exec_main_log_pos bigint(20) unsigned NOT NULL, + seconds_behind_main bigint(20) unsigned DEFAULT NULL, + subordinate_lag_seconds bigint(20) unsigned DEFAULT NULL, + num_subordinate_hosts int(10) unsigned NOT NULL, + subordinate_hosts text CHARACTER SET ascii NOT NULL, cluster_name tinytext CHARACTER SET ascii NOT NULL, PRIMARY KEY (hostname,port), KEY cluster_name_idx (cluster_name(128)), @@ -268,8 +268,8 @@ var generateSQLBase = []string{ snapshot_unix_timestamp INT UNSIGNED NOT NULL, hostname varchar(128) CHARACTER SET ascii NOT NULL, port smallint(5) unsigned NOT NULL, - master_host varchar(128) CHARACTER SET ascii NOT NULL, - master_port smallint(5) unsigned NOT NULL, + main_host varchar(128) CHARACTER SET ascii NOT NULL, + main_port smallint(5) unsigned NOT NULL, cluster_name tinytext CHARACTER SET ascii NOT NULL, PRIMARY KEY (snapshot_unix_timestamp, hostname, port), KEY cluster_name_idx (snapshot_unix_timestamp, cluster_name(128)) @@ -311,8 +311,8 @@ var generateSQLBase = []string{ analysis varchar(128) NOT NULL, cluster_name varchar(128) NOT NULL, cluster_alias varchar(128) NOT NULL, - count_affected_slaves int unsigned NOT NULL, - slave_hosts text NOT NULL, + count_affected_subordinates int unsigned NOT NULL, + subordinate_hosts text NOT NULL, PRIMARY KEY (detection_id), UNIQUE KEY hostname_port_active_period_uidx (hostname, port, in_active_period, end_active_period_unixtime), KEY in_active_start_period_idx (in_active_period, start_active_period) @@ -347,20 +347,20 @@ var generateSQLBase = []string{ ) ENGINE=InnoDB DEFAULT CHARSET=ascii `, ` - CREATE TABLE IF NOT EXISTS master_position_equivalence ( + CREATE TABLE IF NOT EXISTS main_position_equivalence ( equivalence_id bigint unsigned not null auto_increment, - master1_hostname varchar(128) CHARACTER SET ascii NOT NULL, - master1_port smallint(5) unsigned NOT NULL, - master1_binary_log_file varchar(128) CHARACTER SET ascii NOT NULL, - master1_binary_log_pos bigint(20) unsigned NOT NULL, - master2_hostname varchar(128) CHARACTER SET ascii NOT NULL, - master2_port smallint(5) unsigned NOT NULL, - master2_binary_log_file varchar(128) CHARACTER SET ascii NOT NULL, - master2_binary_log_pos bigint(20) unsigned NOT NULL, + main1_hostname varchar(128) CHARACTER SET ascii NOT NULL, + main1_port smallint(5) unsigned NOT NULL, + main1_binary_log_file varchar(128) CHARACTER SET ascii NOT NULL, + main1_binary_log_pos bigint(20) unsigned NOT NULL, + main2_hostname varchar(128) CHARACTER SET ascii NOT NULL, + main2_port smallint(5) unsigned NOT NULL, + main2_binary_log_file varchar(128) CHARACTER SET ascii NOT NULL, + main2_binary_log_pos bigint(20) unsigned NOT NULL, last_suggested TIMESTAMP NOT NULL, PRIMARY KEY (equivalence_id), - UNIQUE KEY equivalence_uidx (master1_hostname, master1_port, master1_binary_log_file, master1_binary_log_pos, master2_hostname, master2_port), - KEY master2_idx (master2_hostname, master2_port, master2_binary_log_file, master2_binary_log_pos), + UNIQUE KEY equivalence_uidx (main1_hostname, main1_port, main1_binary_log_file, main1_binary_log_pos, main2_hostname, main2_port), + KEY main2_idx (main2_hostname, 
main2_port, main2_binary_log_file, main2_binary_log_pos), KEY last_suggested_idx(last_suggested) ) ENGINE=InnoDB DEFAULT CHARSET=ascii `, @@ -517,7 +517,7 @@ var generateSQLPatches = []string{ ` ALTER TABLE database_instance - ADD COLUMN last_sql_error TEXT NOT NULL AFTER exec_master_log_pos + ADD COLUMN last_sql_error TEXT NOT NULL AFTER exec_main_log_pos `, ` ALTER TABLE @@ -532,7 +532,7 @@ var generateSQLPatches = []string{ ` ALTER TABLE database_instance - ADD COLUMN oracle_gtid TINYINT UNSIGNED NOT NULL AFTER slave_io_running + ADD COLUMN oracle_gtid TINYINT UNSIGNED NOT NULL AFTER subordinate_io_running `, ` ALTER TABLE @@ -542,7 +542,7 @@ var generateSQLPatches = []string{ ` ALTER TABLE database_instance - ADD COLUMN relay_log_file varchar(128) CHARACTER SET ascii NOT NULL AFTER exec_master_log_pos + ADD COLUMN relay_log_file varchar(128) CHARACTER SET ascii NOT NULL AFTER exec_main_log_pos `, ` ALTER TABLE @@ -552,7 +552,7 @@ var generateSQLPatches = []string{ ` ALTER TABLE database_instance - ADD INDEX master_host_port_idx (master_host, master_port) + ADD INDEX main_host_port_idx (main_host, main_port) `, ` ALTER TABLE @@ -567,7 +567,7 @@ var generateSQLPatches = []string{ ` ALTER TABLE database_instance - ADD COLUMN has_replication_filters TINYINT UNSIGNED NOT NULL AFTER slave_io_running + ADD COLUMN has_replication_filters TINYINT UNSIGNED NOT NULL AFTER subordinate_io_running `, ` ALTER TABLE @@ -597,7 +597,7 @@ var generateSQLPatches = []string{ ` ALTER TABLE database_instance - ADD COLUMN is_co_master TINYINT UNSIGNED NOT NULL AFTER replication_depth + ADD COLUMN is_co_main TINYINT UNSIGNED NOT NULL AFTER replication_depth `, ` ALTER TABLE @@ -607,7 +607,7 @@ var generateSQLPatches = []string{ ` ALTER TABLE database_instance - ADD COLUMN sql_delay INT UNSIGNED NOT NULL AFTER slave_lag_seconds + ADD COLUMN sql_delay INT UNSIGNED NOT NULL AFTER subordinate_lag_seconds `, ` ALTER TABLE @@ -615,8 +615,8 @@ var generateSQLPatches = []string{ ADD COLUMN analysis varchar(128) CHARACTER SET ascii NOT NULL, ADD COLUMN cluster_name varchar(128) CHARACTER SET ascii NOT NULL, ADD COLUMN cluster_alias varchar(128) CHARACTER SET ascii NOT NULL, - ADD COLUMN count_affected_slaves int unsigned NOT NULL, - ADD COLUMN slave_hosts text CHARACTER SET ascii NOT NULL + ADD COLUMN count_affected_subordinates int unsigned NOT NULL, + ADD COLUMN subordinate_hosts text CHARACTER SET ascii NOT NULL `, ` ALTER TABLE hostname_unresolve @@ -681,9 +681,9 @@ var generateSQLPatches = []string{ ` ALTER TABLE topology_recovery - ADD COLUMN participating_instances text CHARACTER SET ascii NOT NULL after slave_hosts, - ADD COLUMN lost_slaves text CHARACTER SET ascii NOT NULL after participating_instances, - ADD COLUMN all_errors text CHARACTER SET ascii NOT NULL after lost_slaves + ADD COLUMN participating_instances text CHARACTER SET ascii NOT NULL after subordinate_hosts, + ADD COLUMN lost_subordinates text CHARACTER SET ascii NOT NULL after participating_instances, + ADD COLUMN all_errors text CHARACTER SET ascii NOT NULL after lost_subordinates `, ` ALTER TABLE audit @@ -735,7 +735,7 @@ var generateSQLPatches = []string{ MODIFY last_suggested timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP `, ` - ALTER TABLE master_position_equivalence + ALTER TABLE main_position_equivalence MODIFY last_suggested timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP `, ` diff --git a/go/http/api.go b/go/http/api.go index 3aaae95b..e8725cf5 100644 --- a/go/http/api.go +++ b/go/http/api.go @@ -329,8 +329,8 @@ func (this *HttpAPI) 
MoveUp(params martini.Params, r render.Render, req *http.Re r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Instance %+v moved up", instanceKey), Details: instance}) } -// MoveUpSlaves attempts to move up all slaves of an instance -func (this *HttpAPI) MoveUpSlaves(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// MoveUpSubordinates attempts to move up all subordinates of an instance +func (this *HttpAPI) MoveUpSubordinates(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -341,17 +341,17 @@ func (this *HttpAPI) MoveUpSlaves(params martini.Params, r render.Render, req *h return } - slaves, newMaster, err, errs := inst.MoveUpSlaves(&instanceKey, req.URL.Query().Get("pattern")) + subordinates, newMain, err, errs := inst.MoveUpSubordinates(&instanceKey, req.URL.Query().Get("pattern")) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Moved up %d slaves of %+v below %+v; %d errors: %+v", len(slaves), instanceKey, newMaster.Key, len(errs), errs), Details: newMaster.Key}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Moved up %d subordinates of %+v below %+v; %d errors: %+v", len(subordinates), instanceKey, newMain.Key, len(errs), errs), Details: newMain.Key}) } -// MoveUpSlaves attempts to move up all slaves of an instance -func (this *HttpAPI) RepointSlaves(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// MoveUpSubordinates attempts to move up all subordinates of an instance +func (this *HttpAPI) RepointSubordinates(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -362,17 +362,17 @@ func (this *HttpAPI) RepointSlaves(params martini.Params, r render.Render, req * return } - slaves, err, _ := inst.RepointSlaves(&instanceKey, req.URL.Query().Get("pattern")) + subordinates, err, _ := inst.RepointSubordinates(&instanceKey, req.URL.Query().Get("pattern")) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Repointed %d slaves of %+v", len(slaves), instanceKey), Details: instanceKey}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Repointed %d subordinates of %+v", len(subordinates), instanceKey), Details: instanceKey}) } -// MakeCoMaster attempts to make an instance co-master with its own master -func (this *HttpAPI) MakeCoMaster(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// MakeCoMain attempts to make an instance co-main with its own main +func (this *HttpAPI) MakeCoMain(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -383,17 +383,17 @@ func (this *HttpAPI) MakeCoMaster(params martini.Params, r render.Render, req *h r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - instance, err := inst.MakeCoMaster(&instanceKey) + instance, err := inst.MakeCoMain(&instanceKey) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Instance made co-master: %+v", instance.Key), 
Details: instance}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Instance made co-main: %+v", instance.Key), Details: instance}) } -// ResetSlave makes a slave forget about its master, effectively breaking the replication -func (this *HttpAPI) ResetSlave(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// ResetSubordinate makes a subordinate forget about its main, effectively breaking the replication +func (this *HttpAPI) ResetSubordinate(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -404,18 +404,18 @@ func (this *HttpAPI) ResetSlave(params martini.Params, r render.Render, req *htt r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - instance, err := inst.ResetSlaveOperation(&instanceKey) + instance, err := inst.ResetSubordinateOperation(&instanceKey) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Slave reset on %+v", instance.Key), Details: instance}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Subordinate reset on %+v", instance.Key), Details: instance}) } -// DetachSlave corrupts a slave's binlog corrdinates (though encodes it in such way +// DetachSubordinate corrupts a subordinate's binlog coordinates (though encodes it in such a way // that is reversible), effectively breaking replication -func (this *HttpAPI) DetachSlave(params martini.Params, r render.Render, req *http.Request, user auth.User) { +func (this *HttpAPI) DetachSubordinate(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -426,18 +426,18 @@ func (this *HttpAPI) DetachSlave(params martini.Params, r render.Render, req *ht r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - instance, err := inst.DetachSlaveOperation(&instanceKey) + instance, err := inst.DetachSubordinateOperation(&instanceKey) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Slave detached: %+v", instance.Key), Details: instance}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Subordinate detached: %+v", instance.Key), Details: instance}) } -// ReattachSlave reverts a DetachSlave commands by reassigning the correct +// ReattachSubordinate reverts a DetachSubordinate command by reassigning the correct // binlog coordinates to an instance -func (this *HttpAPI) ReattachSlave(params martini.Params, r render.Render, req *http.Request, user auth.User) { +func (this *HttpAPI) ReattachSubordinate(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -448,18 +448,18 @@ func (this *HttpAPI) ReattachSlave(params martini.Params, r render.Render, req * r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - instance, err := inst.ReattachSlaveOperation(&instanceKey) + instance, err := inst.ReattachSubordinateOperation(&instanceKey) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Slave reattached: %+v", instance.Key), Details: instance}) + r.JSON(200,
&APIResponse{Code: OK, Message: fmt.Sprintf("Subordinate reattached: %+v", instance.Key), Details: instance}) } -// ReattachSlaveMasterHost reverts a DetachSlaveMasterHost command -// by resoting the original master hostname in CHANGE MASTER TO -func (this *HttpAPI) ReattachSlaveMasterHost(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// ReattachSubordinateMainHost reverts a DetachSubordinateMainHost command +// by resoting the original main hostname in CHANGE MASTER TO +func (this *HttpAPI) ReattachSubordinateMainHost(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -470,16 +470,16 @@ func (this *HttpAPI) ReattachSlaveMasterHost(params martini.Params, r render.Ren r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - instance, err := inst.ReattachSlaveMasterHost(&instanceKey) + instance, err := inst.ReattachSubordinateMainHost(&instanceKey) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Slave reattached: %+v", instance.Key), Details: instance}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Subordinate reattached: %+v", instance.Key), Details: instance}) } -// EnableGTID attempts to enable GTID on a slave +// EnableGTID attempts to enable GTID on a subordinate func (this *HttpAPI) EnableGTID(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) @@ -500,7 +500,7 @@ func (this *HttpAPI) EnableGTID(params martini.Params, r render.Render, req *htt r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Enabled GTID on %+v", instance.Key), Details: instance}) } -// DisableGTID attempts to disable GTID on a slave, and revert to binlog file:pos +// DisableGTID attempts to disable GTID on a subordinate, and revert to binlog file:pos func (this *HttpAPI) DisableGTID(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) @@ -573,8 +573,8 @@ func (this *HttpAPI) MoveBelowGTID(params martini.Params, r render.Render, req * r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Instance %+v moved below %+v via GTID", instanceKey, belowKey), Details: instance}) } -// MoveSlavesGTID attempts to move an instance below another, via GTID -func (this *HttpAPI) MoveSlavesGTID(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// MoveSubordinatesGTID attempts to move an instance below another, via GTID +func (this *HttpAPI) MoveSubordinatesGTID(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -590,17 +590,17 @@ func (this *HttpAPI) MoveSlavesGTID(params martini.Params, r render.Render, req return } - movedSlaves, _, err, errs := inst.MoveSlavesGTID(&instanceKey, &belowKey, req.URL.Query().Get("pattern")) + movedSubordinates, _, err, errs := inst.MoveSubordinatesGTID(&instanceKey, &belowKey, req.URL.Query().Get("pattern")) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Moved %d slaves of %+v below %+v via 
GTID; %d errors: %+v", len(movedSlaves), instanceKey, belowKey, len(errs), errs), Details: belowKey}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Moved %d subordinates of %+v below %+v via GTID; %d errors: %+v", len(movedSubordinates), instanceKey, belowKey, len(errs), errs), Details: belowKey}) } -// EnslaveSiblings -func (this *HttpAPI) EnslaveSiblings(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// EnsubordinateSiblings +func (this *HttpAPI) EnsubordinateSiblings(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -611,17 +611,17 @@ func (this *HttpAPI) EnslaveSiblings(params martini.Params, r render.Render, req return } - instance, count, err := inst.EnslaveSiblings(&instanceKey) + instance, count, err := inst.EnsubordinateSiblings(&instanceKey) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Enslaved %d siblings of %+v", count, instanceKey), Details: instance}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Ensubordinated %d siblings of %+v", count, instanceKey), Details: instance}) } -// EnslaveMaster -func (this *HttpAPI) EnslaveMaster(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// EnsubordinateMain +func (this *HttpAPI) EnsubordinateMain(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -632,13 +632,13 @@ func (this *HttpAPI) EnslaveMaster(params martini.Params, r render.Render, req * return } - instance, err := inst.EnslaveMaster(&instanceKey) + instance, err := inst.EnsubordinateMain(&instanceKey) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("%+v enslaved its master", instanceKey), Details: instance}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("%+v ensubordinated its main", instanceKey), Details: instance}) } // RelocateBelow attempts to move an instance below another, orchestrator choosing the best (potentially multi-step) @@ -668,8 +668,8 @@ func (this *HttpAPI) RelocateBelow(params martini.Params, r render.Render, req * r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Instance %+v relocated below %+v", instanceKey, belowKey), Details: instance}) } -// RelocateSlaves attempts to smartly relocate slaves of a given instance below another -func (this *HttpAPI) RelocateSlaves(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// RelocateSubordinates attempts to smartly relocate subordinates of a given instance below another +func (this *HttpAPI) RelocateSubordinates(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -685,16 +685,16 @@ func (this *HttpAPI) RelocateSlaves(params martini.Params, r render.Render, req return } - slaves, _, err, errs := inst.RelocateSlaves(&instanceKey, &belowKey, req.URL.Query().Get("pattern")) + subordinates, _, err, errs := inst.RelocateSubordinates(&instanceKey, &belowKey, req.URL.Query().Get("pattern")) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, 
&APIResponse{Code: OK, Message: fmt.Sprintf("Relocated %d slaves of %+v below %+v; %d errors: %+v", len(slaves), instanceKey, belowKey, len(errs), errs), Details: slaves}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Relocated %d subordinates of %+v below %+v; %d errors: %+v", len(subordinates), instanceKey, belowKey, len(errs), errs), Details: subordinates}) } -// MoveEquivalent attempts to move an instance below another, baseed on known equivalence master coordinates +// MoveEquivalent attempts to move an instance below another, baseed on known equivalence main coordinates func (this *HttpAPI) MoveEquivalent(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) @@ -802,8 +802,8 @@ func (this *HttpAPI) MatchUp(params martini.Params, r render.Render, req *http.R r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Instance %+v matched up at %+v", instanceKey, *matchedCoordinates), Details: instance}) } -// MultiMatchSlaves attempts to match all slaves of a given instance below another, efficiently -func (this *HttpAPI) MultiMatchSlaves(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// MultiMatchSubordinates attempts to match all subordinates of a given instance below another, efficiently +func (this *HttpAPI) MultiMatchSubordinates(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -819,17 +819,17 @@ func (this *HttpAPI) MultiMatchSlaves(params martini.Params, r render.Render, re return } - slaves, newMaster, err, errs := inst.MultiMatchSlaves(&instanceKey, &belowKey, req.URL.Query().Get("pattern")) + subordinates, newMain, err, errs := inst.MultiMatchSubordinates(&instanceKey, &belowKey, req.URL.Query().Get("pattern")) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Matched %d slaves of %+v below %+v; %d errors: %+v", len(slaves), instanceKey, newMaster.Key, len(errs), errs), Details: newMaster.Key}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Matched %d subordinates of %+v below %+v; %d errors: %+v", len(subordinates), instanceKey, newMain.Key, len(errs), errs), Details: newMain.Key}) } -// MatchUpSlaves attempts to match up all slaves of an instance -func (this *HttpAPI) MatchUpSlaves(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// MatchUpSubordinates attempts to match up all subordinates of an instance +func (this *HttpAPI) MatchUpSubordinates(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -840,18 +840,18 @@ func (this *HttpAPI) MatchUpSlaves(params martini.Params, r render.Render, req * return } - slaves, newMaster, err, errs := inst.MatchUpSlaves(&instanceKey, req.URL.Query().Get("pattern")) + subordinates, newMain, err, errs := inst.MatchUpSubordinates(&instanceKey, req.URL.Query().Get("pattern")) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Matched up %d slaves of %+v below %+v; %d errors: %+v", len(slaves), instanceKey, newMaster.Key, len(errs), errs), Details: newMaster.Key}) + r.JSON(200, 
&APIResponse{Code: OK, Message: fmt.Sprintf("Matched up %d subordinates of %+v below %+v; %d errors: %+v", len(subordinates), instanceKey, newMain.Key, len(errs), errs), Details: newMain.Key}) } -// RegroupSlaves attempts to pick a slave of a given instance and make it enslave its siblings, using any +// RegroupSubordinates attempts to pick a subordinate of a given instance and make it ensubordinate its siblings, using any // method possible (GTID, Pseudo-GTID, binlog servers) -func (this *HttpAPI) RegroupSlaves(params martini.Params, r render.Render, req *http.Request, user auth.User) { +func (this *HttpAPI) RegroupSubordinates(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -862,20 +862,20 @@ func (this *HttpAPI) RegroupSlaves(params martini.Params, r render.Render, req * return } - lostSlaves, equalSlaves, aheadSlaves, cannotReplicateSlaves, promotedSlave, err := inst.RegroupSlaves(&instanceKey, false, nil, nil) - lostSlaves = append(lostSlaves, cannotReplicateSlaves...) + lostSubordinates, equalSubordinates, aheadSubordinates, cannotReplicateSubordinates, promotedSubordinate, err := inst.RegroupSubordinates(&instanceKey, false, nil, nil) + lostSubordinates = append(lostSubordinates, cannotReplicateSubordinates...) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("promoted slave: %s, lost: %d, trivial: %d, pseudo-gtid: %d", - promotedSlave.Key.DisplayString(), len(lostSlaves), len(equalSlaves), len(aheadSlaves)), Details: promotedSlave.Key}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("promoted subordinate: %s, lost: %d, trivial: %d, pseudo-gtid: %d", + promotedSubordinate.Key.DisplayString(), len(lostSubordinates), len(equalSubordinates), len(aheadSubordinates)), Details: promotedSubordinate.Key}) } -// RegroupSlaves attempts to pick a slave of a given instance and make it enslave its siblings, efficiently, +// RegroupSubordinates attempts to pick a subordinate of a given instance and make it ensubordinate its siblings, efficiently, // using pseudo-gtid if necessary -func (this *HttpAPI) RegroupSlavesPseudoGTID(params martini.Params, r render.Render, req *http.Request, user auth.User) { +func (this *HttpAPI) RegroupSubordinatesPseudoGTID(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -886,20 +886,20 @@ func (this *HttpAPI) RegroupSlavesPseudoGTID(params martini.Params, r render.Ren return } - lostSlaves, equalSlaves, aheadSlaves, cannotReplicateSlaves, promotedSlave, err := inst.RegroupSlavesPseudoGTID(&instanceKey, false, nil, nil) - lostSlaves = append(lostSlaves, cannotReplicateSlaves...) + lostSubordinates, equalSubordinates, aheadSubordinates, cannotReplicateSubordinates, promotedSubordinate, err := inst.RegroupSubordinatesPseudoGTID(&instanceKey, false, nil, nil) + lostSubordinates = append(lostSubordinates, cannotReplicateSubordinates...) 
if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("promoted slave: %s, lost: %d, trivial: %d, pseudo-gtid: %d", - promotedSlave.Key.DisplayString(), len(lostSlaves), len(equalSlaves), len(aheadSlaves)), Details: promotedSlave.Key}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("promoted subordinate: %s, lost: %d, trivial: %d, pseudo-gtid: %d", + promotedSubordinate.Key.DisplayString(), len(lostSubordinates), len(equalSubordinates), len(aheadSubordinates)), Details: promotedSubordinate.Key}) } -// RegroupSlavesGTID attempts to pick a slave of a given instance and make it enslave its siblings, efficiently, using GTID -func (this *HttpAPI) RegroupSlavesGTID(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// RegroupSubordinatesGTID attempts to pick a subordinate of a given instance and make it ensubordinate its siblings, efficiently, using GTID +func (this *HttpAPI) RegroupSubordinatesGTID(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -910,20 +910,20 @@ func (this *HttpAPI) RegroupSlavesGTID(params martini.Params, r render.Render, r return } - lostSlaves, movedSlaves, cannotReplicateSlaves, promotedSlave, err := inst.RegroupSlavesGTID(&instanceKey, false, nil) - lostSlaves = append(lostSlaves, cannotReplicateSlaves...) + lostSubordinates, movedSubordinates, cannotReplicateSubordinates, promotedSubordinate, err := inst.RegroupSubordinatesGTID(&instanceKey, false, nil) + lostSubordinates = append(lostSubordinates, cannotReplicateSubordinates...) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("promoted slave: %s, lost: %d, moved: %d", - promotedSlave.Key.DisplayString(), len(lostSlaves), len(movedSlaves)), Details: promotedSlave.Key}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("promoted subordinate: %s, lost: %d, moved: %d", + promotedSubordinate.Key.DisplayString(), len(lostSubordinates), len(movedSubordinates)), Details: promotedSubordinate.Key}) } -// RegroupSlavesBinlogServers attempts to pick a slave of a given instance and make it enslave its siblings, efficiently, using GTID -func (this *HttpAPI) RegroupSlavesBinlogServers(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// RegroupSubordinatesBinlogServers attempts to pick a subordinate of a given instance and make it ensubordinate its siblings, efficiently, using binlog servers +func (this *HttpAPI) RegroupSubordinatesBinlogServers(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -934,7 +934,7 @@ func (this *HttpAPI) RegroupSlavesBinlogServers(params martini.Params, r render. return } - _, promotedBinlogServer, err := inst.RegroupSlavesBinlogServers(&instanceKey, false) + _, promotedBinlogServer, err := inst.RegroupSubordinatesBinlogServers(&instanceKey, false) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) @@ -945,8 +945,8 @@ func (this *HttpAPI) RegroupSlavesBinlogServers(params martini.Params, r render.
promotedBinlogServer.Key.DisplayString()), Details: promotedBinlogServer.Key}) } -// MakeMaster attempts to make the given instance a master, and match its siblings to be its slaves -func (this *HttpAPI) MakeMaster(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// MakeMain attempts to make the given instance a main, and match its siblings to be its subordinates +func (this *HttpAPI) MakeMain(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -957,18 +957,18 @@ func (this *HttpAPI) MakeMaster(params martini.Params, r render.Render, req *htt return } - instance, err := inst.MakeMaster(&instanceKey) + instance, err := inst.MakeMain(&instanceKey) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Instance %+v now made master", instanceKey), Details: instance}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Instance %+v now made main", instanceKey), Details: instance}) } -// MakeLocalMaster attempts to make the given instance a local master: take over its master by +// MakeLocalMain attempts to make the given instance a local main: take over its main by // enslaving its siblings and replicating from its grandparent. -func (this *HttpAPI) MakeLocalMaster(params martini.Params, r render.Render, req *http.Request, user auth.User) { +func (this *HttpAPI) MakeLocalMain(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -979,13 +979,13 @@ func (this *HttpAPI) MakeLocalMaster(params martini.Params, r render.Render, req return } - instance, err := inst.MakeLocalMaster(&instanceKey) + instance, err := inst.MakeLocalMain(&instanceKey) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Instance %+v now made local master", instanceKey), Details: instance}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Instance %+v now made local main", instanceKey), Details: instance}) } // SkipQuery skips a single query on a failed replication instance @@ -1009,8 +1009,8 @@ func (this *HttpAPI) SkipQuery(params martini.Params, r render.Render, req *http r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Query skipped on %+v", instance.Key), Details: instance}) } -// StartSlave starts replication on given instance -func (this *HttpAPI) StartSlave(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// StartSubordinate starts replication on given instance +func (this *HttpAPI) StartSubordinate(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -1021,17 +1021,17 @@ func (this *HttpAPI) StartSlave(params martini.Params, r render.Render, req *htt r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - instance, err := inst.StartSlave(&instanceKey) + instance, err := inst.StartSubordinate(&instanceKey) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Slave started: %+v", instance.Key), Details: instance}) + r.JSON(200, &APIResponse{Code: 
OK, Message: fmt.Sprintf("Subordinate started: %+v", instance.Key), Details: instance}) } -// RestartSlave stops & starts replication on given instance -func (this *HttpAPI) RestartSlave(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// RestartSubordinate stops & starts replication on given instance +func (this *HttpAPI) RestartSubordinate(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -1042,17 +1042,17 @@ func (this *HttpAPI) RestartSlave(params martini.Params, r render.Render, req *h r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - instance, err := inst.RestartSlave(&instanceKey) + instance, err := inst.RestartSubordinate(&instanceKey) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Slave restarted: %+v", instance.Key), Details: instance}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Subordinate restarted: %+v", instance.Key), Details: instance}) } -// StopSlave stops replication on given instance -func (this *HttpAPI) StopSlave(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// StopSubordinate stops replication on given instance +func (this *HttpAPI) StopSubordinate(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -1063,17 +1063,17 @@ func (this *HttpAPI) StopSlave(params martini.Params, r render.Render, req *http r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - instance, err := inst.StopSlave(&instanceKey) + instance, err := inst.StopSubordinate(&instanceKey) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Slave stopped: %+v", instance.Key), Details: instance}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Subordinate stopped: %+v", instance.Key), Details: instance}) } -// StopSlaveNicely stops replication on given instance, such that sql thead is aligned with IO thread -func (this *HttpAPI) StopSlaveNicely(params martini.Params, r render.Render, req *http.Request, user auth.User) { +// StopSubordinateNicely stops replication on given instance, such that the SQL thread is aligned with the IO thread +func (this *HttpAPI) StopSubordinateNicely(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -1084,17 +1084,17 @@ func (this *HttpAPI) StopSlaveNicely(params martini.Params, r render.Render, req r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - instance, err := inst.StopSlaveNicely(&instanceKey, 0) + instance, err := inst.StopSubordinateNicely(&instanceKey, 0) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return } - r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Slave stopped nicely: %+v", instance.Key), Details: instance}) + r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Subordinate stopped nicely: %+v", instance.Key), Details: instance}) } -// MasterEquivalent provides (possibly empty) list of master coordinates equivalent to the given ones -func (this *HttpAPI) MasterEquivalent(params
martini.Params, r render.Render, req *http.Request, user auth.User) { +// MainEquivalent provides (possibly empty) list of main coordinates equivalent to the given ones +func (this *HttpAPI) MainEquivalent(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"}) return @@ -1111,7 +1111,7 @@ func (this *HttpAPI) MasterEquivalent(params martini.Params, r render.Render, re } instanceCoordinates := &inst.InstanceBinlogCoordinates{Key: instanceKey, Coordinates: coordinates} - equivalentCoordinates, err := inst.GetEquivalentMasterCoordinates(instanceCoordinates) + equivalentCoordinates, err := inst.GetEquivalentMainCoordinates(instanceCoordinates) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()}) return @@ -1253,9 +1253,9 @@ func (this *HttpAPI) ClusterInfoByAlias(params martini.Params, r render.Render, this.ClusterInfo(params, r, req) } -// ClusterOSCSlaves returns heuristic list of OSC slaves -func (this *HttpAPI) ClusterOSCSlaves(params martini.Params, r render.Render, req *http.Request) { - instances, err := inst.GetClusterOSCSlaves(params["clusterName"]) +// ClusterOSCSubordinates returns heuristic list of OSC subordinates +func (this *HttpAPI) ClusterOSCSubordinates(params martini.Params, r render.Render, req *http.Request) { + instances, err := inst.GetClusterOSCSubordinates(params["clusterName"]) if err != nil { r.JSON(200, &APIResponse{Code: ERROR, Message: fmt.Sprintf("%+v", err)}) @@ -2059,8 +2059,8 @@ func (this *HttpAPI) RegisterCandidate(params martini.Params, r render.Render, r // AutomatedRecoveryFilters retuens list of clusters which are configured with automated recovery func (this *HttpAPI) AutomatedRecoveryFilters(params martini.Params, r render.Render, req *http.Request) { automatedRecoveryMap := make(map[string]interface{}) - automatedRecoveryMap["RecoverMasterClusterFilters"] = config.Config.RecoverMasterClusterFilters - automatedRecoveryMap["RecoverIntermediateMasterClusterFilters"] = config.Config.RecoverIntermediateMasterClusterFilters + automatedRecoveryMap["RecoverMainClusterFilters"] = config.Config.RecoverMainClusterFilters + automatedRecoveryMap["RecoverIntermediateMainClusterFilters"] = config.Config.RecoverIntermediateMainClusterFilters automatedRecoveryMap["RecoveryIgnoreHostnameFilters"] = config.Config.RecoveryIgnoreHostnameFilters r.JSON(200, &APIResponse{Code: OK, Message: fmt.Sprintf("Automated recovery configuration details"), Details: automatedRecoveryMap}) @@ -2281,52 +2281,52 @@ func (this *HttpAPI) RegisterRequests(m *martini.ClassicMartini) { // Smart relocation: m.Get(this.URLPrefix+"/api/relocate/:host/:port/:belowHost/:belowPort", this.RelocateBelow) m.Get(this.URLPrefix+"/api/relocate-below/:host/:port/:belowHost/:belowPort", this.RelocateBelow) - m.Get(this.URLPrefix+"/api/relocate-slaves/:host/:port/:belowHost/:belowPort", this.RelocateSlaves) - m.Get(this.URLPrefix+"/api/regroup-slaves/:host/:port", this.RegroupSlaves) + m.Get(this.URLPrefix+"/api/relocate-subordinates/:host/:port/:belowHost/:belowPort", this.RelocateSubordinates) + m.Get(this.URLPrefix+"/api/regroup-subordinates/:host/:port", this.RegroupSubordinates) // Classic file:pos relocation: m.Get(this.URLPrefix+"/api/move-up/:host/:port", this.MoveUp) - m.Get(this.URLPrefix+"/api/move-up-slaves/:host/:port", this.MoveUpSlaves) + m.Get(this.URLPrefix+"/api/move-up-subordinates/:host/:port", this.MoveUpSubordinates) 
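As a usage illustration for the renamed routes registered above and below, here is a minimal client sketch; it is not part of the patch. The relocate-subordinates path and the APIResponse fields (Code, Message, Details) are taken from this file, while the base URL http://localhost:3000, the empty URLPrefix and the example hostnames are assumptions that would need adjusting per deployment.

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

func main() {
	// Hypothetical deployment: orchestrator HTTP API on localhost:3000 with an empty URLPrefix.
	url := "http://localhost:3000/api/relocate-subordinates/replica1.example.com/3306/newmain.example.com/3306"
	resp, err := http.Get(url)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Mirror of the APIResponse fields used by the handlers above; Code and Details are kept as raw JSON
	// because their concrete types vary per endpoint.
	var apiResponse struct {
		Code    json.RawMessage `json:"Code"`
		Message string          `json:"Message"`
		Details json.RawMessage `json:"Details"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&apiResponse); err != nil {
		log.Fatal(err)
	}
	// The handlers above reply HTTP 200 even on failure, so inspect Code/Message rather than the status code.
	fmt.Printf("code=%s message=%s\n", apiResponse.Code, apiResponse.Message)
}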
m.Get(this.URLPrefix+"/api/move-below/:host/:port/:siblingHost/:siblingPort", this.MoveBelow) m.Get(this.URLPrefix+"/api/move-equivalent/:host/:port/:belowHost/:belowPort", this.MoveEquivalent) - m.Get(this.URLPrefix+"/api/repoint-slaves/:host/:port", this.RepointSlaves) - m.Get(this.URLPrefix+"/api/make-co-master/:host/:port", this.MakeCoMaster) - m.Get(this.URLPrefix+"/api/enslave-siblings/:host/:port", this.EnslaveSiblings) - m.Get(this.URLPrefix+"/api/enslave-master/:host/:port", this.EnslaveMaster) - m.Get(this.URLPrefix+"/api/master-equivalent/:host/:port/:logFile/:logPos", this.MasterEquivalent) + m.Get(this.URLPrefix+"/api/repoint-subordinates/:host/:port", this.RepointSubordinates) + m.Get(this.URLPrefix+"/api/make-co-main/:host/:port", this.MakeCoMain) + m.Get(this.URLPrefix+"/api/ensubordinate-siblings/:host/:port", this.EnsubordinateSiblings) + m.Get(this.URLPrefix+"/api/ensubordinate-main/:host/:port", this.EnsubordinateMain) + m.Get(this.URLPrefix+"/api/main-equivalent/:host/:port/:logFile/:logPos", this.MainEquivalent) // Binlog server relocation: - m.Get(this.URLPrefix+"/api/regroup-slaves-bls/:host/:port", this.RegroupSlavesBinlogServers) + m.Get(this.URLPrefix+"/api/regroup-subordinates-bls/:host/:port", this.RegroupSubordinatesBinlogServers) // GTID relocation: m.Get(this.URLPrefix+"/api/move-below-gtid/:host/:port/:belowHost/:belowPort", this.MoveBelowGTID) - m.Get(this.URLPrefix+"/api/move-slaves-gtid/:host/:port/:belowHost/:belowPort", this.MoveSlavesGTID) - m.Get(this.URLPrefix+"/api/regroup-slaves-gtid/:host/:port", this.RegroupSlavesGTID) + m.Get(this.URLPrefix+"/api/move-subordinates-gtid/:host/:port/:belowHost/:belowPort", this.MoveSubordinatesGTID) + m.Get(this.URLPrefix+"/api/regroup-subordinates-gtid/:host/:port", this.RegroupSubordinatesGTID) // Pseudo-GTID relocation: m.Get(this.URLPrefix+"/api/match/:host/:port/:belowHost/:belowPort", this.MatchBelow) m.Get(this.URLPrefix+"/api/match-below/:host/:port/:belowHost/:belowPort", this.MatchBelow) m.Get(this.URLPrefix+"/api/match-up/:host/:port", this.MatchUp) - m.Get(this.URLPrefix+"/api/match-slaves/:host/:port/:belowHost/:belowPort", this.MultiMatchSlaves) - m.Get(this.URLPrefix+"/api/multi-match-slaves/:host/:port/:belowHost/:belowPort", this.MultiMatchSlaves) - m.Get(this.URLPrefix+"/api/match-up-slaves/:host/:port", this.MatchUpSlaves) - m.Get(this.URLPrefix+"/api/regroup-slaves-pgtid/:host/:port", this.RegroupSlavesPseudoGTID) + m.Get(this.URLPrefix+"/api/match-subordinates/:host/:port/:belowHost/:belowPort", this.MultiMatchSubordinates) + m.Get(this.URLPrefix+"/api/multi-match-subordinates/:host/:port/:belowHost/:belowPort", this.MultiMatchSubordinates) + m.Get(this.URLPrefix+"/api/match-up-subordinates/:host/:port", this.MatchUpSubordinates) + m.Get(this.URLPrefix+"/api/regroup-subordinates-pgtid/:host/:port", this.RegroupSubordinatesPseudoGTID) // Legacy, need to revisit: - m.Get(this.URLPrefix+"/api/make-master/:host/:port", this.MakeMaster) - m.Get(this.URLPrefix+"/api/make-local-master/:host/:port", this.MakeLocalMaster) + m.Get(this.URLPrefix+"/api/make-main/:host/:port", this.MakeMain) + m.Get(this.URLPrefix+"/api/make-local-main/:host/:port", this.MakeLocalMain) // Replication, general: m.Get(this.URLPrefix+"/api/enable-gtid/:host/:port", this.EnableGTID) m.Get(this.URLPrefix+"/api/disable-gtid/:host/:port", this.DisableGTID) m.Get(this.URLPrefix+"/api/skip-query/:host/:port", this.SkipQuery) - m.Get(this.URLPrefix+"/api/start-slave/:host/:port", this.StartSlave) - 
m.Get(this.URLPrefix+"/api/restart-slave/:host/:port", this.RestartSlave) - m.Get(this.URLPrefix+"/api/stop-slave/:host/:port", this.StopSlave) - m.Get(this.URLPrefix+"/api/stop-slave-nice/:host/:port", this.StopSlaveNicely) - m.Get(this.URLPrefix+"/api/reset-slave/:host/:port", this.ResetSlave) - m.Get(this.URLPrefix+"/api/detach-slave/:host/:port", this.DetachSlave) - m.Get(this.URLPrefix+"/api/reattach-slave/:host/:port", this.ReattachSlave) - m.Get(this.URLPrefix+"/api/reattach-slave-master-host/:host/:port", this.ReattachSlaveMasterHost) + m.Get(this.URLPrefix+"/api/start-subordinate/:host/:port", this.StartSubordinate) + m.Get(this.URLPrefix+"/api/restart-subordinate/:host/:port", this.RestartSubordinate) + m.Get(this.URLPrefix+"/api/stop-subordinate/:host/:port", this.StopSubordinate) + m.Get(this.URLPrefix+"/api/stop-subordinate-nice/:host/:port", this.StopSubordinateNicely) + m.Get(this.URLPrefix+"/api/reset-subordinate/:host/:port", this.ResetSubordinate) + m.Get(this.URLPrefix+"/api/detach-subordinate/:host/:port", this.DetachSubordinate) + m.Get(this.URLPrefix+"/api/reattach-subordinate/:host/:port", this.ReattachSubordinate) + m.Get(this.URLPrefix+"/api/reattach-subordinate-main-host/:host/:port", this.ReattachSubordinateMainHost) // Instance: m.Get(this.URLPrefix+"/api/set-read-only/:host/:port", this.SetReadOnly) @@ -2355,7 +2355,7 @@ func (this *HttpAPI) RegisterRequests(m *martini.ClassicMartini) { m.Get(this.URLPrefix+"/api/cluster/instance/:host/:port", this.ClusterByInstance) m.Get(this.URLPrefix+"/api/cluster-info/:clusterName", this.ClusterInfo) m.Get(this.URLPrefix+"/api/cluster-info/alias/:clusterAlias", this.ClusterInfoByAlias) - m.Get(this.URLPrefix+"/api/cluster-osc-slaves/:clusterName", this.ClusterOSCSlaves) + m.Get(this.URLPrefix+"/api/cluster-osc-subordinates/:clusterName", this.ClusterOSCSubordinates) m.Get(this.URLPrefix+"/api/set-cluster-alias/:clusterName", this.SetClusterAlias) m.Get(this.URLPrefix+"/api/clusters", this.Clusters) m.Get(this.URLPrefix+"/api/clusters-info", this.ClustersInfo) diff --git a/go/inst/analysis.go b/go/inst/analysis.go index 9cce4908..7f21a53f 100644 --- a/go/inst/analysis.go +++ b/go/inst/analysis.go @@ -25,56 +25,56 @@ type StructureAnalysisCode string const ( NoProblem AnalysisCode = "NoProblem" - DeadMasterWithoutSlaves = "DeadMasterWithoutSlaves" - DeadMaster = "DeadMaster" - DeadMasterAndSlaves = "DeadMasterAndSlaves" - DeadMasterAndSomeSlaves = "DeadMasterAndSomeSlaves" - UnreachableMasterWithStaleSlaves = "UnreachableMasterWithStaleSlaves" - UnreachableMaster = "UnreachableMaster" - MasterSingleSlaveNotReplicating = "MasterSingleSlaveNotReplicating" - MasterSingleSlaveDead = "MasterSingleSlaveDead" - AllMasterSlavesNotReplicating = "AllMasterSlavesNotReplicating" - AllMasterSlavesNotReplicatingOrDead = "AllMasterSlavesNotReplicatingOrDead" - AllMasterSlavesStale = "AllMasterSlavesStale" - MasterWithoutSlaves = "MasterWithoutSlaves" - DeadCoMaster = "DeadCoMaster" - DeadCoMasterAndSomeSlaves = "DeadCoMasterAndSomeSlaves" - UnreachableCoMaster = "UnreachableCoMaster" - AllCoMasterSlavesNotReplicating = "AllCoMasterSlavesNotReplicating" - DeadIntermediateMaster = "DeadIntermediateMaster" - DeadIntermediateMasterWithSingleSlave = "DeadIntermediateMasterWithSingleSlave" - DeadIntermediateMasterWithSingleSlaveFailingToConnect = "DeadIntermediateMasterWithSingleSlaveFailingToConnect" - DeadIntermediateMasterAndSomeSlaves = "DeadIntermediateMasterAndSomeSlaves" - UnreachableIntermediateMaster = "UnreachableIntermediateMaster" - 
AllIntermediateMasterSlavesFailingToConnectOrDead = "AllIntermediateMasterSlavesFailingToConnectOrDead" - AllIntermediateMasterSlavesNotReplicating = "AllIntermediateMasterSlavesNotReplicating" - FirstTierSlaveFailingToConnectToMaster = "FirstTierSlaveFailingToConnectToMaster" - BinlogServerFailingToConnectToMaster = "BinlogServerFailingToConnectToMaster" + DeadMainWithoutSubordinates = "DeadMainWithoutSubordinates" + DeadMain = "DeadMain" + DeadMainAndSubordinates = "DeadMainAndSubordinates" + DeadMainAndSomeSubordinates = "DeadMainAndSomeSubordinates" + UnreachableMainWithStaleSubordinates = "UnreachableMainWithStaleSubordinates" + UnreachableMain = "UnreachableMain" + MainSingleSubordinateNotReplicating = "MainSingleSubordinateNotReplicating" + MainSingleSubordinateDead = "MainSingleSubordinateDead" + AllMainSubordinatesNotReplicating = "AllMainSubordinatesNotReplicating" + AllMainSubordinatesNotReplicatingOrDead = "AllMainSubordinatesNotReplicatingOrDead" + AllMainSubordinatesStale = "AllMainSubordinatesStale" + MainWithoutSubordinates = "MainWithoutSubordinates" + DeadCoMain = "DeadCoMain" + DeadCoMainAndSomeSubordinates = "DeadCoMainAndSomeSubordinates" + UnreachableCoMain = "UnreachableCoMain" + AllCoMainSubordinatesNotReplicating = "AllCoMainSubordinatesNotReplicating" + DeadIntermediateMain = "DeadIntermediateMain" + DeadIntermediateMainWithSingleSubordinate = "DeadIntermediateMainWithSingleSubordinate" + DeadIntermediateMainWithSingleSubordinateFailingToConnect = "DeadIntermediateMainWithSingleSubordinateFailingToConnect" + DeadIntermediateMainAndSomeSubordinates = "DeadIntermediateMainAndSomeSubordinates" + UnreachableIntermediateMain = "UnreachableIntermediateMain" + AllIntermediateMainSubordinatesFailingToConnectOrDead = "AllIntermediateMainSubordinatesFailingToConnectOrDead" + AllIntermediateMainSubordinatesNotReplicating = "AllIntermediateMainSubordinatesNotReplicating" + FirstTierSubordinateFailingToConnectToMain = "FirstTierSubordinateFailingToConnectToMain" + BinlogServerFailingToConnectToMain = "BinlogServerFailingToConnectToMain" ) const ( - StatementAndMixedLoggingSlavesStructureWarning StructureAnalysisCode = "StatementAndMixedLoggingSlavesStructureWarning" - StatementAndRowLoggingSlavesStructureWarning = "StatementAndRowLoggingSlavesStructureWarning" - MixedAndRowLoggingSlavesStructureWarning = "MixedAndRowLoggingSlavesStructureWarning" - MultipleMajorVersionsLoggingSlaves = "MultipleMajorVersionsLoggingSlaves" + StatementAndMixedLoggingSubordinatesStructureWarning StructureAnalysisCode = "StatementAndMixedLoggingSubordinatesStructureWarning" + StatementAndRowLoggingSubordinatesStructureWarning = "StatementAndRowLoggingSubordinatesStructureWarning" + MixedAndRowLoggingSubordinatesStructureWarning = "MixedAndRowLoggingSubordinatesStructureWarning" + MultipleMajorVersionsLoggingSubordinates = "MultipleMajorVersionsLoggingSubordinates" ) // ReplicationAnalysis notes analysis on replication chain status, per instance type ReplicationAnalysis struct { AnalyzedInstanceKey InstanceKey - AnalyzedInstanceMasterKey InstanceKey + AnalyzedInstanceMainKey InstanceKey ClusterDetails ClusterInfo - IsMaster bool - IsCoMaster bool + IsMain bool + IsCoMain bool LastCheckValid bool - CountSlaves uint - CountValidSlaves uint - CountValidReplicatingSlaves uint - CountSlavesFailingToConnectToMaster uint - CountStaleSlaves uint + CountSubordinates uint + CountValidSubordinates uint + CountValidReplicatingSubordinates uint + CountSubordinatesFailingToConnectToMain uint + 
CountStaleSubordinates uint ReplicationDepth uint - SlaveHosts InstanceKeyMap - IsFailingToConnectToMaster bool + SubordinateHosts InstanceKeyMap + IsFailingToConnectToMain bool Analysis AnalysisCode Description string StructureAnalysis []StructureAnalysisCode @@ -86,10 +86,10 @@ type ReplicationAnalysis struct { OracleGTIDImmediateTopology bool MariaDBGTIDImmediateTopology bool BinlogServerImmediateTopology bool - CountStatementBasedLoggingSlaves uint - CountMixedBasedLoggingSlaves uint - CountRowBasedLoggingSlaves uint - CountDistinctMajorVersionsLoggingSlaves uint + CountStatementBasedLoggingSubordinates uint + CountMixedBasedLoggingSubordinates uint + CountRowBasedLoggingSubordinates uint + CountDistinctMajorVersionsLoggingSubordinates uint } type ReplicationAnalysisChangelog struct { @@ -97,10 +97,10 @@ type ReplicationAnalysisChangelog struct { Changelog string } -// ReadSlaveHostsFromString parses and reads slave keys from comma delimited string -func (this *ReplicationAnalysis) ReadSlaveHostsFromString(slaveHostsString string) error { - this.SlaveHosts = *NewInstanceKeyMap() - return this.SlaveHosts.ReadCommaDelimitedList(slaveHostsString) +// ReadSubordinateHostsFromString parses and reads subordinate keys from comma delimited string +func (this *ReplicationAnalysis) ReadSubordinateHostsFromString(subordinateHostsString string) error { + this.SubordinateHosts = *NewInstanceKeyMap() + return this.SubordinateHosts.ReadCommaDelimitedList(subordinateHostsString) } // AnalysisString returns a human friendly description of all analysis issues diff --git a/go/inst/analysis_dao.go b/go/inst/analysis_dao.go index 7cdfea56..81d7d66a 100644 --- a/go/inst/analysis_dao.go +++ b/go/inst/analysis_dao.go @@ -38,7 +38,7 @@ func init() { var recentInstantAnalysis = cache.New(time.Duration(config.Config.RecoveryPollSeconds*2)*time.Second, time.Second) -// GetReplicationAnalysis will check for replication problems (dead master; unreachable master; etc) +// GetReplicationAnalysis will check for replication problems (dead main; unreachable main; etc) func GetReplicationAnalysis(clusterName string, includeDowntimed bool, auditAnalysis bool) ([]ReplicationAnalysis, error) { result := []ReplicationAnalysis{} @@ -48,74 +48,74 @@ func GetReplicationAnalysis(clusterName string, includeDowntimed bool, auditAnal analysisQueryReductionClause = ` HAVING (MIN( - master_instance.last_checked <= master_instance.last_seen - AND master_instance.last_attempted_check <= master_instance.last_seen + INTERVAL (2 * ?) SECOND + main_instance.last_checked <= main_instance.last_seen + AND main_instance.last_attempted_check <= main_instance.last_seen + INTERVAL (2 * ?) 
SECOND ) IS TRUE /* AS is_last_check_valid */) = 0 - OR (IFNULL(SUM(slave_instance.last_checked <= slave_instance.last_seen - AND slave_instance.slave_io_running = 0 - AND slave_instance.last_io_error RLIKE 'error (connecting|reconnecting) to master' - AND slave_instance.slave_sql_running = 1), - 0) /* AS count_slaves_failing_to_connect_to_master */ > 0) - OR (IFNULL(SUM(slave_instance.last_checked <= slave_instance.last_seen), - 0) /* AS count_valid_slaves */ < COUNT(slave_instance.server_id) /* AS count_slaves */) - OR (IFNULL(SUM(slave_instance.last_checked <= slave_instance.last_seen - AND slave_instance.slave_io_running != 0 - AND slave_instance.slave_sql_running != 0), - 0) /* AS count_valid_replicating_slaves */ < COUNT(slave_instance.server_id) /* AS count_slaves */) + OR (IFNULL(SUM(subordinate_instance.last_checked <= subordinate_instance.last_seen + AND subordinate_instance.subordinate_io_running = 0 + AND subordinate_instance.last_io_error RLIKE 'error (connecting|reconnecting) to main' + AND subordinate_instance.subordinate_sql_running = 1), + 0) /* AS count_subordinates_failing_to_connect_to_main */ > 0) + OR (IFNULL(SUM(subordinate_instance.last_checked <= subordinate_instance.last_seen), + 0) /* AS count_valid_subordinates */ < COUNT(subordinate_instance.server_id) /* AS count_subordinates */) + OR (IFNULL(SUM(subordinate_instance.last_checked <= subordinate_instance.last_seen + AND subordinate_instance.subordinate_io_running != 0 + AND subordinate_instance.subordinate_sql_running != 0), + 0) /* AS count_valid_replicating_subordinates */ < COUNT(subordinate_instance.server_id) /* AS count_subordinates */) OR (MIN( - master_instance.slave_sql_running = 1 - AND master_instance.slave_io_running = 0 - AND master_instance.last_io_error RLIKE 'error (connecting|reconnecting) to master' - ) /* AS is_failing_to_connect_to_master */) - OR (COUNT(slave_instance.server_id) /* AS count_slaves */ > 0) + main_instance.subordinate_sql_running = 1 + AND main_instance.subordinate_io_running = 0 + AND main_instance.last_io_error RLIKE 'error (connecting|reconnecting) to main' + ) /* AS is_failing_to_connect_to_main */) + OR (COUNT(subordinate_instance.server_id) /* AS count_subordinates */ > 0) ` args = append(args, config.Config.InstancePollSeconds) } - // "OR count_slaves > 0" above is a recent addition, which, granted, makes some previous conditions redundant. + // "OR count_subordinates > 0" above is a recent addition, which, granted, makes some previous conditions redundant. // It gives more output, and more "NoProblem" messages that I am now interested in for purpose of auditing in database_instance_analysis_changelog query := fmt.Sprintf(` SELECT - master_instance.hostname, - master_instance.port, - MIN(master_instance.master_host) AS master_host, - MIN(master_instance.master_port) AS master_port, - MIN(master_instance.cluster_name) AS cluster_name, - MIN(IFNULL(cluster_alias.alias, master_instance.cluster_name)) AS cluster_alias, + main_instance.hostname, + main_instance.port, + MIN(main_instance.main_host) AS main_host, + MIN(main_instance.main_port) AS main_port, + MIN(main_instance.cluster_name) AS cluster_name, + MIN(IFNULL(cluster_alias.alias, main_instance.cluster_name)) AS cluster_alias, MIN( - master_instance.last_checked <= master_instance.last_seen - AND master_instance.last_attempted_check <= master_instance.last_seen + INTERVAL (2 * ?) 
SECOND + main_instance.last_checked <= main_instance.last_seen + AND main_instance.last_attempted_check <= main_instance.last_seen + INTERVAL (2 * ?) SECOND ) IS TRUE AS is_last_check_valid, - MIN(master_instance.master_host IN ('' , '_') - OR master_instance.master_port = 0 - OR left(master_instance.master_host, 2) = '//') AS is_master, - MIN(master_instance.is_co_master) AS is_co_master, - MIN(CONCAT(master_instance.hostname, + MIN(main_instance.main_host IN ('' , '_') + OR main_instance.main_port = 0 + OR left(main_instance.main_host, 2) = '//') AS is_main, + MIN(main_instance.is_co_main) AS is_co_main, + MIN(CONCAT(main_instance.hostname, ':', - master_instance.port) = master_instance.cluster_name) AS is_cluster_master, - COUNT(slave_instance.server_id) AS count_slaves, - IFNULL(SUM(slave_instance.last_checked <= slave_instance.last_seen), - 0) AS count_valid_slaves, - IFNULL(SUM(slave_instance.last_checked <= slave_instance.last_seen - AND slave_instance.slave_io_running != 0 - AND slave_instance.slave_sql_running != 0), - 0) AS count_valid_replicating_slaves, - IFNULL(SUM(slave_instance.last_checked <= slave_instance.last_seen - AND slave_instance.slave_io_running = 0 - AND slave_instance.last_io_error RLIKE 'error (connecting|reconnecting) to master' - AND slave_instance.slave_sql_running = 1), - 0) AS count_slaves_failing_to_connect_to_master, + main_instance.port) = main_instance.cluster_name) AS is_cluster_main, + COUNT(subordinate_instance.server_id) AS count_subordinates, + IFNULL(SUM(subordinate_instance.last_checked <= subordinate_instance.last_seen), + 0) AS count_valid_subordinates, + IFNULL(SUM(subordinate_instance.last_checked <= subordinate_instance.last_seen + AND subordinate_instance.subordinate_io_running != 0 + AND subordinate_instance.subordinate_sql_running != 0), + 0) AS count_valid_replicating_subordinates, + IFNULL(SUM(subordinate_instance.last_checked <= subordinate_instance.last_seen + AND subordinate_instance.subordinate_io_running = 0 + AND subordinate_instance.last_io_error RLIKE 'error (connecting|reconnecting) to main' + AND subordinate_instance.subordinate_sql_running = 1), + 0) AS count_subordinates_failing_to_connect_to_main, IFNULL(SUM( current_relay_log_file=prev_relay_log_file and current_relay_log_pos=prev_relay_log_pos and current_seen != prev_seen), - 0) AS count_stale_slaves, - MIN(master_instance.replication_depth) AS replication_depth, - GROUP_CONCAT(slave_instance.Hostname, ':', slave_instance.Port) as slave_hosts, + 0) AS count_stale_subordinates, + MIN(main_instance.replication_depth) AS replication_depth, + GROUP_CONCAT(subordinate_instance.Hostname, ':', subordinate_instance.Port) as subordinate_hosts, MIN( - master_instance.slave_sql_running = 1 - AND master_instance.slave_io_running = 0 - AND master_instance.last_io_error RLIKE 'error (connecting|reconnecting) to master' - ) AS is_failing_to_connect_to_master, + main_instance.subordinate_sql_running = 1 + AND main_instance.subordinate_io_running = 0 + AND main_instance.last_io_error RLIKE 'error (connecting|reconnecting) to main' + ) AS is_failing_to_connect_to_main, MIN( database_instance_downtime.downtime_active IS NULL OR database_instance_downtime.end_timestamp < NOW() @@ -127,230 +127,230 @@ func GetReplicationAnalysis(clusterName string, includeDowntimed bool, auditAnal IFNULL(TIMESTAMPDIFF(SECOND, NOW(), database_instance_downtime.end_timestamp), 0) ) AS downtime_remaining_seconds, MIN( - master_instance.binlog_server + main_instance.binlog_server ) AS is_binlog_server, MIN( - 
master_instance.pseudo_gtid + main_instance.pseudo_gtid ) AS is_pseudo_gtid, MIN( - master_instance.supports_oracle_gtid + main_instance.supports_oracle_gtid ) AS supports_oracle_gtid, SUM( - slave_instance.oracle_gtid - ) AS count_oracle_gtid_slaves, - IFNULL(SUM(slave_instance.last_checked <= slave_instance.last_seen - AND slave_instance.oracle_gtid != 0), - 0) AS count_valid_oracle_gtid_slaves, + subordinate_instance.oracle_gtid + ) AS count_oracle_gtid_subordinates, + IFNULL(SUM(subordinate_instance.last_checked <= subordinate_instance.last_seen + AND subordinate_instance.oracle_gtid != 0), + 0) AS count_valid_oracle_gtid_subordinates, SUM( - slave_instance.binlog_server - ) AS count_binlog_server_slaves, - IFNULL(SUM(slave_instance.last_checked <= slave_instance.last_seen - AND slave_instance.binlog_server != 0), - 0) AS count_valid_binlog_server_slaves, + subordinate_instance.binlog_server + ) AS count_binlog_server_subordinates, + IFNULL(SUM(subordinate_instance.last_checked <= subordinate_instance.last_seen + AND subordinate_instance.binlog_server != 0), + 0) AS count_valid_binlog_server_subordinates, MIN( - master_instance.mariadb_gtid + main_instance.mariadb_gtid ) AS is_mariadb_gtid, SUM( - slave_instance.mariadb_gtid - ) AS count_mariadb_gtid_slaves, - IFNULL(SUM(slave_instance.last_checked <= slave_instance.last_seen - AND slave_instance.mariadb_gtid != 0), - 0) AS count_valid_mariadb_gtid_slaves, - IFNULL(SUM(slave_instance.log_bin - AND slave_instance.log_slave_updates - AND slave_instance.binlog_format = 'STATEMENT'), - 0) AS count_statement_based_loggin_slaves, - IFNULL(SUM(slave_instance.log_bin - AND slave_instance.log_slave_updates - AND slave_instance.binlog_format = 'MIXED'), - 0) AS count_mixed_based_loggin_slaves, - IFNULL(SUM(slave_instance.log_bin - AND slave_instance.log_slave_updates - AND slave_instance.binlog_format = 'ROW'), - 0) AS count_row_based_loggin_slaves, + subordinate_instance.mariadb_gtid + ) AS count_mariadb_gtid_subordinates, + IFNULL(SUM(subordinate_instance.last_checked <= subordinate_instance.last_seen + AND subordinate_instance.mariadb_gtid != 0), + 0) AS count_valid_mariadb_gtid_subordinates, + IFNULL(SUM(subordinate_instance.log_bin + AND subordinate_instance.log_subordinate_updates + AND subordinate_instance.binlog_format = 'STATEMENT'), + 0) AS count_statement_based_loggin_subordinates, + IFNULL(SUM(subordinate_instance.log_bin + AND subordinate_instance.log_subordinate_updates + AND subordinate_instance.binlog_format = 'MIXED'), + 0) AS count_mixed_based_loggin_subordinates, + IFNULL(SUM(subordinate_instance.log_bin + AND subordinate_instance.log_subordinate_updates + AND subordinate_instance.binlog_format = 'ROW'), + 0) AS count_row_based_loggin_subordinates, COUNT(DISTINCT IF( - slave_instance.log_bin AND slave_instance.log_slave_updates, - substring_index(slave_instance.version, '.', 2), + subordinate_instance.log_bin AND subordinate_instance.log_subordinate_updates, + substring_index(subordinate_instance.version, '.', 2), NULL) ) AS count_distinct_logging_major_versions FROM - database_instance master_instance + database_instance main_instance LEFT JOIN - hostname_resolve ON (master_instance.hostname = hostname_resolve.hostname) + hostname_resolve ON (main_instance.hostname = hostname_resolve.hostname) LEFT JOIN - database_instance slave_instance ON (COALESCE(hostname_resolve.resolved_hostname, - master_instance.hostname) = slave_instance.master_host - AND master_instance.port = slave_instance.master_port) + database_instance 
subordinate_instance ON (COALESCE(hostname_resolve.resolved_hostname, + main_instance.hostname) = subordinate_instance.main_host + AND main_instance.port = subordinate_instance.main_port) LEFT JOIN - database_instance_maintenance ON (master_instance.hostname = database_instance_maintenance.hostname - AND master_instance.port = database_instance_maintenance.port + database_instance_maintenance ON (main_instance.hostname = database_instance_maintenance.hostname + AND main_instance.port = database_instance_maintenance.port AND database_instance_maintenance.maintenance_active = 1) LEFT JOIN - database_instance_downtime ON (master_instance.hostname = database_instance_downtime.hostname - AND master_instance.port = database_instance_downtime.port + database_instance_downtime ON (main_instance.hostname = database_instance_downtime.hostname + AND main_instance.port = database_instance_downtime.port AND database_instance_downtime.downtime_active = 1) LEFT JOIN - cluster_alias ON (cluster_alias.cluster_name = master_instance.cluster_name) + cluster_alias ON (cluster_alias.cluster_name = main_instance.cluster_name) LEFT JOIN database_instance_recent_relaylog_history ON ( - slave_instance.hostname = database_instance_recent_relaylog_history.hostname - AND slave_instance.port = database_instance_recent_relaylog_history.port) + subordinate_instance.hostname = database_instance_recent_relaylog_history.hostname + AND subordinate_instance.port = database_instance_recent_relaylog_history.port) WHERE database_instance_maintenance.database_instance_maintenance_id IS NULL - AND ? IN ('', master_instance.cluster_name) + AND ? IN ('', main_instance.cluster_name) GROUP BY - master_instance.hostname, - master_instance.port + main_instance.hostname, + main_instance.port %s ORDER BY - is_master DESC , - is_cluster_master DESC, - count_slaves DESC + is_main DESC , + is_cluster_main DESC, + count_subordinates DESC `, analysisQueryReductionClause) err := db.QueryOrchestrator(query, args, func(m sqlutils.RowMap) error { a := ReplicationAnalysis{Analysis: NoProblem} - a.IsMaster = m.GetBool("is_master") - a.IsCoMaster = m.GetBool("is_co_master") + a.IsMain = m.GetBool("is_main") + a.IsCoMain = m.GetBool("is_co_main") a.AnalyzedInstanceKey = InstanceKey{Hostname: m.GetString("hostname"), Port: m.GetInt("port")} - a.AnalyzedInstanceMasterKey = InstanceKey{Hostname: m.GetString("master_host"), Port: m.GetInt("master_port")} + a.AnalyzedInstanceMainKey = InstanceKey{Hostname: m.GetString("main_host"), Port: m.GetInt("main_port")} a.ClusterDetails.ClusterName = m.GetString("cluster_name") a.ClusterDetails.ClusterAlias = m.GetString("cluster_alias") a.LastCheckValid = m.GetBool("is_last_check_valid") - a.CountSlaves = m.GetUint("count_slaves") - a.CountValidSlaves = m.GetUint("count_valid_slaves") - a.CountValidReplicatingSlaves = m.GetUint("count_valid_replicating_slaves") - a.CountSlavesFailingToConnectToMaster = m.GetUint("count_slaves_failing_to_connect_to_master") - a.CountStaleSlaves = m.GetUint("count_stale_slaves") + a.CountSubordinates = m.GetUint("count_subordinates") + a.CountValidSubordinates = m.GetUint("count_valid_subordinates") + a.CountValidReplicatingSubordinates = m.GetUint("count_valid_replicating_subordinates") + a.CountSubordinatesFailingToConnectToMain = m.GetUint("count_subordinates_failing_to_connect_to_main") + a.CountStaleSubordinates = m.GetUint("count_stale_subordinates") a.ReplicationDepth = m.GetUint("replication_depth") - a.IsFailingToConnectToMaster = 
m.GetBool("is_failing_to_connect_to_master") + a.IsFailingToConnectToMain = m.GetBool("is_failing_to_connect_to_main") a.IsDowntimed = m.GetBool("is_downtimed") a.DowntimeEndTimestamp = m.GetString("downtime_end_timestamp") a.DowntimeRemainingSeconds = m.GetInt("downtime_remaining_seconds") a.IsBinlogServer = m.GetBool("is_binlog_server") a.ClusterDetails.ReadRecoveryInfo() - a.SlaveHosts = *NewInstanceKeyMap() - a.SlaveHosts.ReadCommaDelimitedList(m.GetString("slave_hosts")) + a.SubordinateHosts = *NewInstanceKeyMap() + a.SubordinateHosts.ReadCommaDelimitedList(m.GetString("subordinate_hosts")) - countValidOracleGTIDSlaves := m.GetUint("count_valid_oracle_gtid_slaves") - a.OracleGTIDImmediateTopology = countValidOracleGTIDSlaves == a.CountValidSlaves && a.CountValidSlaves > 0 - countValidMariaDBGTIDSlaves := m.GetUint("count_valid_mariadb_gtid_slaves") - a.MariaDBGTIDImmediateTopology = countValidMariaDBGTIDSlaves == a.CountValidSlaves && a.CountValidSlaves > 0 - countValidBinlogServerSlaves := m.GetUint("count_valid_binlog_server_slaves") - a.BinlogServerImmediateTopology = countValidBinlogServerSlaves == a.CountValidSlaves && a.CountValidSlaves > 0 + countValidOracleGTIDSubordinates := m.GetUint("count_valid_oracle_gtid_subordinates") + a.OracleGTIDImmediateTopology = countValidOracleGTIDSubordinates == a.CountValidSubordinates && a.CountValidSubordinates > 0 + countValidMariaDBGTIDSubordinates := m.GetUint("count_valid_mariadb_gtid_subordinates") + a.MariaDBGTIDImmediateTopology = countValidMariaDBGTIDSubordinates == a.CountValidSubordinates && a.CountValidSubordinates > 0 + countValidBinlogServerSubordinates := m.GetUint("count_valid_binlog_server_subordinates") + a.BinlogServerImmediateTopology = countValidBinlogServerSubordinates == a.CountValidSubordinates && a.CountValidSubordinates > 0 a.PseudoGTIDImmediateTopology = m.GetBool("is_pseudo_gtid") - a.CountStatementBasedLoggingSlaves = m.GetUint("count_statement_based_loggin_slaves") - a.CountMixedBasedLoggingSlaves = m.GetUint("count_mixed_based_loggin_slaves") - a.CountRowBasedLoggingSlaves = m.GetUint("count_row_based_loggin_slaves") - a.CountDistinctMajorVersionsLoggingSlaves = m.GetUint("count_distinct_logging_major_versions") + a.CountStatementBasedLoggingSubordinates = m.GetUint("count_statement_based_loggin_subordinates") + a.CountMixedBasedLoggingSubordinates = m.GetUint("count_mixed_based_loggin_subordinates") + a.CountRowBasedLoggingSubordinates = m.GetUint("count_row_based_loggin_subordinates") + a.CountDistinctMajorVersionsLoggingSubordinates = m.GetUint("count_distinct_logging_major_versions") - if a.IsMaster && !a.LastCheckValid && a.CountSlaves == 0 { - a.Analysis = DeadMasterWithoutSlaves - a.Description = "Master cannot be reached by orchestrator and has no slave" + if a.IsMain && !a.LastCheckValid && a.CountSubordinates == 0 { + a.Analysis = DeadMainWithoutSubordinates + a.Description = "Main cannot be reached by orchestrator and has no subordinate" // - } else if a.IsMaster && !a.LastCheckValid && a.CountValidSlaves == a.CountSlaves && a.CountValidReplicatingSlaves == 0 { - a.Analysis = DeadMaster - a.Description = "Master cannot be reached by orchestrator and none of its slaves is replicating" + } else if a.IsMain && !a.LastCheckValid && a.CountValidSubordinates == a.CountSubordinates && a.CountValidReplicatingSubordinates == 0 { + a.Analysis = DeadMain + a.Description = "Main cannot be reached by orchestrator and none of its subordinates is replicating" // - } else if a.IsMaster && !a.LastCheckValid && 
a.CountSlaves > 0 && a.CountValidSlaves == 0 && a.CountValidReplicatingSlaves == 0 { - a.Analysis = DeadMasterAndSlaves - a.Description = "Master cannot be reached by orchestrator and none of its slaves is replicating" + } else if a.IsMain && !a.LastCheckValid && a.CountSubordinates > 0 && a.CountValidSubordinates == 0 && a.CountValidReplicatingSubordinates == 0 { + a.Analysis = DeadMainAndSubordinates + a.Description = "Main cannot be reached by orchestrator and none of its subordinates is replicating" // - } else if a.IsMaster && !a.LastCheckValid && a.CountValidSlaves < a.CountSlaves && a.CountValidSlaves > 0 && a.CountValidReplicatingSlaves == 0 { - a.Analysis = DeadMasterAndSomeSlaves - a.Description = "Master cannot be reached by orchestrator; some of its slaves are unreachable and none of its reachable slaves is replicating" + } else if a.IsMain && !a.LastCheckValid && a.CountValidSubordinates < a.CountSubordinates && a.CountValidSubordinates > 0 && a.CountValidReplicatingSubordinates == 0 { + a.Analysis = DeadMainAndSomeSubordinates + a.Description = "Main cannot be reached by orchestrator; some of its subordinates are unreachable and none of its reachable subordinates is replicating" // - } else if a.IsMaster && !a.LastCheckValid && a.CountStaleSlaves == a.CountSlaves && a.CountValidReplicatingSlaves > 0 { - a.Analysis = UnreachableMasterWithStaleSlaves - a.Description = "Master cannot be reached by orchestrator and has running yet stale slaves" + } else if a.IsMain && !a.LastCheckValid && a.CountStaleSubordinates == a.CountSubordinates && a.CountValidReplicatingSubordinates > 0 { + a.Analysis = UnreachableMainWithStaleSubordinates + a.Description = "Main cannot be reached by orchestrator and has running yet stale subordinates" // - } else if a.IsMaster && !a.LastCheckValid && a.CountValidSlaves > 0 && a.CountValidReplicatingSlaves > 0 { - a.Analysis = UnreachableMaster - a.Description = "Master cannot be reached by orchestrator but it has replicating slaves; possibly a network/host issue" + } else if a.IsMain && !a.LastCheckValid && a.CountValidSubordinates > 0 && a.CountValidReplicatingSubordinates > 0 { + a.Analysis = UnreachableMain + a.Description = "Main cannot be reached by orchestrator but it has replicating subordinates; possibly a network/host issue" // - } else if a.IsMaster && a.LastCheckValid && a.CountSlaves == 1 && a.CountValidSlaves == a.CountSlaves && a.CountValidReplicatingSlaves == 0 { - a.Analysis = MasterSingleSlaveNotReplicating - a.Description = "Master is reachable but its single slave is not replicating" + } else if a.IsMain && a.LastCheckValid && a.CountSubordinates == 1 && a.CountValidSubordinates == a.CountSubordinates && a.CountValidReplicatingSubordinates == 0 { + a.Analysis = MainSingleSubordinateNotReplicating + a.Description = "Main is reachable but its single subordinate is not replicating" // - } else if a.IsMaster && a.LastCheckValid && a.CountSlaves == 1 && a.CountValidSlaves == 0 { - a.Analysis = MasterSingleSlaveDead - a.Description = "Master is reachable but its single slave is dead" + } else if a.IsMain && a.LastCheckValid && a.CountSubordinates == 1 && a.CountValidSubordinates == 0 { + a.Analysis = MainSingleSubordinateDead + a.Description = "Main is reachable but its single subordinate is dead" // - } else if a.IsMaster && a.LastCheckValid && a.CountSlaves > 1 && a.CountValidSlaves == a.CountSlaves && a.CountValidReplicatingSlaves == 0 { - a.Analysis = AllMasterSlavesNotReplicating - a.Description = "Master is reachable but none of its 
slaves is replicating" + } else if a.IsMain && a.LastCheckValid && a.CountSubordinates > 1 && a.CountValidSubordinates == a.CountSubordinates && a.CountValidReplicatingSubordinates == 0 { + a.Analysis = AllMainSubordinatesNotReplicating + a.Description = "Main is reachable but none of its subordinates is replicating" // - } else if a.IsMaster && a.LastCheckValid && a.CountSlaves > 1 && a.CountValidSlaves < a.CountSlaves && a.CountValidSlaves > 0 && a.CountValidReplicatingSlaves == 0 { - a.Analysis = AllMasterSlavesNotReplicatingOrDead - a.Description = "Master is reachable but none of its slaves is replicating" + } else if a.IsMain && a.LastCheckValid && a.CountSubordinates > 1 && a.CountValidSubordinates < a.CountSubordinates && a.CountValidSubordinates > 0 && a.CountValidReplicatingSubordinates == 0 { + a.Analysis = AllMainSubordinatesNotReplicatingOrDead + a.Description = "Main is reachable but none of its subordinates is replicating" // - } else if a.IsMaster && a.LastCheckValid && a.CountSlaves > 1 && a.CountStaleSlaves == a.CountSlaves && a.CountValidSlaves > 0 && a.CountValidReplicatingSlaves > 0 { - a.Analysis = AllMasterSlavesStale - a.Description = "Master is reachable but all of its slaves are stale, although attempting to replicate" + } else if a.IsMain && a.LastCheckValid && a.CountSubordinates > 1 && a.CountStaleSubordinates == a.CountSubordinates && a.CountValidSubordinates > 0 && a.CountValidReplicatingSubordinates > 0 { + a.Analysis = AllMainSubordinatesStale + a.Description = "Main is reachable but all of its subordinates are stale, although attempting to replicate" // - } else /* co-master */ if a.IsCoMaster && !a.LastCheckValid && a.CountSlaves > 0 && a.CountValidSlaves == a.CountSlaves && a.CountValidReplicatingSlaves == 0 { - a.Analysis = DeadCoMaster - a.Description = "Co-master cannot be reached by orchestrator and none of its slaves is replicating" + } else /* co-main */ if a.IsCoMain && !a.LastCheckValid && a.CountSubordinates > 0 && a.CountValidSubordinates == a.CountSubordinates && a.CountValidReplicatingSubordinates == 0 { + a.Analysis = DeadCoMain + a.Description = "Co-main cannot be reached by orchestrator and none of its subordinates is replicating" // - } else if a.IsCoMaster && !a.LastCheckValid && a.CountSlaves > 0 && a.CountValidSlaves < a.CountSlaves && a.CountValidSlaves > 0 && a.CountValidReplicatingSlaves == 0 { - a.Analysis = DeadCoMasterAndSomeSlaves - a.Description = "Co-master cannot be reached by orchestrator; some of its slaves are unreachable and none of its reachable slaves is replicating" + } else if a.IsCoMain && !a.LastCheckValid && a.CountSubordinates > 0 && a.CountValidSubordinates < a.CountSubordinates && a.CountValidSubordinates > 0 && a.CountValidReplicatingSubordinates == 0 { + a.Analysis = DeadCoMainAndSomeSubordinates + a.Description = "Co-main cannot be reached by orchestrator; some of its subordinates are unreachable and none of its reachable subordinates is replicating" // - } else if a.IsCoMaster && !a.LastCheckValid && a.CountValidSlaves > 0 && a.CountValidReplicatingSlaves > 0 { - a.Analysis = UnreachableCoMaster - a.Description = "Co-master cannot be reached by orchestrator but it has replicating slaves; possibly a network/host issue" + } else if a.IsCoMain && !a.LastCheckValid && a.CountValidSubordinates > 0 && a.CountValidReplicatingSubordinates > 0 { + a.Analysis = UnreachableCoMain + a.Description = "Co-main cannot be reached by orchestrator but it has replicating subordinates; possibly a network/host issue" // - } else 
if a.IsCoMaster && a.LastCheckValid && a.CountSlaves > 0 && a.CountValidReplicatingSlaves == 0 { - a.Analysis = AllCoMasterSlavesNotReplicating - a.Description = "Co-master is reachable but none of its slaves is replicating" + } else if a.IsCoMain && a.LastCheckValid && a.CountSubordinates > 0 && a.CountValidReplicatingSubordinates == 0 { + a.Analysis = AllCoMainSubordinatesNotReplicating + a.Description = "Co-main is reachable but none of its subordinates is replicating" // - } else /* intermediate-master */ if !a.IsMaster && !a.LastCheckValid && a.CountSlaves == 1 && a.CountValidSlaves == a.CountSlaves && a.CountSlavesFailingToConnectToMaster == a.CountSlaves && a.CountValidReplicatingSlaves == 0 { - a.Analysis = DeadIntermediateMasterWithSingleSlaveFailingToConnect - a.Description = "Intermediate master cannot be reached by orchestrator and its (single) slave is failing to connect" + } else /* intermediate-main */ if !a.IsMain && !a.LastCheckValid && a.CountSubordinates == 1 && a.CountValidSubordinates == a.CountSubordinates && a.CountSubordinatesFailingToConnectToMain == a.CountSubordinates && a.CountValidReplicatingSubordinates == 0 { + a.Analysis = DeadIntermediateMainWithSingleSubordinateFailingToConnect + a.Description = "Intermediate main cannot be reached by orchestrator and its (single) subordinate is failing to connect" // - } else /* intermediate-master */ if !a.IsMaster && !a.LastCheckValid && a.CountSlaves == 1 && a.CountValidSlaves == a.CountSlaves && a.CountValidReplicatingSlaves == 0 { - a.Analysis = DeadIntermediateMasterWithSingleSlave - a.Description = "Intermediate master cannot be reached by orchestrator and its (single) slave is not replicating" + } else /* intermediate-main */ if !a.IsMain && !a.LastCheckValid && a.CountSubordinates == 1 && a.CountValidSubordinates == a.CountSubordinates && a.CountValidReplicatingSubordinates == 0 { + a.Analysis = DeadIntermediateMainWithSingleSubordinate + a.Description = "Intermediate main cannot be reached by orchestrator and its (single) subordinate is not replicating" // - } else /* intermediate-master */ if !a.IsMaster && !a.LastCheckValid && a.CountSlaves > 1 && a.CountValidSlaves == a.CountSlaves && a.CountValidReplicatingSlaves == 0 { - a.Analysis = DeadIntermediateMaster - a.Description = "Intermediate master cannot be reached by orchestrator and none of its slaves is replicating" + } else /* intermediate-main */ if !a.IsMain && !a.LastCheckValid && a.CountSubordinates > 1 && a.CountValidSubordinates == a.CountSubordinates && a.CountValidReplicatingSubordinates == 0 { + a.Analysis = DeadIntermediateMain + a.Description = "Intermediate main cannot be reached by orchestrator and none of its subordinates is replicating" // - } else if !a.IsMaster && !a.LastCheckValid && a.CountValidSlaves < a.CountSlaves && a.CountValidSlaves > 0 && a.CountValidReplicatingSlaves == 0 { - a.Analysis = DeadIntermediateMasterAndSomeSlaves - a.Description = "Intermediate master cannot be reached by orchestrator; some of its slaves are unreachable and none of its reachable slaves is replicating" + } else if !a.IsMain && !a.LastCheckValid && a.CountValidSubordinates < a.CountSubordinates && a.CountValidSubordinates > 0 && a.CountValidReplicatingSubordinates == 0 { + a.Analysis = DeadIntermediateMainAndSomeSubordinates + a.Description = "Intermediate main cannot be reached by orchestrator; some of its subordinates are unreachable and none of its reachable subordinates is replicating" // - } else if !a.IsMaster && !a.LastCheckValid && 
a.CountValidSlaves > 0 && a.CountValidReplicatingSlaves > 0 { - a.Analysis = UnreachableIntermediateMaster - a.Description = "Intermediate master cannot be reached by orchestrator but it has replicating slaves; possibly a network/host issue" + } else if !a.IsMain && !a.LastCheckValid && a.CountValidSubordinates > 0 && a.CountValidReplicatingSubordinates > 0 { + a.Analysis = UnreachableIntermediateMain + a.Description = "Intermediate main cannot be reached by orchestrator but it has replicating subordinates; possibly a network/host issue" // - } else if !a.IsMaster && a.LastCheckValid && a.CountSlaves > 1 && a.CountValidReplicatingSlaves == 0 && - a.CountSlavesFailingToConnectToMaster > 0 && a.CountSlavesFailingToConnectToMaster == a.CountValidSlaves { - // All slaves are either failing to connect to master (and at least one of these have to exist) + } else if !a.IsMain && a.LastCheckValid && a.CountSubordinates > 1 && a.CountValidReplicatingSubordinates == 0 && + a.CountSubordinatesFailingToConnectToMain > 0 && a.CountSubordinatesFailingToConnectToMain == a.CountValidSubordinates { + // All subordinates are either failing to connect to main (and at least one of these have to exist) // or completely dead. - // Must have at least two slaves to reach such conclusion -- do note that the intermediate master is still - // reachable to orchestrator, so we base our conclusion on slaves only at this point. - a.Analysis = AllIntermediateMasterSlavesFailingToConnectOrDead - a.Description = "Intermediate master is reachable but all of its slaves are failing to connect" + // Must have at least two subordinates to reach such conclusion -- do note that the intermediate main is still + // reachable to orchestrator, so we base our conclusion on subordinates only at this point. 
+ a.Analysis = AllIntermediateMainSubordinatesFailingToConnectOrDead + a.Description = "Intermediate main is reachable but all of its subordinates are failing to connect" // - } else if !a.IsMaster && a.LastCheckValid && a.CountSlaves > 0 && a.CountValidReplicatingSlaves == 0 { - a.Analysis = AllIntermediateMasterSlavesNotReplicating - a.Description = "Intermediate master is reachable but none of its slaves is replicating" + } else if !a.IsMain && a.LastCheckValid && a.CountSubordinates > 0 && a.CountValidReplicatingSubordinates == 0 { + a.Analysis = AllIntermediateMainSubordinatesNotReplicating + a.Description = "Intermediate main is reachable but none of its subordinates is replicating" // - } else if a.IsBinlogServer && a.IsFailingToConnectToMaster { - a.Analysis = BinlogServerFailingToConnectToMaster - a.Description = "Binlog server is unable to connect to its master" + } else if a.IsBinlogServer && a.IsFailingToConnectToMain { + a.Analysis = BinlogServerFailingToConnectToMain + a.Description = "Binlog server is unable to connect to its main" // - } else if a.ReplicationDepth == 1 && a.IsFailingToConnectToMaster { - a.Analysis = FirstTierSlaveFailingToConnectToMaster - a.Description = "1st tier slave (directly replicating from topology master) is unable to connect to the master" + } else if a.ReplicationDepth == 1 && a.IsFailingToConnectToMain { + a.Analysis = FirstTierSubordinateFailingToConnectToMain + a.Description = "1st tier subordinate (directly replicating from topology main) is unable to connect to the main" // } - // else if a.IsMaster && a.CountSlaves == 0 { - // a.Analysis = MasterWithoutSlaves - // a.Description = "Master has no slaves" + // else if a.IsMain && a.CountSubordinates == 0 { + // a.Analysis = MainWithoutSubordinates + // a.Description = "Main has no subordinates" // } appendAnalysis := func(analysis *ReplicationAnalysis) { @@ -374,22 +374,22 @@ func GetReplicationAnalysis(clusterName string, includeDowntimed bool, auditAnal { // Moving on to structure analysis // We also do structural checks. 
See if there's potential danger in promotions - if a.IsMaster && a.CountStatementBasedLoggingSlaves > 0 && a.CountMixedBasedLoggingSlaves > 0 { - a.StructureAnalysis = append(a.StructureAnalysis, StatementAndMixedLoggingSlavesStructureWarning) + if a.IsMain && a.CountStatementBasedLoggingSubordinates > 0 && a.CountMixedBasedLoggingSubordinates > 0 { + a.StructureAnalysis = append(a.StructureAnalysis, StatementAndMixedLoggingSubordinatesStructureWarning) } - if a.IsMaster && a.CountStatementBasedLoggingSlaves > 0 && a.CountRowBasedLoggingSlaves > 0 { - a.StructureAnalysis = append(a.StructureAnalysis, StatementAndRowLoggingSlavesStructureWarning) + if a.IsMain && a.CountStatementBasedLoggingSubordinates > 0 && a.CountRowBasedLoggingSubordinates > 0 { + a.StructureAnalysis = append(a.StructureAnalysis, StatementAndRowLoggingSubordinatesStructureWarning) } - if a.IsMaster && a.CountMixedBasedLoggingSlaves > 0 && a.CountRowBasedLoggingSlaves > 0 { - a.StructureAnalysis = append(a.StructureAnalysis, MixedAndRowLoggingSlavesStructureWarning) + if a.IsMain && a.CountMixedBasedLoggingSubordinates > 0 && a.CountRowBasedLoggingSubordinates > 0 { + a.StructureAnalysis = append(a.StructureAnalysis, MixedAndRowLoggingSubordinatesStructureWarning) } - if a.IsMaster && a.CountDistinctMajorVersionsLoggingSlaves > 1 { - a.StructureAnalysis = append(a.StructureAnalysis, MultipleMajorVersionsLoggingSlaves) + if a.IsMain && a.CountDistinctMajorVersionsLoggingSubordinates > 1 { + a.StructureAnalysis = append(a.StructureAnalysis, MultipleMajorVersionsLoggingSubordinates) } } appendAnalysis(&a) - if a.CountSlaves > 0 && auditAnalysis { + if a.CountSubordinates > 0 && auditAnalysis { // Interesting enough for analysis go auditInstanceAnalysisInChangelog(&a.AnalyzedInstanceKey, a.Analysis) } diff --git a/go/inst/cluster.go b/go/inst/cluster.go index 6ee51069..3537266a 100644 --- a/go/inst/cluster.go +++ b/go/inst/cluster.go @@ -26,17 +26,17 @@ import ( type ClusterInfo struct { ClusterName string ClusterAlias string // Human friendly alias - ClusterDomain string // CNAME/VIP/A-record/whatever of the master of this cluster + ClusterDomain string // CNAME/VIP/A-record/whatever of the main of this cluster CountInstances uint HeuristicLag int64 - HasAutomatedMasterRecovery bool - HasAutomatedIntermediateMasterRecovery bool + HasAutomatedMainRecovery bool + HasAutomatedIntermediateMainRecovery bool } // ReadRecoveryInfo func (this *ClusterInfo) ReadRecoveryInfo() { - this.HasAutomatedMasterRecovery = this.filtersMatchCluster(config.Config.RecoverMasterClusterFilters) - this.HasAutomatedIntermediateMasterRecovery = this.filtersMatchCluster(config.Config.RecoverIntermediateMasterClusterFilters) + this.HasAutomatedMainRecovery = this.filtersMatchCluster(config.Config.RecoverMainClusterFilters) + this.HasAutomatedIntermediateMainRecovery = this.filtersMatchCluster(config.Config.RecoverIntermediateMainClusterFilters) } // filtersMatchCluster will see whether the given filters match the given cluster details diff --git a/go/inst/cluster_alias_dao.go b/go/inst/cluster_alias_dao.go index 7f87dd3e..57564d9e 100644 --- a/go/inst/cluster_alias_dao.go +++ b/go/inst/cluster_alias_dao.go @@ -95,7 +95,7 @@ func UpdateClusterAliases() error { cluster_name order by ((last_checked <= last_seen) is true) desc, read_only asc, - num_slave_hosts desc + num_subordinate_hosts desc ), ',', 1) as cluster_name, NOW() from @@ -103,7 +103,7 @@ func UpdateClusterAliases() error { left join database_instance_downtime using (hostname, port) where 
suggested_cluster_alias!='' - /* exclude newly demoted, downtimed masters */ + /* exclude newly demoted, downtimed mains */ and ifnull( database_instance_downtime.downtime_active = 1 and database_instance_downtime.end_timestamp > now() diff --git a/go/inst/instance.go b/go/inst/instance.go index 3d0c41fb..9c865f85 100644 --- a/go/inst/instance.go +++ b/go/inst/instance.go @@ -63,12 +63,12 @@ type Instance struct { ReadOnly bool Binlog_format string LogBinEnabled bool - LogSlaveUpdatesEnabled bool + LogSubordinateUpdatesEnabled bool SelfBinlogCoordinates BinlogCoordinates - MasterKey InstanceKey - IsDetachedMaster bool - Slave_SQL_Running bool - Slave_IO_Running bool + MainKey InstanceKey + IsDetachedMain bool + Subordinate_SQL_Running bool + Subordinate_IO_Running bool HasReplicationFilters bool SupportsOracleGTID bool UsingOracleGTID bool @@ -80,19 +80,19 @@ type Instance struct { RelaylogCoordinates BinlogCoordinates LastSQLError string LastIOError string - SecondsBehindMaster sql.NullInt64 + SecondsBehindMain sql.NullInt64 SQLDelay uint ExecutedGtidSet string GtidPurged string - SlaveLagSeconds sql.NullInt64 - SlaveHosts InstanceKeyMap + SubordinateLagSeconds sql.NullInt64 + SubordinateHosts InstanceKeyMap ClusterName string SuggestedClusterAlias string DataCenter string PhysicalEnvironment string ReplicationDepth uint - IsCoMaster bool + IsCoMain bool HasReplicationCredentials bool ReplicationCredentialsAvailable bool SemiSyncEnforced bool @@ -117,7 +117,7 @@ type Instance struct { // NewInstance creates a new, empty instance func NewInstance() *Instance { return &Instance{ - SlaveHosts: make(map[InstanceKey]bool), + SubordinateHosts: make(map[InstanceKey]bool), } } @@ -224,14 +224,14 @@ func (instance *Instance) NameAndMajorVersionString() string { return name + "-" + instance.MajorVersionString() } -// IsSlave makes simple heuristics to decide whether this insatnce is a slave of another instance -func (this *Instance) IsSlave() bool { - return this.MasterKey.Hostname != "" && this.MasterKey.Hostname != "_" && this.MasterKey.Port != 0 && (this.ReadBinlogCoordinates.LogFile != "" || this.UsingGTID()) +// IsSubordinate uses simple heuristics to decide whether this instance is a subordinate of another instance +func (this *Instance) IsSubordinate() bool { + return this.MainKey.Hostname != "" && this.MainKey.Hostname != "_" && this.MainKey.Port != 0 && (this.ReadBinlogCoordinates.LogFile != "" || this.UsingGTID()) } -// SlaveRunning returns true when this instance's status is of a replicating slave. -func (this *Instance) SlaveRunning() bool { - return this.IsSlave() && this.Slave_SQL_Running && this.Slave_IO_Running +// SubordinateRunning returns true when this instance's status is of a replicating subordinate. +func (this *Instance) SubordinateRunning() bool { + return this.IsSubordinate() && this.Subordinate_SQL_Running && this.Subordinate_IO_Running } // SQLThreadUpToDate returns true when the instance had consumed all relay logs.
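The renamed heuristics above hinge on only a few fields: an instance counts as a subordinate when it has a main key plus either binlog coordinates or GTID, and it counts as replicating only when both replication threads run. A minimal test-style sketch of that classification, assuming it lives in the inst package; the host name and binlog file are hypothetical:

// instance_subordinate_heuristics_sketch_test.go -- hypothetical illustrative sketch
package inst

import "testing"

func TestSubordinateHeuristicsSketch(t *testing.T) {
	instance := NewInstance()
	instance.MainKey = InstanceKey{Hostname: "db-main-01.example.com", Port: 3306}
	instance.ReadBinlogCoordinates.LogFile = "mysql-bin.000042"
	instance.Subordinate_IO_Running = true
	instance.Subordinate_SQL_Running = false

	if !instance.IsSubordinate() {
		t.Fatal("an instance with a main key and binlog coordinates should count as a subordinate")
	}
	if instance.SubordinateRunning() {
		t.Fatal("a stopped SQL thread should mean the subordinate is not replicating")
	}
}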
@@ -239,7 +239,7 @@ func (this *Instance) SQLThreadUpToDate() bool { return this.ReadBinlogCoordinates.Equals(&this.ExecBinlogCoordinates) } -// UsingGTID returns true when this slave is currently replicating via GTID (either Oracle or MariaDB) +// UsingGTID returns true when this subordinate is currently replicating via GTID (either Oracle or MariaDB) func (this *Instance) UsingGTID() bool { return this.UsingOracleGTID || this.UsingMariaDBGTID } @@ -259,9 +259,9 @@ func (this *Instance) NextGTID() (string, error) { return tokens[len(tokens)-1] } // executed GTID set: 4f6d62ed-df65-11e3-b395-60672090eb04:1,b9b4712a-df64-11e3-b391-60672090eb04:1-6 - executedGTIDsFromMaster := lastToken(this.ExecutedGtidSet, ",") - // executedGTIDsFromMaster: b9b4712a-df64-11e3-b391-60672090eb04:1-6 - executedRange := lastToken(executedGTIDsFromMaster, ":") + executedGTIDsFromMain := lastToken(this.ExecutedGtidSet, ",") + // executedGTIDsFromMain: b9b4712a-df64-11e3-b391-60672090eb04:1-6 + executedRange := lastToken(executedGTIDsFromMain, ":") // executedRange: 1-6 lastExecutedNumberToken := lastToken(executedRange, "-") // lastExecutedNumber: 6 @@ -270,13 +270,13 @@ func (this *Instance) NextGTID() (string, error) { return "", err } nextNumber := lastExecutedNumber + 1 - nextGTID := fmt.Sprintf("%s:%d", firstToken(executedGTIDsFromMaster, ":"), nextNumber) + nextGTID := fmt.Sprintf("%s:%d", firstToken(executedGTIDsFromMain, ":"), nextNumber) return nextGTID, nil } -// AddSlaveKey adds a slave to the list of this instance's slaves. -func (this *Instance) AddSlaveKey(slaveKey *InstanceKey) { - this.SlaveHosts.AddKey(*slaveKey) +// AddSubordinateKey adds a subordinate to the list of this instance's subordinates. +func (this *Instance) AddSubordinateKey(subordinateKey *InstanceKey) { + this.SubordinateHosts.AddKey(*subordinateKey) } // GetNextBinaryLog returns the successive, if any, binary log file to the one given @@ -287,14 +287,14 @@ func (this *Instance) GetNextBinaryLog(binlogCoordinates BinlogCoordinates) (Bin return binlogCoordinates.NextFileCoordinates() } -// IsSlaveOf returns true if this instance claims to replicate from given master -func (this *Instance) IsSlaveOf(master *Instance) bool { - return this.MasterKey.Equals(&master.Key) +// IsSubordinateOf returns true if this instance claims to replicate from given main +func (this *Instance) IsSubordinateOf(main *Instance) bool { + return this.MainKey.Equals(&main.Key) } -// IsSlaveOf returns true if this i supposed master of given slave -func (this *Instance) IsMasterOf(slave *Instance) bool { - return slave.IsSlaveOf(this) +// IsMainOf returns true if this is the supposed main of the given subordinate +func (this *Instance) IsMainOf(subordinate *Instance) bool { + return subordinate.IsSubordinateOf(this) } // CanReplicateFrom uses heursitics to decide whether this instacne can practically replicate from other instance.
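The NextGTID arithmetic is easiest to follow against the executed GTID set quoted in its comments: the last UUID range is b9b4712a-df64-11e3-b391-60672090eb04:1-6, so the next GTID appends :7. A hypothetical test-style sketch of that computation, assuming the inst package:

// instance_next_gtid_sketch_test.go -- hypothetical illustrative sketch
package inst

import "testing"

func TestNextGTIDSketch(t *testing.T) {
	instance := NewInstance()
	instance.ExecutedGtidSet = "4f6d62ed-df65-11e3-b395-60672090eb04:1,b9b4712a-df64-11e3-b391-60672090eb04:1-6"

	next, err := instance.NextGTID()
	if err != nil {
		t.Fatal(err)
	}
	// the last UUID set ends at 6, so its successor is :7
	if expected := "b9b4712a-df64-11e3-b391-60672090eb04:7"; next != expected {
		t.Fatalf("expected %s, got %s", expected, next)
	}
}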
@@ -306,17 +306,17 @@ func (this *Instance) CanReplicateFrom(other *Instance) (bool, error) { if !other.LogBinEnabled { return false, fmt.Errorf("instance does not have binary logs enabled: %+v", other.Key) } - if other.IsSlave() { - if !other.LogSlaveUpdatesEnabled { - return false, fmt.Errorf("instance does not have log_slave_updates enabled: %+v", other.Key) + if other.IsSubordinate() { + if !other.LogSubordinateUpdatesEnabled { + return false, fmt.Errorf("instance does not have log_subordinate_updates enabled: %+v", other.Key) } - // OK for a master to not have log_slave_updates - // Not OK for a slave, for it has to relay the logs. + // OK for a main to not have log_subordinate_updates + // Not OK for a subordinate, for it has to relay the logs. } if this.IsSmallerMajorVersion(other) && !this.IsBinlogServer() { return false, fmt.Errorf("instance %+v has version %s, which is lower than %s on %+v ", this.Key, this.Version, other.Version, other.Key) } - if this.LogBinEnabled && this.LogSlaveUpdatesEnabled { + if this.LogBinEnabled && this.LogSubordinateUpdatesEnabled { if this.IsSmallerBinlogFormat(other) { return false, fmt.Errorf("Cannot replicate from %+v binlog format on %+v to %+v on %+v", other.Binlog_format, other.Key, this.Binlog_format, this.Key) } @@ -332,13 +332,13 @@ func (this *Instance) CanReplicateFrom(other *Instance) (bool, error) { return true, nil } -// HasReasonableMaintenanceReplicationLag returns true when the slave lag is reasonable, and maintenance operations should have a green light to go. +// HasReasonableMaintenanceReplicationLag returns true when the subordinate lag is reasonable, and maintenance operations should have a green light to go. func (this *Instance) HasReasonableMaintenanceReplicationLag() bool { - // Slaves with SQLDelay are a special case + // Subordinates with SQLDelay are a special case if this.SQLDelay > 0 { - return math.AbsInt64(this.SecondsBehindMaster.Int64-int64(this.SQLDelay)) <= int64(config.Config.ReasonableMaintenanceReplicationLagSeconds) + return math.AbsInt64(this.SecondsBehindMain.Int64-int64(this.SQLDelay)) <= int64(config.Config.ReasonableMaintenanceReplicationLagSeconds) } - return this.SecondsBehindMaster.Int64 <= int64(config.Config.ReasonableMaintenanceReplicationLagSeconds) + return this.SecondsBehindMain.Int64 <= int64(config.Config.ReasonableMaintenanceReplicationLagSeconds) } // CanMove returns true if this instance's state allows it to be repositioned. For example, @@ -350,14 +350,14 @@ func (this *Instance) CanMove() (bool, error) { if !this.IsRecentlyChecked { return false, fmt.Errorf("%+v: not recently checked", this.Key) } - if !this.Slave_SQL_Running { + if !this.Subordinate_SQL_Running { return false, fmt.Errorf("%+v: instance is not replicating", this.Key) } - if !this.Slave_IO_Running { + if !this.Subordinate_IO_Running { return false, fmt.Errorf("%+v: instance is not replicating", this.Key) } - if !this.SecondsBehindMaster.Valid { - return false, fmt.Errorf("%+v: cannot determine slave lag", this.Key) + if !this.SecondsBehindMain.Valid { + return false, fmt.Errorf("%+v: cannot determine subordinate lag", this.Key) } if !this.HasReasonableMaintenanceReplicationLag() { return false, fmt.Errorf("%+v: lags too much", this.Key) @@ -365,8 +365,8 @@ func (this *Instance) CanMove() (bool, error) { return true, nil } -// CanMoveAsCoMaster returns true if this instance's state allows it to be repositioned. 
-func (this *Instance) CanMoveAsCoMaster() (bool, error) { +// CanMoveAsCoMain returns true if this instance's state allows it to be repositioned. +func (this *Instance) CanMoveAsCoMain() (bool, error) { if !this.IsLastCheckValid { return false, fmt.Errorf("%+v: last check invalid", this.Key) } @@ -395,10 +395,10 @@ func (this *Instance) StatusString() string { if !this.IsRecentlyChecked { return "unchecked" } - if this.IsSlave() && !(this.Slave_SQL_Running && this.Slave_IO_Running) { + if this.IsSubordinate() && !(this.Subordinate_SQL_Running && this.Subordinate_IO_Running) { return "nonreplicating" } - if this.IsSlave() && !this.HasReasonableMaintenanceReplicationLag() { + if this.IsSubordinate() && !this.HasReasonableMaintenanceReplicationLag() { return "lag" } return "ok" @@ -415,16 +415,16 @@ func (this *Instance) LagStatusString() string { if !this.IsRecentlyChecked { return "unknown" } - if this.IsSlave() && !(this.Slave_SQL_Running && this.Slave_IO_Running) { + if this.IsSubordinate() && !(this.Subordinate_SQL_Running && this.Subordinate_IO_Running) { return "null" } - if this.IsSlave() && !this.SecondsBehindMaster.Valid { + if this.IsSubordinate() && !this.SecondsBehindMain.Valid { return "null" } - if this.IsSlave() && this.SlaveLagSeconds.Int64 > int64(config.Config.ReasonableMaintenanceReplicationLagSeconds) { - return fmt.Sprintf("%+vs", this.SlaveLagSeconds.Int64) + if this.IsSubordinate() && this.SubordinateLagSeconds.Int64 > int64(config.Config.ReasonableMaintenanceReplicationLagSeconds) { + return fmt.Sprintf("%+vs", this.SubordinateLagSeconds.Int64) } - return fmt.Sprintf("%+vs", this.SlaveLagSeconds.Int64) + return fmt.Sprintf("%+vs", this.SubordinateLagSeconds.Int64) } // HumanReadableDescription returns a simple readable string describing the status, version, @@ -444,7 +444,7 @@ func (this *Instance) HumanReadableDescription() string { } else { tokens = append(tokens, "nobinlog") } - if this.LogBinEnabled && this.LogSlaveUpdatesEnabled { + if this.LogBinEnabled && this.LogSubordinateUpdatesEnabled { tokens = append(tokens, ">>") } if this.UsingGTID() { diff --git a/go/inst/instance_binlog.go b/go/inst/instance_binlog.go index 5c5d1948..e8a90f05 100644 --- a/go/inst/instance_binlog.go +++ b/go/inst/instance_binlog.go @@ -170,7 +170,7 @@ func (this *BinlogEventCursor) nextRealEvent(recursionLevel int) (*BinlogEvent, // NextCoordinates return the binlog coordinates of the next entry as yet unprocessed by the cursor. // Moreover, when the cursor terminates (consumes last entry), these coordinates indicate what will be the futuristic // coordinates of the next binlog entry. -// The value of this function is used by match-below to move a slave behind another, after exhausting the shared binlog +// The value of this function is used by match-below to move a subordinate behind another, after exhausting the shared binlog // entries of both. func (this *BinlogEventCursor) getNextCoordinates() (BinlogCoordinates, error) { if this.nextCoordinates.LogPos == 0 { diff --git a/go/inst/instance_binlog_dao.go b/go/inst/instance_binlog_dao.go index 576efd49..c6a0d3a2 100644 --- a/go/inst/instance_binlog_dao.go +++ b/go/inst/instance_binlog_dao.go @@ -303,10 +303,10 @@ func SearchEntryInInstanceBinlogs(instance *Instance, entryText string, monotoni for { log.Debugf("Searching for given pseudo gtid entry in binlog %+v of %+v", currentBinlog.LogFile, instance.Key) // loop iteration per binary log. This might turn to be a heavyweight operation. 
We wish to throttle the operation such that - // the instance does not suffer. If it is a slave, we will only act as long as it's not lagging too much. - if instance.SlaveRunning() { + // the instance does not suffer. If it is a subordinate, we will only act as long as it's not lagging too much. + if instance.SubordinateRunning() { for { - log.Debugf("%+v is a replicating slave. Verifying lag", instance.Key) + log.Debugf("%+v is a replicating subordinate. Verifying lag", instance.Key) instance, err = ReadTopologyInstanceUnbuffered(&instance.Key) if err != nil { break @@ -332,7 +332,7 @@ func SearchEntryInInstanceBinlogs(instance *Instance, entryText string, monotoni } // Got here? Unfound. Keep looking if minBinlogCoordinates != nil && minBinlogCoordinates.LogFile == currentBinlog.LogFile { - log.Debugf("Heuristic master binary logs search failed; continuing exhaustive search") + log.Debugf("Heuristic main binary logs search failed; continuing exhaustive search") minBinlogCoordinates = nil } else { currentBinlog, err = currentBinlog.PreviousFileCoordinates() @@ -411,13 +411,13 @@ func getNextBinlogEventsChunk(instance *Instance, startingCoordinates BinlogCoor return events, err } -// GetNextBinlogCoordinatesToMatch is given a twin-coordinates couple for a would-be slave (instanceKey) and another +// GetNextBinlogCoordinatesToMatch is given a twin-coordinates couple for a would-be subordinate (instanceKey) and another // instance (otherKey). // This is part of the match-below process, and is the heart of the operation: matching the binlog events starting // the twin-coordinates (where both share the same Pseudo-GTID) until "instance" runs out of entries, hopefully // before "other" runs out. // If "other" runs out that means "instance" is more advanced in replication than "other", in which case we can't -// turn it into a slave of "other". +// turn it into a subordinate of "other". // Otherwise "instance" will point to the *next* binlog entry in "other" func GetNextBinlogCoordinatesToMatch(instance *Instance, instanceCoordinates BinlogCoordinates, recordedInstanceRelayLogCoordinates BinlogCoordinates, maxBinlogCoordinates *BinlogCoordinates, other *Instance, otherCoordinates BinlogCoordinates) (*BinlogCoordinates, int, error) { @@ -477,7 +477,7 @@ func GetNextBinlogCoordinatesToMatch(instance *Instance, instanceCoordinates Bin case RelayLog: // Argghhhh! SHOW RELAY LOG EVENTS IN '...' statement returns CRAPPY values for End_log_pos: // instead of returning the end log pos of the current statement in the *relay log*, it shows - // the end log pos of the matching statement in the *master's binary log*! + // the end log pos of the matching statement in the *main's binary log*! // Yes, there's logic to this. But this means the next-ccordinates are meaningless. 
// As result, in the case where we exhaust (following) the relay log, we cannot do our last // nice sanity test that we've indeed reached the Relay_log_pos coordinate; we are only at the @@ -492,7 +492,7 @@ func GetNextBinlogCoordinatesToMatch(instance *Instance, instanceCoordinates Bin } else if recordedInstanceRelayLogCoordinates.Equals(&event.Coordinates) { // We've passed the maxScanInstanceCoordinates (applies for relay logs) endOfScan = true - log.Debugf("Reached slave relay log coordinates at %+v", recordedInstanceRelayLogCoordinates) + log.Debugf("Reached subordinate relay log coordinates at %+v", recordedInstanceRelayLogCoordinates) } else if recordedInstanceRelayLogCoordinates.SmallerThan(&event.Coordinates) { return nil, 0, log.Errorf("Unexpected problem: relay log scan passed relay log position without hitting it. Ended with: %+v, relay log position: %+v", event.Coordinates, recordedInstanceRelayLogCoordinates) } @@ -518,7 +518,7 @@ func GetNextBinlogCoordinatesToMatch(instance *Instance, instanceCoordinates Bin log.Debugf("> %+v %+v; %+v", rpad(coordinatesStr, beautifyCoordinatesLength), event.EventType, strings.Split(strings.TrimSpace(instanceEventInfo), "\n")[0]) } { - // Extract next binlog/relaylog entry from otherInstance (intended master): + // Extract next binlog/relaylog entry from otherInstance (intended main): event, err := otherCursor.nextRealEvent(0) if err != nil { return nil, 0, log.Errore(err) @@ -526,7 +526,7 @@ func GetNextBinlogCoordinatesToMatch(instance *Instance, instanceCoordinates Bin if event == nil { // end of binary logs for otherInstance: this is unexpected and means instance is more advanced // than otherInstance - return nil, 0, log.Errorf("Unexpected end of binary logs for assumed master (%+v). This means the instance which attempted to be a slave (%+v) was more advanced. Try the other way round", other.Key, instance.Key) + return nil, 0, log.Errorf("Unexpected end of binary logs for assumed main (%+v). This means the instance which attempted to be a subordinate (%+v) was more advanced. Try the other way round", other.Key, instance.Key) } otherEventInfo = event.Info otherEventCoordinates = event.Coordinates diff --git a/go/inst/instance_dao.go b/go/inst/instance_dao.go index c082ce34..8883e1b2 100644 --- a/go/inst/instance_dao.go +++ b/go/inst/instance_dao.go @@ -41,13 +41,13 @@ const backendDBConcurrency = 20 var instanceReadChan = make(chan bool, backendDBConcurrency) var instanceWriteChan = make(chan bool, backendDBConcurrency) -// InstancesByCountSlaveHosts is a sortable type for Instance -type InstancesByCountSlaveHosts [](*Instance) +// InstancesByCountSubordinateHosts is a sortable type for Instance +type InstancesByCountSubordinateHosts [](*Instance) -func (this InstancesByCountSlaveHosts) Len() int { return len(this) } -func (this InstancesByCountSlaveHosts) Swap(i, j int) { this[i], this[j] = this[j], this[i] } -func (this InstancesByCountSlaveHosts) Less(i, j int) bool { - return len(this[i].SlaveHosts) < len(this[j].SlaveHosts) +func (this InstancesByCountSubordinateHosts) Len() int { return len(this) } +func (this InstancesByCountSubordinateHosts) Swap(i, j int) { this[i], this[j] = this[j], this[i] } +func (this InstancesByCountSubordinateHosts) Less(i, j int) bool { + return len(this[i].SubordinateHosts) < len(this[j].SubordinateHosts) } // instanceKeyInformativeClusterName is a non-authoritative cache; used for auditing or general purpose. 
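Since InstancesByCountSubordinateHosts implements sort.Interface, call sites can order candidates by how many subordinates each instance reports. A minimal usage sketch with a hypothetical helper name, not a function from this patch:

// instances_sort_sketch.go -- hypothetical illustrative sketch
package inst

import "sort"

// busiestCandidateSketch returns the instance reporting the most subordinates,
// relying on the ascending order defined by Less() above.
func busiestCandidateSketch(instances [](*Instance)) *Instance {
	if len(instances) == 0 {
		return nil
	}
	sort.Sort(InstancesByCountSubordinateHosts(instances))
	return instances[len(instances)-1]
}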
@@ -115,13 +115,13 @@ func ReadTopologyInstance(instanceKey *InstanceKey, bufferWrites bool) (*Instanc instance := NewInstance() instanceFound := false - foundByShowSlaveHosts := false + foundByShowSubordinateHosts := false longRunningProcesses := []Process{} resolvedHostname := "" - maxScaleMasterHostname := "" + maxScaleMainHostname := "" isMaxScale := false isMaxScale110 := false - slaveStatusFound := false + subordinateStatusFound := false var resolveErr error if !instanceKey.IsValid() { @@ -155,7 +155,7 @@ func ReadTopologyInstance(instanceKey *InstanceKey, bufferWrites bool) (*Instanc instance.Binlog_format = "INHERIT" instance.ReadOnly = true instance.LogBinEnabled = true - instance.LogSlaveUpdatesEnabled = true + instance.LogSubordinateUpdatesEnabled = true resolvedHostname = instance.Key.Hostname UpdateResolvedHostname(resolvedHostname, resolvedHostname) isMaxScale = true @@ -177,9 +177,9 @@ func ReadTopologyInstance(instanceKey *InstanceKey, bufferWrites bool) (*Instanc isMaxScale110 = true } if isMaxScale110 { - // Buggy buggy maxscale 1.1.0. Reported Master_Host can be corrupted. - // Therefore we (currently) take @@hostname (which is masquarading as master host anyhow) - err = db.QueryRow("select @@hostname").Scan(&maxScaleMasterHostname) + // Buggy buggy maxscale 1.1.0. Reported Master_Host can be corrupted. + // Therefore we (currently) take @@hostname (which is masquerading as the main host anyhow) + err = db.QueryRow("select @@hostname").Scan(&maxScaleMainHostname) if err != nil { goto Cleanup } @@ -196,8 +196,8 @@ func ReadTopologyInstance(instanceKey *InstanceKey, bufferWrites bool) (*Instanc if !isMaxScale { var mysqlHostname, mysqlReportHost string - err = db.QueryRow("select @@global.hostname, ifnull(@@global.report_host, ''), @@global.server_id, @@global.version, @@global.read_only, @@global.binlog_format, @@global.log_bin, @@global.log_slave_updates").Scan( - &mysqlHostname, &mysqlReportHost, &instance.ServerID, &instance.Version, &instance.ReadOnly, &instance.Binlog_format, &instance.LogBinEnabled, &instance.LogSlaveUpdatesEnabled) + err = db.QueryRow("select @@global.hostname, ifnull(@@global.report_host, ''), @@global.server_id, @@global.version, @@global.read_only, @@global.binlog_format, @@global.log_bin, @@global.log_slave_updates").Scan( + &mysqlHostname, &mysqlReportHost, &instance.ServerID, &instance.Version, &instance.ReadOnly, &instance.Binlog_format, &instance.LogBinEnabled, &instance.LogSubordinateUpdatesEnabled) if err != nil { goto Cleanup } @@ -217,14 +217,14 @@ func ReadTopologyInstance(instanceKey *InstanceKey, bufferWrites bool) (*Instanc } if instance.IsOracleMySQL() && !instance.IsSmallerMajorVersionByString("5.6") { - var masterInfoRepositoryOnTable bool + var mainInfoRepositoryOnTable bool // Stuff only supported on Oracle MySQL >= 5.6 // ... // @@gtid_mode only available in Orcale MySQL >= 5.6 // Previous version just issued this query brute-force, but I don't like errors being issued where they shouldn't.
- _ = db.QueryRow("select @@global.gtid_mode = 'ON', @@global.server_uuid, @@global.gtid_purged, @@global.master_info_repository = 'TABLE'").Scan(&instance.SupportsOracleGTID, &instance.ServerUUID, &instance.GtidPurged, &masterInfoRepositoryOnTable) - if masterInfoRepositoryOnTable { - _ = db.QueryRow("select count(*) > 0 and MAX(User_name) != '' from mysql.slave_master_info").Scan(&instance.ReplicationCredentialsAvailable) + _ = db.QueryRow("select @@global.gtid_mode = 'ON', @@global.server_uuid, @@global.gtid_purged, @@global.main_info_repository = 'TABLE'").Scan(&instance.SupportsOracleGTID, &instance.ServerUUID, &instance.GtidPurged, &mainInfoRepositoryOnTable) + if mainInfoRepositoryOnTable { + _ = db.QueryRow("select count(*) > 0 and MAX(User_name) != '' from mysql.subordinate_main_info").Scan(&instance.ReplicationCredentialsAvailable) } } } @@ -271,18 +271,18 @@ func ReadTopologyInstance(instanceKey *InstanceKey, bufferWrites bool) (*Instanc // This can be overriden by later invocation of DetectPhysicalEnvironmentQuery } - err = sqlutils.QueryRowsMap(db, "show slave status", func(m sqlutils.RowMap) error { - instance.HasReplicationCredentials = (m.GetString("Master_User") != "") - instance.Slave_IO_Running = (m.GetString("Slave_IO_Running") == "Yes") + err = sqlutils.QueryRowsMap(db, "show subordinate status", func(m sqlutils.RowMap) error { + instance.HasReplicationCredentials = (m.GetString("Main_User") != "") + instance.Subordinate_IO_Running = (m.GetString("Subordinate_IO_Running") == "Yes") if isMaxScale110 { // Covering buggy MaxScale 1.1.0 - instance.Slave_IO_Running = instance.Slave_IO_Running && (m.GetString("Slave_IO_State") == "Binlog Dump") + instance.Subordinate_IO_Running = instance.Subordinate_IO_Running && (m.GetString("Subordinate_IO_State") == "Binlog Dump") } - instance.Slave_SQL_Running = (m.GetString("Slave_SQL_Running") == "Yes") - instance.ReadBinlogCoordinates.LogFile = m.GetString("Master_Log_File") - instance.ReadBinlogCoordinates.LogPos = m.GetInt64("Read_Master_Log_Pos") - instance.ExecBinlogCoordinates.LogFile = m.GetString("Relay_Master_Log_File") - instance.ExecBinlogCoordinates.LogPos = m.GetInt64("Exec_Master_Log_Pos") + instance.Subordinate_SQL_Running = (m.GetString("Subordinate_SQL_Running") == "Yes") + instance.ReadBinlogCoordinates.LogFile = m.GetString("Main_Log_File") + instance.ReadBinlogCoordinates.LogPos = m.GetInt64("Read_Main_Log_Pos") + instance.ExecBinlogCoordinates.LogFile = m.GetString("Relay_Main_Log_File") + instance.ExecBinlogCoordinates.LogPos = m.GetInt64("Exec_Main_Log_Pos") instance.IsDetached, _, _ = instance.ExecBinlogCoordinates.DetachedCoordinates() instance.RelaylogCoordinates.LogFile = m.GetString("Relay_Log_File") instance.RelaylogCoordinates.LogPos = m.GetInt64("Relay_Log_Pos") @@ -295,45 +295,45 @@ func ReadTopologyInstance(instanceKey *InstanceKey, bufferWrites bool) (*Instanc instance.UsingMariaDBGTID = (m.GetStringD("Using_Gtid", "No") != "No") instance.HasReplicationFilters = ((m.GetStringD("Replicate_Do_DB", "") != "") || (m.GetStringD("Replicate_Ignore_DB", "") != "") || (m.GetStringD("Replicate_Do_Table", "") != "") || (m.GetStringD("Replicate_Ignore_Table", "") != "") || (m.GetStringD("Replicate_Wild_Do_Table", "") != "") || (m.GetStringD("Replicate_Wild_Ignore_Table", "") != "")) - masterHostname := m.GetString("Master_Host") + mainHostname := m.GetString("Main_Host") if isMaxScale110 { - // Buggy buggy maxscale 1.1.0. Reported Master_Host can be corrupted. 
- // Therefore we (currently) take @@hostname (which is masquarading as master host anyhow) - masterHostname = maxScaleMasterHostname + // Buggy buggy maxscale 1.1.0. Reported Master_Host can be corrupted. + // Therefore we (currently) take @@hostname (which is masquerading as the main host anyhow) + mainHostname = maxScaleMainHostname } - masterKey, err := NewInstanceKeyFromStrings(masterHostname, m.GetString("Master_Port")) + mainKey, err := NewInstanceKeyFromStrings(mainHostname, m.GetString("Master_Port")) if err != nil { logReadTopologyInstanceError(instanceKey, "NewInstanceKeyFromStrings", err) } - masterKey.Hostname, resolveErr = ResolveHostname(masterKey.Hostname) + mainKey.Hostname, resolveErr = ResolveHostname(mainKey.Hostname) if resolveErr != nil { - logReadTopologyInstanceError(instanceKey, fmt.Sprintf("ResolveHostname(%q)", masterKey.Hostname), resolveErr) + logReadTopologyInstanceError(instanceKey, fmt.Sprintf("ResolveHostname(%q)", mainKey.Hostname), resolveErr) } - instance.MasterKey = *masterKey - instance.IsDetachedMaster = instance.MasterKey.IsDetached() - instance.SecondsBehindMaster = m.GetNullInt64("Seconds_Behind_Master") - if instance.SecondsBehindMaster.Valid && instance.SecondsBehindMaster.Int64 < 0 { - log.Warningf("Host: %+v, instance.SecondsBehindMaster < 0 [%+v], correcting to 0", instanceKey, instance.SecondsBehindMaster.Int64) - instance.SecondsBehindMaster.Int64 = 0 + instance.MainKey = *mainKey + instance.IsDetachedMain = instance.MainKey.IsDetached() + instance.SecondsBehindMain = m.GetNullInt64("Seconds_Behind_Master") + if instance.SecondsBehindMain.Valid && instance.SecondsBehindMain.Int64 < 0 { + log.Warningf("Host: %+v, instance.SecondsBehindMain < 0 [%+v], correcting to 0", instanceKey, instance.SecondsBehindMain.Int64) + instance.SecondsBehindMain.Int64 = 0 } // And until told otherwise: - instance.SlaveLagSeconds = instance.SecondsBehindMaster + instance.SubordinateLagSeconds = instance.SecondsBehindMain - instance.AllowTLS = (m.GetString("Master_SSL_Allowed") == "Yes") + instance.AllowTLS = (m.GetString("Master_SSL_Allowed") == "Yes") // Not breaking the flow even on error - slaveStatusFound = true + subordinateStatusFound = true return nil }) if err != nil { goto Cleanup } - if isMaxScale && !slaveStatusFound { + if isMaxScale && !subordinateStatusFound { err = fmt.Errorf("No 'SHOW SLAVE STATUS' output found for a MaxScale instance: %+v", instanceKey) goto Cleanup } if instance.LogBinEnabled { - err = sqlutils.QueryRowsMap(db, "show master status", func(m sqlutils.RowMap) error { + err = sqlutils.QueryRowsMap(db, "show master status", func(m sqlutils.RowMap) error { var err error instance.SelfBinlogCoordinates.LogFile = m.GetString("File") instance.SelfBinlogCoordinates.LogPos = m.GetInt64("Position") @@ -351,10 +351,10 @@ func ReadTopologyInstance(instanceKey *InstanceKey, bufferWrites bool) (*Instanc // No `goto Cleanup` after this point.
// ------------------------------------------------------------------------- - // Get slaves, either by SHOW SLAVE HOSTS or via PROCESSLIST + // Get subordinates, either by SHOW SLAVE HOSTS or via PROCESSLIST // MaxScale does not support PROCESSLIST, so SHOW SLAVE HOSTS is the only option - if config.Config.DiscoverByShowSlaveHosts || isMaxScale { - err := sqlutils.QueryRowsMap(db, `show slave hosts`, + if config.Config.DiscoverByShowSubordinateHosts || isMaxScale { + err := sqlutils.QueryRowsMap(db, `show slave hosts`, func(m sqlutils.RowMap) error { // MaxScale 1.1 may trigger an error with this command, but // also we may see issues if anything on the MySQL server locks up. @@ -368,25 +368,25 @@ func ReadTopologyInstance(instanceKey *InstanceKey, bufferWrites bool) (*Instanc return nil } // otherwise report the error to the caller - return fmt.Errorf("ReadTopologyInstance(%+v) 'show slave hosts' returned row with : <%v,%v>", instanceKey, host, port) + return fmt.Errorf("ReadTopologyInstance(%+v) 'show slave hosts' returned row with : <%v,%v>", instanceKey, host, port) } // Note: NewInstanceKeyFromStrings calls ResolveHostname() implicitly - slaveKey, err := NewInstanceKeyFromStrings(host, port) - if err == nil && slaveKey.IsValid() { - instance.AddSlaveKey(slaveKey) - foundByShowSlaveHosts = true + subordinateKey, err := NewInstanceKeyFromStrings(host, port) + if err == nil && subordinateKey.IsValid() { + instance.AddSubordinateKey(subordinateKey) + foundByShowSubordinateHosts = true } return err }) - logReadTopologyInstanceError(instanceKey, "show slave hosts", err) + logReadTopologyInstanceError(instanceKey, "show slave hosts", err) } - if !foundByShowSlaveHosts && !isMaxScale { + if !foundByShowSubordinateHosts && !isMaxScale { // Either not configured to read SHOW SLAVE HOSTS or nothing was there.
// Discover by processlist err := sqlutils.QueryRowsMap(db, ` select - substring_index(host, ':', 1) as slave_hostname + substring_index(host, ':', 1) as subordinate_hostname from information_schema.processlist where @@ -394,12 +394,12 @@ func ReadTopologyInstance(instanceKey *InstanceKey, bufferWrites bool) (*Instanc or command='Binlog Dump GTID' `, func(m sqlutils.RowMap) error { - cname, resolveErr := ResolveHostname(m.GetString("slave_hostname")) + cname, resolveErr := ResolveHostname(m.GetString("subordinate_hostname")) if resolveErr != nil { logReadTopologyInstanceError(instanceKey, "ResolveHostname: processlist", resolveErr) } - slaveKey := InstanceKey{Hostname: cname, Port: instance.Key.Port} - instance.AddSlaveKey(&slaveKey) + subordinateKey := InstanceKey{Hostname: cname, Port: instance.Key.Port} + instance.AddSubordinateKey(&subordinateKey) return err }) @@ -466,15 +466,15 @@ func ReadTopologyInstance(instanceKey *InstanceKey, bufferWrites bool) (*Instanc } } - if config.Config.SlaveLagQuery != "" && !isMaxScale { - if err := db.QueryRow(config.Config.SlaveLagQuery).Scan(&instance.SlaveLagSeconds); err == nil { - if instance.SlaveLagSeconds.Valid && instance.SlaveLagSeconds.Int64 < 0 { - log.Warningf("Host: %+v, instance.SlaveLagSeconds < 0 [%+v], correcting to 0", instanceKey, instance.SlaveLagSeconds.Int64) - instance.SlaveLagSeconds.Int64 = 0 + if config.Config.SubordinateLagQuery != "" && !isMaxScale { + if err := db.QueryRow(config.Config.SubordinateLagQuery).Scan(&instance.SubordinateLagSeconds); err == nil { + if instance.SubordinateLagSeconds.Valid && instance.SubordinateLagSeconds.Int64 < 0 { + log.Warningf("Host: %+v, instance.SubordinateLagSeconds < 0 [%+v], correcting to 0", instanceKey, instance.SubordinateLagSeconds.Int64) + instance.SubordinateLagSeconds.Int64 = 0 } } else { - instance.SlaveLagSeconds = instance.SecondsBehindMaster - logReadTopologyInstanceError(instanceKey, "SlaveLagQuery", err) + instance.SubordinateLagSeconds = instance.SecondsBehindMain + logReadTopologyInstanceError(instanceKey, "SubordinateLagQuery", err) } } @@ -528,7 +528,7 @@ func ReadTopologyInstance(instanceKey *InstanceKey, bufferWrites bool) (*Instanc } if instance.ReplicationDepth == 0 && config.Config.DetectClusterAliasQuery != "" && !isMaxScale { - // Only need to do on masters + // Only need to do on mains clusterAlias := "" err := db.QueryRow(config.Config.DetectClusterAliasQuery).Scan(&clusterAlias) if err != nil { @@ -538,7 +538,7 @@ func ReadTopologyInstance(instanceKey *InstanceKey, bufferWrites bool) (*Instanc instance.SuggestedClusterAlias = clusterAlias } if instance.ReplicationDepth == 0 && config.Config.DetectClusterDomainQuery != "" && !isMaxScale { - // Only need to do on masters + // Only need to do on mains domainName := "" if err := db.QueryRow(config.Config.DetectClusterDomainQuery).Scan(&domainName); err != nil { domainName = "" @@ -574,7 +574,7 @@ Cleanup: return nil, fmt.Errorf("Failed ReadTopologyInstance") } -// ReadInstanceClusterAttributes will return the cluster name for a given instance by looking at its master +// ReadInstanceClusterAttributes will return the cluster name for a given instance by looking at its main // and getting it from there. // It is a non-recursive function and so-called-recursion is performed upon periodic reading of // instances. 
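The doc comment above describes a simple derivation: an instance inherits cluster_name from its main and sits one replication level deeper, and with no known main it starts a cluster named after its own key. A simplified, hypothetical sketch of that rule; it deliberately omits the co-main special case handled below:

// cluster_attributes_sketch.go -- hypothetical illustrative sketch
package inst

// deriveClusterAttributesSketch mirrors the plain (non co-main) derivation: inherit the
// main's cluster name one level deeper, or fall back to a cluster named after the instance.
func deriveClusterAttributesSketch(mainClusterName string, mainReplicationDepth uint, mainDataFound bool, selfCode string) (clusterName string, replicationDepth uint) {
	if mainDataFound {
		clusterName, replicationDepth = mainClusterName, mainReplicationDepth+1
	}
	if clusterName == "" {
		clusterName = selfCode
	}
	return clusterName, replicationDepth
}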
@@ -583,29 +583,29 @@ func ReadInstanceClusterAttributes(instance *Instance) (err error) { return nil } - var masterMasterKey InstanceKey - var masterClusterName string - var masterReplicationDepth uint - masterDataFound := false + var mainMainKey InstanceKey + var mainClusterName string + var mainReplicationDepth uint + mainDataFound := false - // Read the cluster_name of the _master_ of our instance, derive it from there. + // Read the cluster_name of the _main_ of our instance, derive it from there. query := ` select cluster_name, replication_depth, - master_host, - master_port + main_host, + main_port from database_instance where hostname=? and port=? ` - args := sqlutils.Args(instance.MasterKey.Hostname, instance.MasterKey.Port) + args := sqlutils.Args(instance.MainKey.Hostname, instance.MainKey.Port) err = db.QueryOrchestrator(query, args, func(m sqlutils.RowMap) error { - masterClusterName = m.GetString("cluster_name") - masterReplicationDepth = m.GetUint("replication_depth") - masterMasterKey.Hostname = m.GetString("master_host") - masterMasterKey.Port = m.GetInt("master_port") - masterDataFound = true + mainClusterName = m.GetString("cluster_name") + mainReplicationDepth = m.GetUint("replication_depth") + mainMainKey.Hostname = m.GetString("main_host") + mainMainKey.Port = m.GetInt("main_port") + mainDataFound = true return nil }) if err != nil { @@ -614,25 +614,25 @@ func ReadInstanceClusterAttributes(instance *Instance) (err error) { var replicationDepth uint = 0 var clusterName string - if masterDataFound { - replicationDepth = masterReplicationDepth + 1 - clusterName = masterClusterName + if mainDataFound { + replicationDepth = mainReplicationDepth + 1 + clusterName = mainClusterName } clusterNameByInstanceKey := instance.Key.StringCode() if clusterName == "" { - // Nothing from master; we set it to be named after the instance itself + // Nothing from main; we set it to be named after the instance itself clusterName = clusterNameByInstanceKey } - isCoMaster := false - if masterMasterKey.Equals(&instance.Key) { - // co-master calls for special case, in fear of the infinite loop - isCoMaster = true - clusterNameByCoMasterKey := instance.MasterKey.StringCode() - if clusterName != clusterNameByInstanceKey && clusterName != clusterNameByCoMasterKey { - // Can be caused by a co-master topology failover - log.Errorf("ReadInstanceClusterAttributes: in co-master topology %s is not in (%s, %s). Forcing it to become one of them", clusterName, clusterNameByInstanceKey, clusterNameByCoMasterKey) - clusterName = math.TernaryString(instance.Key.SmallerThan(&instance.MasterKey), clusterNameByInstanceKey, clusterNameByCoMasterKey) + isCoMain := false + if mainMainKey.Equals(&instance.Key) { + // co-main calls for special case, in fear of the infinite loop + isCoMain = true + clusterNameByCoMainKey := instance.MainKey.StringCode() + if clusterName != clusterNameByInstanceKey && clusterName != clusterNameByCoMainKey { + // Can be caused by a co-main topology failover + log.Errorf("ReadInstanceClusterAttributes: in co-main topology %s is not in (%s, %s). Forcing it to become one of them", clusterName, clusterNameByInstanceKey, clusterNameByCoMainKey) + clusterName = math.TernaryString(instance.Key.SmallerThan(&instance.MainKey), clusterNameByInstanceKey, clusterNameByCoMainKey) } if clusterName == clusterNameByInstanceKey { // circular replication. 
Avoid infinite ++ on replicationDepth @@ -641,7 +641,7 @@ func ReadInstanceClusterAttributes(instance *Instance) (err error) { } instance.ClusterName = clusterName instance.ReplicationDepth = replicationDepth - instance.IsCoMaster = isCoMaster + instance.IsCoMain = isCoMain return nil } @@ -681,7 +681,7 @@ func ReadInstancePromotionRule(instance *Instance) (err error) { } var promotionRule CandidatePromotionRule = NeutralPromoteRule - // Read the cluster_name of the _master_ of our instance, derive it from there. + // Read the cluster_name of the _main_ of our instance, derive it from there. query := ` select ifnull(nullif(promotion_rule, ''), 'neutral') as promotion_rule @@ -711,12 +711,12 @@ func readInstanceRow(m sqlutils.RowMap) *Instance { instance.ReadOnly = m.GetBool("read_only") instance.Binlog_format = m.GetString("binlog_format") instance.LogBinEnabled = m.GetBool("log_bin") - instance.LogSlaveUpdatesEnabled = m.GetBool("log_slave_updates") - instance.MasterKey.Hostname = m.GetString("master_host") - instance.MasterKey.Port = m.GetInt("master_port") - instance.IsDetachedMaster = instance.MasterKey.IsDetached() - instance.Slave_SQL_Running = m.GetBool("slave_sql_running") - instance.Slave_IO_Running = m.GetBool("slave_io_running") + instance.LogSubordinateUpdatesEnabled = m.GetBool("log_subordinate_updates") + instance.MainKey.Hostname = m.GetString("main_host") + instance.MainKey.Port = m.GetInt("main_port") + instance.IsDetachedMain = instance.MainKey.IsDetached() + instance.Subordinate_SQL_Running = m.GetBool("subordinate_sql_running") + instance.Subordinate_IO_Running = m.GetBool("subordinate_io_running") instance.HasReplicationFilters = m.GetBool("has_replication_filters") instance.SupportsOracleGTID = m.GetBool("supports_oracle_gtid") instance.UsingOracleGTID = m.GetBool("oracle_gtid") @@ -726,27 +726,27 @@ func readInstanceRow(m sqlutils.RowMap) *Instance { instance.UsingPseudoGTID = m.GetBool("pseudo_gtid") instance.SelfBinlogCoordinates.LogFile = m.GetString("binary_log_file") instance.SelfBinlogCoordinates.LogPos = m.GetInt64("binary_log_pos") - instance.ReadBinlogCoordinates.LogFile = m.GetString("master_log_file") - instance.ReadBinlogCoordinates.LogPos = m.GetInt64("read_master_log_pos") - instance.ExecBinlogCoordinates.LogFile = m.GetString("relay_master_log_file") - instance.ExecBinlogCoordinates.LogPos = m.GetInt64("exec_master_log_pos") + instance.ReadBinlogCoordinates.LogFile = m.GetString("main_log_file") + instance.ReadBinlogCoordinates.LogPos = m.GetInt64("read_main_log_pos") + instance.ExecBinlogCoordinates.LogFile = m.GetString("relay_main_log_file") + instance.ExecBinlogCoordinates.LogPos = m.GetInt64("exec_main_log_pos") instance.IsDetached, _, _ = instance.ExecBinlogCoordinates.DetachedCoordinates() instance.RelaylogCoordinates.LogFile = m.GetString("relay_log_file") instance.RelaylogCoordinates.LogPos = m.GetInt64("relay_log_pos") instance.RelaylogCoordinates.Type = RelayLog instance.LastSQLError = m.GetString("last_sql_error") instance.LastIOError = m.GetString("last_io_error") - instance.SecondsBehindMaster = m.GetNullInt64("seconds_behind_master") - instance.SlaveLagSeconds = m.GetNullInt64("slave_lag_seconds") + instance.SecondsBehindMain = m.GetNullInt64("seconds_behind_main") + instance.SubordinateLagSeconds = m.GetNullInt64("subordinate_lag_seconds") instance.SQLDelay = m.GetUint("sql_delay") - slaveHostsJSON := m.GetString("slave_hosts") + subordinateHostsJSON := m.GetString("subordinate_hosts") instance.ClusterName = m.GetString("cluster_name") 
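// A simplified sketch of the cluster-attribute derivation above, using hypothetical
// types: inherit cluster name and depth from the main's row when it is known,
// otherwise name the cluster after the instance itself, and in a co-main pair pick
// one canonical name deterministically (smaller key wins) to avoid flip-flopping.
package sketch

import "fmt"

type key struct {
	Host string
	Port int
}

func (k key) code() string { return fmt.Sprintf("%s:%d", k.Host, k.Port) }

func (k key) smallerThan(o key) bool {
	if k.Host != o.Host {
		return k.Host < o.Host
	}
	return k.Port < o.Port
}

func deriveClusterAttributes(self, main, mainsMain key, mainFound bool, mainCluster string, mainDepth uint) (cluster string, depth uint, isCoMain bool) {
	if mainFound {
		cluster, depth = mainCluster, mainDepth+1
	}
	if cluster == "" {
		cluster = self.code() // nothing known about the main: self-named cluster
	}
	if mainsMain == self {
		// co-main: the main's main points back at us; force a single canonical name
		isCoMain = true
		if cluster != self.code() && cluster != main.code() {
			if self.smallerThan(main) {
				cluster = self.code()
			} else {
				cluster = main.code()
			}
		}
		if cluster == self.code() {
			depth = 0 // circular replication: avoid incrementing depth forever
		}
	}
	return cluster, depth, isCoMain
}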
instance.SuggestedClusterAlias = m.GetString("suggested_cluster_alias") instance.DataCenter = m.GetString("data_center") instance.PhysicalEnvironment = m.GetString("physical_environment") instance.SemiSyncEnforced = m.GetBool("semi_sync_enforced") instance.ReplicationDepth = m.GetUint("replication_depth") - instance.IsCoMaster = m.GetBool("is_co_master") + instance.IsCoMain = m.GetBool("is_co_main") instance.ReplicationCredentialsAvailable = m.GetBool("replication_credentials_available") instance.HasReplicationCredentials = m.GetBool("has_replication_credentials") instance.IsUpToDate = (m.GetUint("seconds_since_last_checked") <= config.Config.InstancePollSeconds) @@ -764,7 +764,7 @@ func readInstanceRow(m sqlutils.RowMap) *Instance { instance.AllowTLS = m.GetBool("allow_tls") instance.InstanceAlias = m.GetString("instance_alias") - instance.SlaveHosts.ReadJson(slaveHostsJSON) + instance.SubordinateHosts.ReadJson(subordinateHostsJSON) return instance } @@ -857,32 +857,32 @@ func ReadClusterInstances(clusterName string) ([](*Instance), error) { return readInstancesByCondition(condition, sqlutils.Args(clusterName), "") } -// ReadClusterWriteableMaster returns the/a writeable master of this cluster -// Typically, the cluster name indicates the master of the cluster. However, in circular -// master-master replication one master can assume the name of the cluster, and it is +// ReadClusterWriteableMain returns the/a writeable main of this cluster +// Typically, the cluster name indicates the main of the cluster. However, in circular +// main-main replication one main can assume the name of the cluster, and it is // not guaranteed that it is the writeable one. -func ReadClusterWriteableMaster(clusterName string) ([](*Instance), error) { +func ReadClusterWriteableMain(clusterName string) ([](*Instance), error) { condition := ` cluster_name = ? 
and read_only = 0 - and (replication_depth = 0 or is_co_master) + and (replication_depth = 0 or is_co_main) ` return readInstancesByCondition(condition, sqlutils.Args(clusterName), "replication_depth asc") } -// ReadWriteableClustersMasters returns writeable masters of all clusters, but only one -// per cluster, in similar logic to ReadClusterWriteableMaster -func ReadWriteableClustersMasters() (instances [](*Instance), err error) { +// ReadWriteableClustersMains returns writeable mains of all clusters, but only one +// per cluster, in similar logic to ReadClusterWriteableMain +func ReadWriteableClustersMains() (instances [](*Instance), err error) { condition := ` read_only = 0 - and (replication_depth = 0 or is_co_master) + and (replication_depth = 0 or is_co_main) ` - allMasters, err := readInstancesByCondition(condition, sqlutils.Args(), "cluster_name asc, replication_depth asc") + allMains, err := readInstancesByCondition(condition, sqlutils.Args(), "cluster_name asc, replication_depth asc") if err != nil { return instances, err } visitedClusters := make(map[string]bool) - for _, instance := range allMasters { + for _, instance := range allMains { if !visitedClusters[instance.ClusterName] { visitedClusters[instance.ClusterName] = true instances = append(instances, instance) @@ -891,43 +891,43 @@ func ReadWriteableClustersMasters() (instances [](*Instance), err error) { return instances, err } -// ReadSlaveInstances reads slaves of a given master -func ReadSlaveInstances(masterKey *InstanceKey) ([](*Instance), error) { +// ReadSubordinateInstances reads subordinates of a given main +func ReadSubordinateInstances(mainKey *InstanceKey) ([](*Instance), error) { condition := ` - master_host = ? - and master_port = ? + main_host = ? + and main_port = ? ` - return readInstancesByCondition(condition, sqlutils.Args(masterKey.Hostname, masterKey.Port), "") + return readInstancesByCondition(condition, sqlutils.Args(mainKey.Hostname, mainKey.Port), "") } -// ReadSlaveInstancesIncludingBinlogServerSubSlaves returns a list of direct slves including any slaves +// ReadSubordinateInstancesIncludingBinlogServerSubSubordinates returns a list of direct slves including any subordinates // of a binlog server replica -func ReadSlaveInstancesIncludingBinlogServerSubSlaves(masterKey *InstanceKey) ([](*Instance), error) { - slaves, err := ReadSlaveInstances(masterKey) +func ReadSubordinateInstancesIncludingBinlogServerSubSubordinates(mainKey *InstanceKey) ([](*Instance), error) { + subordinates, err := ReadSubordinateInstances(mainKey) if err != nil { - return slaves, err + return subordinates, err } - for _, slave := range slaves { - slave := slave - if slave.IsBinlogServer() { - binlogServerSlaves, err := ReadSlaveInstancesIncludingBinlogServerSubSlaves(&slave.Key) + for _, subordinate := range subordinates { + subordinate := subordinate + if subordinate.IsBinlogServer() { + binlogServerSubordinates, err := ReadSubordinateInstancesIncludingBinlogServerSubSubordinates(&subordinate.Key) if err != nil { - return slaves, err + return subordinates, err } - slaves = append(slaves, binlogServerSlaves...) + subordinates = append(subordinates, binlogServerSubordinates...) 
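// A hedged sketch of the recursion just above: read the direct replicas of a main
// and, for any replica that is itself a binlog server, fold its own replicas into
// the result. readReplicas is a hypothetical stand-in for the DB read performed above.
package sketch

type inst struct {
	Key            string
	IsBinlogServer bool
}

func expandThroughBinlogServers(mainKey string, readReplicas func(string) ([]inst, error)) ([]inst, error) {
	replicas, err := readReplicas(mainKey)
	if err != nil {
		return replicas, err
	}
	for _, r := range replicas {
		if r.IsBinlogServer {
			// a binlog server is transparent: its replicas logically serve the same main
			sub, err := expandThroughBinlogServers(r.Key, readReplicas)
			if err != nil {
				return replicas, err
			}
			replicas = append(replicas, sub...)
		}
	}
	return replicas, nil
}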
} } - return slaves, err + return subordinates, err } -// ReadBinlogServerSlaveInstances reads direct slaves of a given master that are binlog servers -func ReadBinlogServerSlaveInstances(masterKey *InstanceKey) ([](*Instance), error) { +// ReadBinlogServerSubordinateInstances reads direct subordinates of a given main that are binlog servers +func ReadBinlogServerSubordinateInstances(mainKey *InstanceKey) ([](*Instance), error) { condition := ` - master_host = ? - and master_port = ? + main_host = ? + and main_port = ? and binlog_server = 1 ` - return readInstancesByCondition(condition, sqlutils.Args(masterKey.Hostname, masterKey.Port), "") + return readInstancesByCondition(condition, sqlutils.Args(mainKey.Hostname, mainKey.Port), "") } // ReadUnseenInstances reads all instances which were not recently seen @@ -943,10 +943,10 @@ func ReadProblemInstances(clusterName string) ([](*Instance), error) { and ( (last_seen < last_checked) or (not ifnull(timestampdiff(second, last_checked, now()) <= ?, false)) - or (not slave_sql_running) - or (not slave_io_running) - or (abs(cast(seconds_behind_master as signed) - cast(sql_delay as signed)) > ?) - or (abs(cast(slave_lag_seconds as signed) - cast(sql_delay as signed)) > ?) + or (not subordinate_sql_running) + or (not subordinate_io_running) + or (abs(cast(seconds_behind_main as signed) - cast(sql_delay as signed)) > ?) + or (abs(cast(subordinate_lag_seconds as signed) - cast(sql_delay as signed)) > ?) ) ` @@ -985,13 +985,13 @@ func SearchInstances(searchString string) ([](*Instance), error) { or concat(port, '') = ? ` args := sqlutils.Args(searchString, searchString, searchString, searchString, searchString, searchString) - return readInstancesByCondition(condition, args, `replication_depth asc, num_slave_hosts desc, cluster_name, hostname, port`) + return readInstancesByCondition(condition, args, `replication_depth asc, num_subordinate_hosts desc, cluster_name, hostname, port`) } // FindInstances reads all instances whose name matches given pattern func FindInstances(regexpPattern string) ([](*Instance), error) { condition := `hostname rlike ?` - return readInstancesByCondition(condition, sqlutils.Args(regexpPattern), `replication_depth asc, num_slave_hosts desc, cluster_name, hostname, port`) + return readInstancesByCondition(condition, sqlutils.Args(regexpPattern), `replication_depth asc, num_subordinate_hosts desc, cluster_name, hostname, port`) } // FindFuzzyInstances return instances whose names are like the one given (host & port substrings) @@ -1001,7 +1001,7 @@ func FindFuzzyInstances(fuzzyInstanceKey *InstanceKey) ([](*Instance), error) { hostname like concat('%', ?, '%') and port = ? ` - return readInstancesByCondition(condition, sqlutils.Args(fuzzyInstanceKey.Hostname, fuzzyInstanceKey.Port), `replication_depth asc, num_slave_hosts desc, cluster_name, hostname, port`) + return readInstancesByCondition(condition, sqlutils.Args(fuzzyInstanceKey.Hostname, fuzzyInstanceKey.Port), `replication_depth asc, num_subordinate_hosts desc, cluster_name, hostname, port`) } // FindClusterNameByFuzzyInstanceKey attempts to find a uniquely identifyable cluster name @@ -1066,7 +1066,7 @@ func ReadFuzzyInstance(fuzzyInstanceKey *InstanceKey) (*Instance, error) { // ReadLostInRecoveryInstances returns all instances (potentially filtered by cluster) // which are currently indicated as downtimed due to being lost during a topology recovery. 
// Keep in mind: -// - instances are only marked as such when config's MasterFailoverLostInstancesDowntimeMinutes > 0 +// - instances are only marked as such when config's MainFailoverLostInstancesDowntimeMinutes > 0 // - The downtime expires at some point func ReadLostInRecoveryInstances(clusterName string) ([](*Instance), error) { condition := ` @@ -1092,7 +1092,7 @@ func ReadClusterCandidateInstances(clusterName string) ([](*Instance), error) { return readInstancesByCondition(condition, sqlutils.Args(clusterName), "") } -// filterOSCInstances will filter the given list such that only slaves fit for OSC control remain. +// filterOSCInstances will filter the given list such that only subordinates fit for OSC control remain. func filterOSCInstances(instances [](*Instance)) [](*Instance) { result := [](*Instance){} for _, instance := range instances { @@ -1116,10 +1116,10 @@ func filterOSCInstances(instances [](*Instance)) [](*Instance) { return result } -// GetClusterOSCSlaves returns a heuristic list of slaves which are fit as controll slaves for an OSC operation. -// These would be intermediate masters -func GetClusterOSCSlaves(clusterName string) ([](*Instance), error) { - intermediateMasters := [](*Instance){} +// GetClusterOSCSubordinates returns a heuristic list of subordinates which are fit as controll subordinates for an OSC operation. +// These would be intermediate mains +func GetClusterOSCSubordinates(clusterName string) ([](*Instance), error) { + intermediateMains := [](*Instance){} result := [](*Instance){} var err error if strings.Index(clusterName, "'") >= 0 { @@ -1129,91 +1129,91 @@ func GetClusterOSCSlaves(clusterName string) ([](*Instance), error) { // Pick up to two busiest IMs condition := ` replication_depth = 1 - and num_slave_hosts > 0 + and num_subordinate_hosts > 0 and cluster_name = ? ` - intermediateMasters, err = readInstancesByCondition(condition, sqlutils.Args(clusterName), "") + intermediateMains, err = readInstancesByCondition(condition, sqlutils.Args(clusterName), "") if err != nil { return result, err } - sort.Sort(sort.Reverse(InstancesByCountSlaveHosts(intermediateMasters))) - intermediateMasters = filterOSCInstances(intermediateMasters) - intermediateMasters = intermediateMasters[0:math.MinInt(2, len(intermediateMasters))] - result = append(result, intermediateMasters...) + sort.Sort(sort.Reverse(InstancesByCountSubordinateHosts(intermediateMains))) + intermediateMains = filterOSCInstances(intermediateMains) + intermediateMains = intermediateMains[0:math.MinInt(2, len(intermediateMains))] + result = append(result, intermediateMains...) } { - // Get 2 slaves of found IMs, if possible - if len(intermediateMasters) == 1 { - // Pick 2 slaves for this IM - slaves, err := ReadSlaveInstances(&(intermediateMasters[0].Key)) + // Get 2 subordinates of found IMs, if possible + if len(intermediateMains) == 1 { + // Pick 2 subordinates for this IM + subordinates, err := ReadSubordinateInstances(&(intermediateMains[0].Key)) if err != nil { return result, err } - sort.Sort(sort.Reverse(InstancesByCountSlaveHosts(slaves))) - slaves = filterOSCInstances(slaves) - slaves = slaves[0:math.MinInt(2, len(slaves))] - result = append(result, slaves...) + sort.Sort(sort.Reverse(InstancesByCountSubordinateHosts(subordinates))) + subordinates = filterOSCInstances(subordinates) + subordinates = subordinates[0:math.MinInt(2, len(subordinates))] + result = append(result, subordinates...) 
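// The OSC selection above keeps reusing one pattern: order candidates by how many
// replicas they serve (busiest first), drop unfit ones, keep at most two. A generic
// sketch of that pattern, with a hypothetical fitness predicate standing in for
// filterOSCInstances-style checks.
package sketch

import "sort"

type candidate struct {
	Key         string
	NumReplicas int
}

func pickBusiest(candidates []candidate, fit func(candidate) bool, max int) []candidate {
	sort.Slice(candidates, func(i, j int) bool {
		return candidates[i].NumReplicas > candidates[j].NumReplicas
	})
	picked := make([]candidate, 0, max)
	for _, c := range candidates {
		if len(picked) == max {
			break
		}
		if fit(c) {
			picked = append(picked, c)
		}
	}
	return picked
}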
} - if len(intermediateMasters) == 2 { - // Pick one slave from each IM (should be possible) - for _, im := range intermediateMasters { - slaves, err := ReadSlaveInstances(&im.Key) + if len(intermediateMains) == 2 { + // Pick one subordinate from each IM (should be possible) + for _, im := range intermediateMains { + subordinates, err := ReadSubordinateInstances(&im.Key) if err != nil { return result, err } - sort.Sort(sort.Reverse(InstancesByCountSlaveHosts(slaves))) - slaves = filterOSCInstances(slaves) - if len(slaves) > 0 { - result = append(result, slaves[0]) + sort.Sort(sort.Reverse(InstancesByCountSubordinateHosts(subordinates))) + subordinates = filterOSCInstances(subordinates) + if len(subordinates) > 0 { + result = append(result, subordinates[0]) } } } } { - // Get 2 3rd tier slaves, if possible + // Get 2 3rd tier subordinates, if possible condition := ` replication_depth = 3 and cluster_name = ? ` - slaves, err := readInstancesByCondition(condition, sqlutils.Args(clusterName), "") + subordinates, err := readInstancesByCondition(condition, sqlutils.Args(clusterName), "") if err != nil { return result, err } - sort.Sort(sort.Reverse(InstancesByCountSlaveHosts(slaves))) - slaves = filterOSCInstances(slaves) - slaves = slaves[0:math.MinInt(2, len(slaves))] - result = append(result, slaves...) + sort.Sort(sort.Reverse(InstancesByCountSubordinateHosts(subordinates))) + subordinates = filterOSCInstances(subordinates) + subordinates = subordinates[0:math.MinInt(2, len(subordinates))] + result = append(result, subordinates...) } { - // Get 2 1st tier leaf slaves, if possible + // Get 2 1st tier leaf subordinates, if possible condition := ` replication_depth = 1 - and num_slave_hosts = 0 + and num_subordinate_hosts = 0 and cluster_name = ? ` - slaves, err := readInstancesByCondition(condition, sqlutils.Args(clusterName), "") + subordinates, err := readInstancesByCondition(condition, sqlutils.Args(clusterName), "") if err != nil { return result, err } - slaves = filterOSCInstances(slaves) - slaves = slaves[0:math.MinInt(2, len(slaves))] - result = append(result, slaves...) + subordinates = filterOSCInstances(subordinates) + subordinates = subordinates[0:math.MinInt(2, len(subordinates))] + result = append(result, subordinates...) } return result, nil } -// GetClusterGhostSlaves returns a list of replicas that can serve as the connected servers +// GetClusterGhostSubordinates returns a list of replicas that can serve as the connected servers // for a [gh-ost](https://github.com/github/gh-ost) operation. A gh-ost operation prefers to talk // to a RBR replica that has no children. -func GetClusterGhostSlaves(clusterName string) (result [](*Instance), err error) { +func GetClusterGhostSubordinates(clusterName string) (result [](*Instance), err error) { condition := ` replication_depth > 0 and binlog_format = 'ROW' and cluster_name = ? 
` - instances, err := readInstancesByCondition(condition, sqlutils.Args(clusterName), "num_slave_hosts asc") + instances, err := readInstancesByCondition(condition, sqlutils.Args(clusterName), "num_subordinate_hosts asc") if err != nil { return result, err } @@ -1229,7 +1229,7 @@ func GetClusterGhostSlaves(clusterName string) (result [](*Instance), err error) if !instance.LogBinEnabled { skipThisHost = true } - if !instance.LogSlaveUpdatesEnabled { + if !instance.LogSubordinateUpdatesEnabled { skipThisHost = true } if !skipThisHost { @@ -1246,16 +1246,16 @@ func GetInstancesMaxLag(instances [](*Instance)) (maxLag int64, err error) { return 0, log.Errorf("No instances found in GetInstancesMaxLag") } for _, clusterInstance := range instances { - if clusterInstance.SlaveLagSeconds.Valid && clusterInstance.SlaveLagSeconds.Int64 > maxLag { - maxLag = clusterInstance.SlaveLagSeconds.Int64 + if clusterInstance.SubordinateLagSeconds.Valid && clusterInstance.SubordinateLagSeconds.Int64 > maxLag { + maxLag = clusterInstance.SubordinateLagSeconds.Int64 } } return maxLag, nil } -// GetClusterHeuristicLag returns a heuristic lag for a cluster, based on its OSC slaves +// GetClusterHeuristicLag returns a heuristic lag for a cluster, based on its OSC subordinates func GetClusterHeuristicLag(clusterName string) (int64, error) { - instances, err := GetClusterOSCSlaves(clusterName) + instances, err := GetClusterOSCSubordinates(clusterName) if err != nil { return 0, err } @@ -1339,12 +1339,12 @@ func ReviewUnseenInstances() error { for _, instance := range instances { instance := instance - masterHostname, err := ResolveHostname(instance.MasterKey.Hostname) + mainHostname, err := ResolveHostname(instance.MainKey.Hostname) if err != nil { log.Errore(err) continue } - instance.MasterKey.Hostname = masterHostname + instance.MainKey.Hostname = mainHostname savedClusterName := instance.ClusterName if err := ReadInstanceClusterAttributes(instance); err != nil { @@ -1359,30 +1359,30 @@ func ReviewUnseenInstances() error { return err } -// readUnseenMasterKeys will read list of masters that have never been seen, and yet whose slaves +// readUnseenMainKeys will read list of mains that have never been seen, and yet whose subordinates // seem to be replicating. 
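// A small sketch of the gh-ost candidate filter above, on a pared-down view of the
// instance fields it cares about. The SQL already restricts to ROW-format replicas
// ordered by replica count ascending (leaves first); the loop then drops anything
// that cannot feed a binlog consumer. Field names here are illustrative.
package sketch

type ghostCandidate struct {
	Key               string
	LogBinEnabled     bool
	LogUpdatesEnabled bool
	NumReplicas       int
}

func filterGhostCandidates(candidates []ghostCandidate) []ghostCandidate {
	result := make([]ghostCandidate, 0, len(candidates))
	for _, c := range candidates {
		if !c.LogBinEnabled || !c.LogUpdatesEnabled {
			continue // gh-ost needs the replica to write and relay binary logs
		}
		result = append(result, c)
	}
	return result
}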
-func readUnseenMasterKeys() ([]InstanceKey, error) { +func readUnseenMainKeys() ([]InstanceKey, error) { res := []InstanceKey{} err := db.QueryOrchestratorRowsMap(` SELECT DISTINCT - slave_instance.master_host, slave_instance.master_port + subordinate_instance.main_host, subordinate_instance.main_port FROM - database_instance slave_instance + database_instance subordinate_instance LEFT JOIN - hostname_resolve ON (slave_instance.master_host = hostname_resolve.hostname) + hostname_resolve ON (subordinate_instance.main_host = hostname_resolve.hostname) LEFT JOIN - database_instance master_instance ON ( - COALESCE(hostname_resolve.resolved_hostname, slave_instance.master_host) = master_instance.hostname - and slave_instance.master_port = master_instance.port) + database_instance main_instance ON ( + COALESCE(hostname_resolve.resolved_hostname, subordinate_instance.main_host) = main_instance.hostname + and subordinate_instance.main_port = main_instance.port) WHERE - master_instance.last_checked IS NULL - and slave_instance.master_host != '' - and slave_instance.master_host != '_' - and slave_instance.master_port > 0 - and slave_instance.slave_io_running = 1 + main_instance.last_checked IS NULL + and subordinate_instance.main_host != '' + and subordinate_instance.main_host != '_' + and subordinate_instance.main_port > 0 + and subordinate_instance.subordinate_io_running = 1 `, func(m sqlutils.RowMap) error { - instanceKey, _ := NewInstanceKeyFromStrings(m.GetString("master_host"), m.GetString("master_port")) + instanceKey, _ := NewInstanceKeyFromStrings(m.GetString("main_host"), m.GetString("main_port")) // we ignore the error. It can be expected that we are unable to resolve the hostname. // Maybe that's how we got here in the first place! res = append(res, *instanceKey) @@ -1396,28 +1396,28 @@ func readUnseenMasterKeys() ([]InstanceKey, error) { return res, nil } -// InjectUnseenMasters will review masters of instances that are known to be replicating, yet which are not listed -// in database_instance. Since their slaves are listed as replicating, we can assume that such masters actually do +// InjectUnseenMains will review mains of instances that are known to be replicating, yet which are not listed +// in database_instance. Since their subordinates are listed as replicating, we can assume that such mains actually do // exist: we shall therefore inject them with minimal details into the database_instance table. -func InjectUnseenMasters() error { +func InjectUnseenMains() error { - unseenMasterKeys, err := readUnseenMasterKeys() + unseenMainKeys, err := readUnseenMainKeys() if err != nil { return err } operations := 0 - for _, masterKey := range unseenMasterKeys { - masterKey := masterKey - clusterName := masterKey.StringCode() + for _, mainKey := range unseenMainKeys { + mainKey := mainKey + clusterName := mainKey.StringCode() // minimal details: - instance := Instance{Key: masterKey, Version: "Unknown", ClusterName: clusterName} + instance := Instance{Key: mainKey, Version: "Unknown", ClusterName: clusterName} if err := writeInstance(&instance, false, nil); err == nil { operations++ } } - AuditOperation("inject-unseen-masters", nil, fmt.Sprintf("Operations: %d", operations)) + AuditOperation("inject-unseen-mains", nil, fmt.Sprintf("Operations: %d", operations)) return err } @@ -1447,28 +1447,28 @@ func ForgetUnseenInstancesDifferentlyResolved() error { return err } -// readUnknownMasterHostnameResolves will figure out the resolved hostnames of master-hosts which cannot be found. 
+// readUnknownMainHostnameResolves will figure out the resolved hostnames of main-hosts which cannot be found. // It uses the hostname_resolve_history table to heuristically guess the correct hostname (based on "this was the // last time we saw this hostname and it resolves into THAT") -func readUnknownMasterHostnameResolves() (map[string]string, error) { +func readUnknownMainHostnameResolves() (map[string]string, error) { res := make(map[string]string) err := db.QueryOrchestratorRowsMap(` SELECT DISTINCT - slave_instance.master_host, hostname_resolve_history.resolved_hostname + subordinate_instance.main_host, hostname_resolve_history.resolved_hostname FROM - database_instance slave_instance - LEFT JOIN hostname_resolve ON (slave_instance.master_host = hostname_resolve.hostname) - LEFT JOIN database_instance master_instance ON ( - COALESCE(hostname_resolve.resolved_hostname, slave_instance.master_host) = master_instance.hostname - and slave_instance.master_port = master_instance.port - ) LEFT JOIN hostname_resolve_history ON (slave_instance.master_host = hostname_resolve_history.hostname) + database_instance subordinate_instance + LEFT JOIN hostname_resolve ON (subordinate_instance.main_host = hostname_resolve.hostname) + LEFT JOIN database_instance main_instance ON ( + COALESCE(hostname_resolve.resolved_hostname, subordinate_instance.main_host) = main_instance.hostname + and subordinate_instance.main_port = main_instance.port + ) LEFT JOIN hostname_resolve_history ON (subordinate_instance.main_host = hostname_resolve_history.hostname) WHERE - master_instance.last_checked IS NULL - and slave_instance.master_host != '' - and slave_instance.master_host != '_' - and slave_instance.master_port > 0 + main_instance.last_checked IS NULL + and subordinate_instance.main_host != '' + and subordinate_instance.main_host != '_' + and subordinate_instance.main_port > 0 `, func(m sqlutils.RowMap) error { - res[m.GetString("master_host")] = m.GetString("resolved_hostname") + res[m.GetString("main_host")] = m.GetString("resolved_hostname") return nil }) if err != nil { @@ -1478,12 +1478,12 @@ func readUnknownMasterHostnameResolves() (map[string]string, error) { return res, nil } -// ResolveUnknownMasterHostnameResolves fixes missing hostname resolves based on hostname_resolve_history -// The use case is slaves replicating from some unknown-hostname which cannot be otherwise found. This could +// ResolveUnknownMainHostnameResolves fixes missing hostname resolves based on hostname_resolve_history +// The use case is subordinates replicating from some unknown-hostname which cannot be otherwise found. This could // happen due to an expire unresolve together with clearing up of hostname cache. 
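// A sketch of the unseen-main injection above, under simplified assumptions: collect
// the main endpoints that replicas claim to replicate from, skip placeholder
// coordinates, subtract endpoints already in the inventory, and write a minimal
// record for the rest. insertMinimal is a hypothetical stand-in for the write above.
package sketch

type endpoint struct {
	Host string
	Port int
}

func injectUnseenMains(reported []endpoint, known map[endpoint]bool, insertMinimal func(endpoint) error) (injected int) {
	seen := make(map[endpoint]bool)
	for _, main := range reported {
		if main.Host == "" || main.Host == "_" || main.Port <= 0 {
			continue // placeholder coordinates, nothing to inject
		}
		if known[main] || seen[main] {
			continue
		}
		seen[main] = true
		if err := insertMinimal(main); err == nil {
			injected++
		}
	}
	return injected
}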
-func ResolveUnknownMasterHostnameResolves() error { +func ResolveUnknownMainHostnameResolves() error { - hostnameResolves, err := readUnknownMasterHostnameResolves() + hostnameResolves, err := readUnknownMainHostnameResolves() if err != nil { return err } @@ -1491,7 +1491,7 @@ func ResolveUnknownMasterHostnameResolves() error { UpdateResolvedHostname(hostname, resolvedHostname) } - AuditOperation("resolve-unknown-masters", nil, fmt.Sprintf("Num resolved hostnames: %d", len(hostnameResolves))) + AuditOperation("resolve-unknown-mains", nil, fmt.Sprintf("Num resolved hostnames: %d", len(hostnameResolves))) return err } @@ -1638,7 +1638,7 @@ func ReadClustersInfo(clusterName string) ([]ClusterInfo, error) { } // HeuristicallyApplyClusterDomainInstanceAttribute writes down the cluster-domain -// to master-hostname as a general attribute, by reading current topology and **trusting** it to be correct +// to main-hostname as a general attribute, by reading current topology and **trusting** it to be correct func HeuristicallyApplyClusterDomainInstanceAttribute(clusterName string) (instanceKey *InstanceKey, err error) { clusterInfo, err := ReadClusterInfo(clusterName) if err != nil { @@ -1649,14 +1649,14 @@ func HeuristicallyApplyClusterDomainInstanceAttribute(clusterName string) (insta return nil, fmt.Errorf("Cannot find domain name for cluster %+v", clusterName) } - masters, err := ReadClusterWriteableMaster(clusterName) + mains, err := ReadClusterWriteableMain(clusterName) if err != nil { return nil, err } - if len(masters) != 1 { - return nil, fmt.Errorf("Found %+v potential master for cluster %+v", len(masters), clusterName) + if len(mains) != 1 { + return nil, fmt.Errorf("Found %+v potential main for cluster %+v", len(mains), clusterName) } - instanceKey = &masters[0].Key + instanceKey = &mains[0].Key return instanceKey, attributes.SetGeneralAttribute(clusterInfo.ClusterDomain, instanceKey.StringCode()) } @@ -1785,13 +1785,13 @@ func mkInsertOdkuForInstances(instances []*Instance, instanceWasActuallyFound bo "read_only", "binlog_format", "log_bin", - "log_slave_updates", + "log_subordinate_updates", "binary_log_file", "binary_log_pos", - "master_host", - "master_port", - "slave_sql_running", - "slave_io_running", + "main_host", + "main_port", + "subordinate_sql_running", + "subordinate_io_running", "has_replication_filters", "supports_oracle_gtid", "oracle_gtid", @@ -1799,25 +1799,25 @@ func mkInsertOdkuForInstances(instances []*Instance, instanceWasActuallyFound bo "gtid_purged", "mariadb_gtid", "pseudo_gtid", - "master_log_file", - "read_master_log_pos", - "relay_master_log_file", - "exec_master_log_pos", + "main_log_file", + "read_main_log_pos", + "relay_main_log_file", + "exec_main_log_pos", "relay_log_file", "relay_log_pos", "last_sql_error", "last_io_error", - "seconds_behind_master", - "slave_lag_seconds", + "seconds_behind_main", + "subordinate_lag_seconds", "sql_delay", - "num_slave_hosts", - "slave_hosts", + "num_subordinate_hosts", + "subordinate_hosts", "cluster_name", "suggested_cluster_alias", "data_center", "physical_environment", "replication_depth", - "is_co_master", + "is_co_main", "replication_credentials_available", "has_replication_credentials", "allow_tls", @@ -1851,13 +1851,13 @@ func mkInsertOdkuForInstances(instances []*Instance, instanceWasActuallyFound bo args = append(args, instance.ReadOnly) args = append(args, instance.Binlog_format) args = append(args, instance.LogBinEnabled) - args = append(args, instance.LogSlaveUpdatesEnabled) + args = append(args, 
instance.LogSubordinateUpdatesEnabled) args = append(args, instance.SelfBinlogCoordinates.LogFile) args = append(args, instance.SelfBinlogCoordinates.LogPos) - args = append(args, instance.MasterKey.Hostname) - args = append(args, instance.MasterKey.Port) - args = append(args, instance.Slave_SQL_Running) - args = append(args, instance.Slave_IO_Running) + args = append(args, instance.MainKey.Hostname) + args = append(args, instance.MainKey.Port) + args = append(args, instance.Subordinate_SQL_Running) + args = append(args, instance.Subordinate_IO_Running) args = append(args, instance.HasReplicationFilters) args = append(args, instance.SupportsOracleGTID) args = append(args, instance.UsingOracleGTID) @@ -1873,17 +1873,17 @@ func mkInsertOdkuForInstances(instances []*Instance, instanceWasActuallyFound bo args = append(args, instance.RelaylogCoordinates.LogPos) args = append(args, instance.LastSQLError) args = append(args, instance.LastIOError) - args = append(args, instance.SecondsBehindMaster) - args = append(args, instance.SlaveLagSeconds) + args = append(args, instance.SecondsBehindMain) + args = append(args, instance.SubordinateLagSeconds) args = append(args, instance.SQLDelay) - args = append(args, len(instance.SlaveHosts)) - args = append(args, instance.SlaveHosts.ToJSONString()) + args = append(args, len(instance.SubordinateHosts)) + args = append(args, instance.SubordinateHosts.ToJSONString()) args = append(args, instance.ClusterName) args = append(args, instance.SuggestedClusterAlias) args = append(args, instance.DataCenter) args = append(args, instance.PhysicalEnvironment) args = append(args, instance.ReplicationDepth) - args = append(args, instance.IsCoMaster) + args = append(args, instance.IsCoMain) args = append(args, instance.ReplicationCredentialsAvailable) args = append(args, instance.HasReplicationCredentials) args = append(args, instance.AllowTLS) @@ -2083,10 +2083,10 @@ func SnapshotTopologies() error { _, err := db.ExecOrchestrator(` insert ignore into database_instance_topology_history (snapshot_unix_timestamp, - hostname, port, master_host, master_port, cluster_name, version) + hostname, port, main_host, main_port, cluster_name, version) select UNIX_TIMESTAMP(NOW()), - hostname, port, master_host, master_port, cluster_name, version + hostname, port, main_host, main_port, cluster_name, version from database_instance `, @@ -2120,8 +2120,8 @@ func ReadHistoryClusterInstances(clusterName string, historyTimestampPattern str instance.Key.Hostname = m.GetString("hostname") instance.Key.Port = m.GetInt("port") - instance.MasterKey.Hostname = m.GetString("master_host") - instance.MasterKey.Port = m.GetInt("master_port") + instance.MainKey.Hostname = m.GetString("main_host") + instance.MainKey.Port = m.GetInt("main_port") instance.ClusterName = m.GetString("cluster_name") instances = append(instances, instance) @@ -2133,7 +2133,7 @@ func ReadHistoryClusterInstances(clusterName string, historyTimestampPattern str return instances, err } -// RegisterCandidateInstance markes a given instance as suggested for successoring a master in the event of failover. +// RegisterCandidateInstance markes a given instance as suggested for successoring a main in the event of failover. 
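// A sketch of the statement shape that the column and argument lists above feed into,
// and that the test below asserts: one placeholder tuple per row plus a
// col=VALUES(col) clause per column. The real builder also handles NOW() columns and
// the insert-ignore variant; this only shows the general construction.
package sketch

import (
	"fmt"
	"strings"
)

func buildInsertOdku(table string, columns []string, rowCount int) string {
	placeholders := "(" + strings.TrimSuffix(strings.Repeat("?, ", len(columns)), ", ") + ")"
	rows := make([]string, rowCount)
	for i := range rows {
		rows[i] = placeholders
	}
	updates := make([]string, len(columns))
	for i, col := range columns {
		updates[i] = fmt.Sprintf("%s=VALUES(%s)", col, col)
	}
	return fmt.Sprintf(
		"INSERT INTO %s (%s) VALUES %s ON DUPLICATE KEY UPDATE %s",
		table,
		strings.Join(columns, ", "),
		strings.Join(rows, ", "),
		strings.Join(updates, ", "),
	)
}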
func RegisterCandidateInstance(instanceKey *InstanceKey, promotionRule CandidatePromotionRule) error { writeFunc := func() error { _, err := db.ExecOrchestrator(` @@ -2159,7 +2159,7 @@ func RegisterCandidateInstance(instanceKey *InstanceKey, promotionRule Candidate return ExecDBWriteFunc(writeFunc) } -// ExpireCandidateInstances removes stale master candidate suggestions. +// ExpireCandidateInstances removes stale main candidate suggestions. func ExpireCandidateInstances() error { writeFunc := func() error { _, err := db.ExecOrchestrator(` @@ -2277,8 +2277,8 @@ func RecordInstanceBinlogFileHistory() error { // UpdateInstanceRecentRelaylogHistory updates the database_instance_recent_relaylog_history // table listing the current relaylog coordinates and the one-before. -// This information can be used to diagnoze a stale-replication scenario (for example, master is locked down -// and although slaves are connected, they're not making progress) +// This information can be used to diagnoze a stale-replication scenario (for example, main is locked down +// and although subordinates are connected, they're not making progress) func UpdateInstanceRecentRelaylogHistory() error { writeFunc := func() error { _, err := db.ExecOrchestrator(` diff --git a/go/inst/instance_dao_test.go b/go/inst/instance_dao_test.go index 80385a8e..d5d5f299 100644 --- a/go/inst/instance_dao_test.go +++ b/go/inst/instance_dao_test.go @@ -35,11 +35,11 @@ func TestMkInsertOdku(t *testing.T) { // one instance s1 := `INSERT ignore INTO database_instance - (hostname, port, last_checked, last_attempted_check, uptime, server_id, server_uuid, version, binlog_server, read_only, binlog_format, log_bin, log_slave_updates, binary_log_file, binary_log_pos, master_host, master_port, slave_sql_running, slave_io_running, has_replication_filters, supports_oracle_gtid, oracle_gtid, executed_gtid_set, gtid_purged, mariadb_gtid, pseudo_gtid, master_log_file, read_master_log_pos, relay_master_log_file, exec_master_log_pos, relay_log_file, relay_log_pos, last_sql_error, last_io_error, seconds_behind_master, slave_lag_seconds, sql_delay, num_slave_hosts, slave_hosts, cluster_name, suggested_cluster_alias, data_center, physical_environment, replication_depth, is_co_master, replication_credentials_available, has_replication_credentials, allow_tls, semi_sync_enforced, instance_alias, last_seen) + (hostname, port, last_checked, last_attempted_check, uptime, server_id, server_uuid, version, binlog_server, read_only, binlog_format, log_bin, log_subordinate_updates, binary_log_file, binary_log_pos, main_host, main_port, subordinate_sql_running, subordinate_io_running, has_replication_filters, supports_oracle_gtid, oracle_gtid, executed_gtid_set, gtid_purged, mariadb_gtid, pseudo_gtid, main_log_file, read_main_log_pos, relay_main_log_file, exec_main_log_pos, relay_log_file, relay_log_pos, last_sql_error, last_io_error, seconds_behind_main, subordinate_lag_seconds, sql_delay, num_subordinate_hosts, subordinate_hosts, cluster_name, suggested_cluster_alias, data_center, physical_environment, replication_depth, is_co_main, replication_credentials_available, has_replication_credentials, allow_tls, semi_sync_enforced, instance_alias, last_seen) VALUES (?, ?, NOW(), NOW(), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, NOW()) ON DUPLICATE KEY UPDATE - hostname=VALUES(hostname), port=VALUES(port), last_checked=VALUES(last_checked), 
last_attempted_check=VALUES(last_attempted_check), uptime=VALUES(uptime), server_id=VALUES(server_id), server_uuid=VALUES(server_uuid), version=VALUES(version), binlog_server=VALUES(binlog_server), read_only=VALUES(read_only), binlog_format=VALUES(binlog_format), log_bin=VALUES(log_bin), log_slave_updates=VALUES(log_slave_updates), binary_log_file=VALUES(binary_log_file), binary_log_pos=VALUES(binary_log_pos), master_host=VALUES(master_host), master_port=VALUES(master_port), slave_sql_running=VALUES(slave_sql_running), slave_io_running=VALUES(slave_io_running), has_replication_filters=VALUES(has_replication_filters), supports_oracle_gtid=VALUES(supports_oracle_gtid), oracle_gtid=VALUES(oracle_gtid), executed_gtid_set=VALUES(executed_gtid_set), gtid_purged=VALUES(gtid_purged), mariadb_gtid=VALUES(mariadb_gtid), pseudo_gtid=VALUES(pseudo_gtid), master_log_file=VALUES(master_log_file), read_master_log_pos=VALUES(read_master_log_pos), relay_master_log_file=VALUES(relay_master_log_file), exec_master_log_pos=VALUES(exec_master_log_pos), relay_log_file=VALUES(relay_log_file), relay_log_pos=VALUES(relay_log_pos), last_sql_error=VALUES(last_sql_error), last_io_error=VALUES(last_io_error), seconds_behind_master=VALUES(seconds_behind_master), slave_lag_seconds=VALUES(slave_lag_seconds), sql_delay=VALUES(sql_delay), num_slave_hosts=VALUES(num_slave_hosts), slave_hosts=VALUES(slave_hosts), cluster_name=VALUES(cluster_name), suggested_cluster_alias=VALUES(suggested_cluster_alias), data_center=VALUES(data_center), physical_environment=VALUES(physical_environment), replication_depth=VALUES(replication_depth), is_co_master=VALUES(is_co_master), replication_credentials_available=VALUES(replication_credentials_available), has_replication_credentials=VALUES(has_replication_credentials), allow_tls=VALUES(allow_tls), semi_sync_enforced=VALUES(semi_sync_enforced), instance_alias=VALUES(instance_alias), last_seen=VALUES(last_seen) + hostname=VALUES(hostname), port=VALUES(port), last_checked=VALUES(last_checked), last_attempted_check=VALUES(last_attempted_check), uptime=VALUES(uptime), server_id=VALUES(server_id), server_uuid=VALUES(server_uuid), version=VALUES(version), binlog_server=VALUES(binlog_server), read_only=VALUES(read_only), binlog_format=VALUES(binlog_format), log_bin=VALUES(log_bin), log_subordinate_updates=VALUES(log_subordinate_updates), binary_log_file=VALUES(binary_log_file), binary_log_pos=VALUES(binary_log_pos), main_host=VALUES(main_host), main_port=VALUES(main_port), subordinate_sql_running=VALUES(subordinate_sql_running), subordinate_io_running=VALUES(subordinate_io_running), has_replication_filters=VALUES(has_replication_filters), supports_oracle_gtid=VALUES(supports_oracle_gtid), oracle_gtid=VALUES(oracle_gtid), executed_gtid_set=VALUES(executed_gtid_set), gtid_purged=VALUES(gtid_purged), mariadb_gtid=VALUES(mariadb_gtid), pseudo_gtid=VALUES(pseudo_gtid), main_log_file=VALUES(main_log_file), read_main_log_pos=VALUES(read_main_log_pos), relay_main_log_file=VALUES(relay_main_log_file), exec_main_log_pos=VALUES(exec_main_log_pos), relay_log_file=VALUES(relay_log_file), relay_log_pos=VALUES(relay_log_pos), last_sql_error=VALUES(last_sql_error), last_io_error=VALUES(last_io_error), seconds_behind_main=VALUES(seconds_behind_main), subordinate_lag_seconds=VALUES(subordinate_lag_seconds), sql_delay=VALUES(sql_delay), num_subordinate_hosts=VALUES(num_subordinate_hosts), subordinate_hosts=VALUES(subordinate_hosts), cluster_name=VALUES(cluster_name), 
suggested_cluster_alias=VALUES(suggested_cluster_alias), data_center=VALUES(data_center), physical_environment=VALUES(physical_environment), replication_depth=VALUES(replication_depth), is_co_main=VALUES(is_co_main), replication_credentials_available=VALUES(replication_credentials_available), has_replication_credentials=VALUES(has_replication_credentials), allow_tls=VALUES(allow_tls), semi_sync_enforced=VALUES(semi_sync_enforced), instance_alias=VALUES(instance_alias), last_seen=VALUES(last_seen) ` a1 := `i710, 3306, 0, 710, , 5.6.7, false, false, STATEMENT, false, false, , 0, , 0, false, false, false, false, false, , , false, false, , 0, mysql.000007, 10, , 0, , , {0 false}, {0 false}, 0, 0, [], , , , , 0, false, false, false, false, false, , ` @@ -50,13 +50,13 @@ func TestMkInsertOdku(t *testing.T) { // three instances s3 := `INSERT INTO database_instance - (hostname, port, last_checked, last_attempted_check, uptime, server_id, server_uuid, version, binlog_server, read_only, binlog_format, log_bin, log_slave_updates, binary_log_file, binary_log_pos, master_host, master_port, slave_sql_running, slave_io_running, has_replication_filters, supports_oracle_gtid, oracle_gtid, executed_gtid_set, gtid_purged, mariadb_gtid, pseudo_gtid, master_log_file, read_master_log_pos, relay_master_log_file, exec_master_log_pos, relay_log_file, relay_log_pos, last_sql_error, last_io_error, seconds_behind_master, slave_lag_seconds, sql_delay, num_slave_hosts, slave_hosts, cluster_name, suggested_cluster_alias, data_center, physical_environment, replication_depth, is_co_master, replication_credentials_available, has_replication_credentials, allow_tls, semi_sync_enforced, instance_alias, last_seen) + (hostname, port, last_checked, last_attempted_check, uptime, server_id, server_uuid, version, binlog_server, read_only, binlog_format, log_bin, log_subordinate_updates, binary_log_file, binary_log_pos, main_host, main_port, subordinate_sql_running, subordinate_io_running, has_replication_filters, supports_oracle_gtid, oracle_gtid, executed_gtid_set, gtid_purged, mariadb_gtid, pseudo_gtid, main_log_file, read_main_log_pos, relay_main_log_file, exec_main_log_pos, relay_log_file, relay_log_pos, last_sql_error, last_io_error, seconds_behind_main, subordinate_lag_seconds, sql_delay, num_subordinate_hosts, subordinate_hosts, cluster_name, suggested_cluster_alias, data_center, physical_environment, replication_depth, is_co_main, replication_credentials_available, has_replication_credentials, allow_tls, semi_sync_enforced, instance_alias, last_seen) VALUES (?, ?, NOW(), NOW(), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, NOW()), (?, ?, NOW(), NOW(), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, NOW()), (?, ?, NOW(), NOW(), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, NOW()) ON DUPLICATE KEY UPDATE - hostname=VALUES(hostname), port=VALUES(port), last_checked=VALUES(last_checked), last_attempted_check=VALUES(last_attempted_check), uptime=VALUES(uptime), server_id=VALUES(server_id), server_uuid=VALUES(server_uuid), version=VALUES(version), binlog_server=VALUES(binlog_server), read_only=VALUES(read_only), binlog_format=VALUES(binlog_format), log_bin=VALUES(log_bin), log_slave_updates=VALUES(log_slave_updates), binary_log_file=VALUES(binary_log_file), 
binary_log_pos=VALUES(binary_log_pos), master_host=VALUES(master_host), master_port=VALUES(master_port), slave_sql_running=VALUES(slave_sql_running), slave_io_running=VALUES(slave_io_running), has_replication_filters=VALUES(has_replication_filters), supports_oracle_gtid=VALUES(supports_oracle_gtid), oracle_gtid=VALUES(oracle_gtid), executed_gtid_set=VALUES(executed_gtid_set), gtid_purged=VALUES(gtid_purged), mariadb_gtid=VALUES(mariadb_gtid), pseudo_gtid=VALUES(pseudo_gtid), master_log_file=VALUES(master_log_file), read_master_log_pos=VALUES(read_master_log_pos), relay_master_log_file=VALUES(relay_master_log_file), exec_master_log_pos=VALUES(exec_master_log_pos), relay_log_file=VALUES(relay_log_file), relay_log_pos=VALUES(relay_log_pos), last_sql_error=VALUES(last_sql_error), last_io_error=VALUES(last_io_error), seconds_behind_master=VALUES(seconds_behind_master), slave_lag_seconds=VALUES(slave_lag_seconds), sql_delay=VALUES(sql_delay), num_slave_hosts=VALUES(num_slave_hosts), slave_hosts=VALUES(slave_hosts), cluster_name=VALUES(cluster_name), suggested_cluster_alias=VALUES(suggested_cluster_alias), data_center=VALUES(data_center), physical_environment=VALUES(physical_environment), replication_depth=VALUES(replication_depth), is_co_master=VALUES(is_co_master), replication_credentials_available=VALUES(replication_credentials_available), has_replication_credentials=VALUES(has_replication_credentials), allow_tls=VALUES(allow_tls), semi_sync_enforced=VALUES(semi_sync_enforced), instance_alias=VALUES(instance_alias), last_seen=VALUES(last_seen) + hostname=VALUES(hostname), port=VALUES(port), last_checked=VALUES(last_checked), last_attempted_check=VALUES(last_attempted_check), uptime=VALUES(uptime), server_id=VALUES(server_id), server_uuid=VALUES(server_uuid), version=VALUES(version), binlog_server=VALUES(binlog_server), read_only=VALUES(read_only), binlog_format=VALUES(binlog_format), log_bin=VALUES(log_bin), log_subordinate_updates=VALUES(log_subordinate_updates), binary_log_file=VALUES(binary_log_file), binary_log_pos=VALUES(binary_log_pos), main_host=VALUES(main_host), main_port=VALUES(main_port), subordinate_sql_running=VALUES(subordinate_sql_running), subordinate_io_running=VALUES(subordinate_io_running), has_replication_filters=VALUES(has_replication_filters), supports_oracle_gtid=VALUES(supports_oracle_gtid), oracle_gtid=VALUES(oracle_gtid), executed_gtid_set=VALUES(executed_gtid_set), gtid_purged=VALUES(gtid_purged), mariadb_gtid=VALUES(mariadb_gtid), pseudo_gtid=VALUES(pseudo_gtid), main_log_file=VALUES(main_log_file), read_main_log_pos=VALUES(read_main_log_pos), relay_main_log_file=VALUES(relay_main_log_file), exec_main_log_pos=VALUES(exec_main_log_pos), relay_log_file=VALUES(relay_log_file), relay_log_pos=VALUES(relay_log_pos), last_sql_error=VALUES(last_sql_error), last_io_error=VALUES(last_io_error), seconds_behind_main=VALUES(seconds_behind_main), subordinate_lag_seconds=VALUES(subordinate_lag_seconds), sql_delay=VALUES(sql_delay), num_subordinate_hosts=VALUES(num_subordinate_hosts), subordinate_hosts=VALUES(subordinate_hosts), cluster_name=VALUES(cluster_name), suggested_cluster_alias=VALUES(suggested_cluster_alias), data_center=VALUES(data_center), physical_environment=VALUES(physical_environment), replication_depth=VALUES(replication_depth), is_co_main=VALUES(is_co_main), replication_credentials_available=VALUES(replication_credentials_available), has_replication_credentials=VALUES(has_replication_credentials), allow_tls=VALUES(allow_tls), 
semi_sync_enforced=VALUES(semi_sync_enforced), instance_alias=VALUES(instance_alias), last_seen=VALUES(last_seen) ` a3 := `i710, 3306, 0, 710, , 5.6.7, false, false, STATEMENT, false, false, , 0, , 0, false, false, false, false, false, , , false, false, , 0, mysql.000007, 10, , 0, , , {0 false}, {0 false}, 0, 0, [], , , , , 0, false, false, false, false, false, , i720, 3306, 0, 720, , 5.6.7, false, false, STATEMENT, false, false, , 0, , 0, false, false, false, false, false, , , false, false, , 0, mysql.000007, 20, , 0, , , {0 false}, {0 false}, 0, 0, [], , , , , 0, false, false, false, false, false, , i730, 3306, 0, 730, , 5.6.7, false, false, STATEMENT, false, false, , 0, , 0, false, false, false, false, false, , , false, false, , 0, mysql.000007, 30, , 0, , , {0 false}, {0 false}, 0, 0, [], , , , , 0, false, false, false, false, false, , ` diff --git a/go/inst/instance_topology.go b/go/inst/instance_topology.go index 08880f76..2a421bd1 100644 --- a/go/inst/instance_topology.go +++ b/go/inst/instance_topology.go @@ -34,13 +34,13 @@ func getASCIITopologyEntry(depth int, instance *Instance, replicationMap map[*In if instance == nil { return []string{} } - if instance.IsCoMaster && depth > 1 { + if instance.IsCoMain && depth > 1 { return []string{} } prefix := "" if depth > 0 { prefix = strings.Repeat(" ", (depth-1)*2) - if instance.SlaveRunning() && instance.IsLastCheckValid && instance.IsRecentlyChecked { + if instance.SubordinateRunning() && instance.IsLastCheckValid && instance.IsRecentlyChecked { prefix += "+ " } else { prefix += "- " @@ -51,9 +51,9 @@ func getASCIITopologyEntry(depth int, instance *Instance, replicationMap map[*In entry = fmt.Sprintf("%s %s", entry, instance.HumanReadableDescription()) } result := []string{entry} - for _, slave := range replicationMap[instance] { - slavesResult := getASCIITopologyEntry(depth+1, slave, replicationMap, extendedOutput) - result = append(result, slavesResult...) + for _, subordinate := range replicationMap[instance] { + subordinatesResult := getASCIITopologyEntry(depth+1, subordinate, replicationMap, extendedOutput) + result = append(result, subordinatesResult...) } return result } @@ -77,28 +77,28 @@ func ASCIITopology(clusterName string, historyTimestampPattern string) (result s } replicationMap := make(map[*Instance]([]*Instance)) - var masterInstance *Instance - // Investigate slaves: + var mainInstance *Instance + // Investigate subordinates: for _, instance := range instances { - master, ok := instancesMap[instance.MasterKey] + main, ok := instancesMap[instance.MainKey] if ok { - if _, ok := replicationMap[master]; !ok { - replicationMap[master] = [](*Instance){} + if _, ok := replicationMap[main]; !ok { + replicationMap[main] = [](*Instance){} } - replicationMap[master] = append(replicationMap[master], instance) + replicationMap[main] = append(replicationMap[main], instance) } else { - masterInstance = instance + mainInstance = instance } } // Get entries: var entries []string - if masterInstance != nil { - // Single master - entries = getASCIITopologyEntry(0, masterInstance, replicationMap, historyTimestampPattern == "") + if mainInstance != nil { + // Single main + entries = getASCIITopologyEntry(0, mainInstance, replicationMap, historyTimestampPattern == "") } else { - // Co-masters? For visualization we put each in its own branch while ignoring its other co-masters. + // Co-mains? For visualization we put each in its own branch while ignoring its other co-mains. 
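// A minimal sketch of the ASCII-topology approach above, with hypothetical types:
// index instances by key, group each one under its main to build a replication map,
// treat the instance whose main is unknown as the head, and render depth-first with
// indentation and the same +/- health markers.
package sketch

import (
	"fmt"
	"strings"
)

type node struct {
	Key     string
	MainKey string
	Running bool
}

func asciiTopology(nodes []node) []string {
	byKey := make(map[string]node)
	for _, n := range nodes {
		byKey[n.Key] = n
	}
	children := make(map[string][]node)
	var root node
	for _, n := range nodes {
		if _, ok := byKey[n.MainKey]; ok {
			children[n.MainKey] = append(children[n.MainKey], n)
		} else {
			root = n // no known main in this set: treat as the topology head
		}
	}
	var render func(depth int, n node) []string
	render = func(depth int, n node) []string {
		prefix := ""
		if depth > 0 {
			marker := "- "
			if n.Running {
				marker = "+ " // a healthy, replicating node, as in the entries above
			}
			prefix = strings.Repeat("  ", depth-1) + marker
		}
		lines := []string{fmt.Sprintf("%s%s", prefix, n.Key)}
		for _, c := range children[n.Key] {
			lines = append(lines, render(depth+1, c)...)
		}
		return lines
	}
	return render(0, root)
}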
for _, instance := range instances { - if instance.IsCoMaster { + if instance.IsCoMain { entries = append(entries, getASCIITopologyEntry(1, instance, replicationMap, historyTimestampPattern == "")...) } } @@ -123,42 +123,42 @@ func ASCIITopology(clusterName string, historyTimestampPattern string) (result s return result, nil } -// GetInstanceMaster synchronously reaches into the replication topology -// and retrieves master's data -func GetInstanceMaster(instance *Instance) (*Instance, error) { - master, err := ReadTopologyInstanceUnbuffered(&instance.MasterKey) - return master, err +// GetInstanceMain synchronously reaches into the replication topology +// and retrieves main's data +func GetInstanceMain(instance *Instance) (*Instance, error) { + main, err := ReadTopologyInstanceUnbuffered(&instance.MainKey) + return main, err } -// InstancesAreSiblings checks whether both instances are replicating from same master +// InstancesAreSiblings checks whether both instances are replicating from same main func InstancesAreSiblings(instance0, instance1 *Instance) bool { - if !instance0.IsSlave() { + if !instance0.IsSubordinate() { return false } - if !instance1.IsSlave() { + if !instance1.IsSubordinate() { return false } if instance0.Key.Equals(&instance1.Key) { // same instance... return false } - return instance0.MasterKey.Equals(&instance1.MasterKey) + return instance0.MainKey.Equals(&instance1.MainKey) } -// InstanceIsMasterOf checks whether an instance is the master of another -func InstanceIsMasterOf(allegedMaster, allegedSlave *Instance) bool { - if !allegedSlave.IsSlave() { +// InstanceIsMainOf checks whether an instance is the main of another +func InstanceIsMainOf(allegedMain, allegedSubordinate *Instance) bool { + if !allegedSubordinate.IsSubordinate() { return false } - if allegedMaster.Key.Equals(&allegedSlave.Key) { + if allegedMain.Key.Equals(&allegedSubordinate.Key) { // same instance... return false } - return allegedMaster.Key.Equals(&allegedSlave.MasterKey) + return allegedMain.Key.Equals(&allegedSubordinate.MainKey) } // MoveEquivalent will attempt moving instance indicated by instanceKey below another instance, -// based on known master coordinates equivalence +// based on known main coordinates equivalence func MoveEquivalent(instanceKey, otherKey *InstanceKey) (*Instance, error) { instance, found, err := ReadInstance(instanceKey) if err != nil || !found { @@ -169,23 +169,23 @@ func MoveEquivalent(instanceKey, otherKey *InstanceKey) (*Instance, error) { } // Are there equivalent coordinates to this instance? - instanceCoordinates := &InstanceBinlogCoordinates{Key: instance.MasterKey, Coordinates: instance.ExecBinlogCoordinates} + instanceCoordinates := &InstanceBinlogCoordinates{Key: instance.MainKey, Coordinates: instance.ExecBinlogCoordinates} binlogCoordinates, err := GetEquivalentBinlogCoordinatesFor(instanceCoordinates, otherKey) if err != nil { return instance, err } if binlogCoordinates == nil { - return instance, fmt.Errorf("No equivalent coordinates found for %+v replicating from %+v at %+v", instance.Key, instance.MasterKey, instance.ExecBinlogCoordinates) + return instance, fmt.Errorf("No equivalent coordinates found for %+v replicating from %+v at %+v", instance.Key, instance.MainKey, instance.ExecBinlogCoordinates) } - // For performance reasons, we did all the above before even checking the slave is stopped or stopping it at all. + // For performance reasons, we did all the above before even checking the subordinate is stopped or stopping it at all. 
// This allows us to quickly skip the entire operation should there NOT be coordinates. - // To elaborate: if the slave is actually running AND making progress, it is unlikely/impossible for it to have + // To elaborate: if the subordinate is actually running AND making progress, it is unlikely/impossible for it to have // equivalent coordinates, as the current coordinates are like to have never been seen. - // This excludes the case, for example, that the master is itself not replicating. + // This excludes the case, for example, that the main is itself not replicating. // Now if we DO get to happen on equivalent coordinates, we need to double check. For CHANGE MASTER to happen we must - // stop the slave anyhow. But then let's verify the position hasn't changed. + // stop the subordinate anyhow. But then let's verify the position hasn't changed. knownExecBinlogCoordinates := instance.ExecBinlogCoordinates - instance, err = StopSlave(instanceKey) + instance, err = StopSubordinate(instanceKey) if err != nil { goto Cleanup } @@ -194,10 +194,10 @@ func MoveEquivalent(instanceKey, otherKey *InstanceKey) (*Instance, error) { err = fmt.Errorf("MoveEquivalent(): ExecBinlogCoordinates changed after stopping replication on %+v; aborting", instance.Key) goto Cleanup } - instance, err = ChangeMasterTo(instanceKey, otherKey, binlogCoordinates, false, GTIDHintNeutral) + instance, err = ChangeMainTo(instanceKey, otherKey, binlogCoordinates, false, GTIDHintNeutral) Cleanup: - instance, _ = StartSlave(instanceKey) + instance, _ = StartSubordinate(instanceKey) if err == nil { message := fmt.Sprintf("moved %+v via equivalence coordinates below %+v", *instanceKey, *otherKey) @@ -209,34 +209,34 @@ Cleanup: // MoveUp will attempt moving instance indicated by instanceKey up the topology hierarchy. // It will perform all safety and sanity checks and will tamper with this instance's replication -// as well as its master. +// as well as its main. func MoveUp(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, err } - if !instance.IsSlave() { - return instance, fmt.Errorf("instance is not a slave: %+v", instanceKey) + if !instance.IsSubordinate() { + return instance, fmt.Errorf("instance is not a subordinate: %+v", instanceKey) } rinstance, _, _ := ReadInstance(&instance.Key) if canMove, merr := rinstance.CanMove(); !canMove { return instance, merr } - master, err := GetInstanceMaster(instance) + main, err := GetInstanceMain(instance) if err != nil { - return instance, log.Errorf("Cannot GetInstanceMaster() for %+v. error=%+v", instance.Key, err) + return instance, log.Errorf("Cannot GetInstanceMain() for %+v. 
error=%+v", instance.Key, err) } - if !master.IsSlave() { - return instance, fmt.Errorf("master is not a slave itself: %+v", master.Key) + if !main.IsSubordinate() { + return instance, fmt.Errorf("main is not a subordinate itself: %+v", main.Key) } - if canReplicate, err := instance.CanReplicateFrom(master); canReplicate == false { + if canReplicate, err := instance.CanReplicateFrom(main); canReplicate == false { return instance, err } - if master.IsBinlogServer() { + if main.IsBinlogServer() { // Quick solution via binlog servers - return Repoint(instanceKey, &master.MasterKey, GTIDHintDeny) + return Repoint(instanceKey, &main.MainKey, GTIDHintDeny) } log.Infof("Will move %+v up the topology", *instanceKey) @@ -247,177 +247,177 @@ func MoveUp(instanceKey *InstanceKey) (*Instance, error) { } else { defer EndMaintenance(maintenanceToken) } - if maintenanceToken, merr := BeginMaintenance(&master.Key, GetMaintenanceOwner(), fmt.Sprintf("child %+v moves up", *instanceKey)); merr != nil { - err = fmt.Errorf("Cannot begin maintenance on %+v", master.Key) + if maintenanceToken, merr := BeginMaintenance(&main.Key, GetMaintenanceOwner(), fmt.Sprintf("child %+v moves up", *instanceKey)); merr != nil { + err = fmt.Errorf("Cannot begin maintenance on %+v", main.Key) goto Cleanup } else { defer EndMaintenance(maintenanceToken) } if !instance.UsingMariaDBGTID { - master, err = StopSlave(&master.Key) + main, err = StopSubordinate(&main.Key) if err != nil { goto Cleanup } } - instance, err = StopSlave(instanceKey) + instance, err = StopSubordinate(instanceKey) if err != nil { goto Cleanup } if !instance.UsingMariaDBGTID { - instance, err = StartSlaveUntilMasterCoordinates(instanceKey, &master.SelfBinlogCoordinates) + instance, err = StartSubordinateUntilMainCoordinates(instanceKey, &main.SelfBinlogCoordinates) if err != nil { goto Cleanup } } - // We can skip hostname unresolve; we just copy+paste whatever our master thinks of its master. - instance, err = ChangeMasterTo(instanceKey, &master.MasterKey, &master.ExecBinlogCoordinates, true, GTIDHintDeny) + // We can skip hostname unresolve; we just copy+paste whatever our main thinks of its main. + instance, err = ChangeMainTo(instanceKey, &main.MainKey, &main.ExecBinlogCoordinates, true, GTIDHintDeny) if err != nil { goto Cleanup } Cleanup: - instance, _ = StartSlave(instanceKey) + instance, _ = StartSubordinate(instanceKey) if !instance.UsingMariaDBGTID { - master, _ = StartSlave(&master.Key) + main, _ = StartSubordinate(&main.Key) } if err != nil { return instance, log.Errore(err) } // and we're done (pending deferred functions) - AuditOperation("move-up", instanceKey, fmt.Sprintf("moved up %+v. Previous master: %+v", *instanceKey, master.Key)) + AuditOperation("move-up", instanceKey, fmt.Sprintf("moved up %+v. Previous main: %+v", *instanceKey, main.Key)) return instance, err } -// MoveUpSlaves will attempt moving up all slaves of a given instance, at the same time. -// Clock-time, this is fater than moving one at a time. However this means all slaves of the given instance, and the instance itself, +// MoveUpSubordinates will attempt moving up all subordinates of a given instance, at the same time. +// Clock-time, this is fater than moving one at a time. However this means all subordinates of the given instance, and the instance itself, // will all stop replicating together. 
-func MoveUpSlaves(instanceKey *InstanceKey, pattern string) ([](*Instance), *Instance, error, []error) { +func MoveUpSubordinates(instanceKey *InstanceKey, pattern string) ([](*Instance), *Instance, error, []error) { res := [](*Instance){} errs := []error{} - slaveMutex := make(chan bool, 1) + subordinateMutex := make(chan bool, 1) var barrier chan *InstanceKey instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return res, nil, err, errs } - if !instance.IsSlave() { - return res, instance, fmt.Errorf("instance is not a slave: %+v", instanceKey), errs + if !instance.IsSubordinate() { + return res, instance, fmt.Errorf("instance is not a subordinate: %+v", instanceKey), errs } - _, err = GetInstanceMaster(instance) + _, err = GetInstanceMain(instance) if err != nil { - return res, instance, log.Errorf("Cannot GetInstanceMaster() for %+v. error=%+v", instance.Key, err), errs + return res, instance, log.Errorf("Cannot GetInstanceMain() for %+v. error=%+v", instance.Key, err), errs } if instance.IsBinlogServer() { - slaves, err, errors := RepointSlavesTo(instanceKey, pattern, &instance.MasterKey) + subordinates, err, errors := RepointSubordinatesTo(instanceKey, pattern, &instance.MainKey) // Bail out! - return slaves, instance, err, errors + return subordinates, instance, err, errors } - slaves, err := ReadSlaveInstances(instanceKey) + subordinates, err := ReadSubordinateInstances(instanceKey) if err != nil { return res, instance, err, errs } - slaves = filterInstancesByPattern(slaves, pattern) - if len(slaves) == 0 { + subordinates = filterInstancesByPattern(subordinates, pattern) + if len(subordinates) == 0 { return res, instance, nil, errs } - log.Infof("Will move slaves of %+v up the topology", *instanceKey) + log.Infof("Will move subordinates of %+v up the topology", *instanceKey) - if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "move up slaves"); merr != nil { + if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "move up subordinates"); merr != nil { err = fmt.Errorf("Cannot begin maintenance on %+v", *instanceKey) goto Cleanup } else { defer EndMaintenance(maintenanceToken) } - for _, slave := range slaves { - if maintenanceToken, merr := BeginMaintenance(&slave.Key, GetMaintenanceOwner(), fmt.Sprintf("%+v moves up", slave.Key)); merr != nil { - err = fmt.Errorf("Cannot begin maintenance on %+v", slave.Key) + for _, subordinate := range subordinates { + if maintenanceToken, merr := BeginMaintenance(&subordinate.Key, GetMaintenanceOwner(), fmt.Sprintf("%+v moves up", subordinate.Key)); merr != nil { + err = fmt.Errorf("Cannot begin maintenance on %+v", subordinate.Key) goto Cleanup } else { defer EndMaintenance(maintenanceToken) } } - instance, err = StopSlave(instanceKey) + instance, err = StopSubordinate(instanceKey) if err != nil { goto Cleanup } barrier = make(chan *InstanceKey) - for _, slave := range slaves { - slave := slave + for _, subordinate := range subordinates { + subordinate := subordinate go func() { defer func() { - defer func() { barrier <- &slave.Key }() - StartSlave(&slave.Key) + defer func() { barrier <- &subordinate.Key }() + StartSubordinate(&subordinate.Key) }() - var slaveErr error + var subordinateErr error ExecuteOnTopology(func() { - if canReplicate, err := slave.CanReplicateFrom(instance); canReplicate == false || err != nil { - slaveErr = err + if canReplicate, err := subordinate.CanReplicateFrom(instance); canReplicate == false || err != nil { + subordinateErr = err 
return } if instance.IsBinlogServer() { // Special case. Just repoint - slave, err = Repoint(&slave.Key, instanceKey, GTIDHintDeny) + subordinate, err = Repoint(&subordinate.Key, instanceKey, GTIDHintDeny) if err != nil { - slaveErr = err + subordinateErr = err return } } else { // Normal case. Do the math. - slave, err = StopSlave(&slave.Key) + subordinate, err = StopSubordinate(&subordinate.Key) if err != nil { - slaveErr = err + subordinateErr = err return } - slave, err = StartSlaveUntilMasterCoordinates(&slave.Key, &instance.SelfBinlogCoordinates) + subordinate, err = StartSubordinateUntilMainCoordinates(&subordinate.Key, &instance.SelfBinlogCoordinates) if err != nil { - slaveErr = err + subordinateErr = err return } - slave, err = ChangeMasterTo(&slave.Key, &instance.MasterKey, &instance.ExecBinlogCoordinates, false, GTIDHintDeny) + subordinate, err = ChangeMainTo(&subordinate.Key, &instance.MainKey, &instance.ExecBinlogCoordinates, false, GTIDHintDeny) if err != nil { - slaveErr = err + subordinateErr = err return } } }) func() { - slaveMutex <- true - defer func() { <-slaveMutex }() - if slaveErr == nil { - res = append(res, slave) + subordinateMutex <- true + defer func() { <-subordinateMutex }() + if subordinateErr == nil { + res = append(res, subordinate) } else { - errs = append(errs, slaveErr) + errs = append(errs, subordinateErr) } }() }() } - for range slaves { + for range subordinates { <-barrier } Cleanup: - instance, _ = StartSlave(instanceKey) + instance, _ = StartSubordinate(instanceKey) if err != nil { return res, instance, log.Errore(err), errs } - if len(errs) == len(slaves) { + if len(errs) == len(subordinates) { // All returned with error return res, instance, log.Error("Error on all operations"), errs } - AuditOperation("move-up-slaves", instanceKey, fmt.Sprintf("moved up %d/%d slaves of %+v. New master: %+v", len(res), len(slaves), *instanceKey, instance.MasterKey)) + AuditOperation("move-up-subordinates", instanceKey, fmt.Sprintf("moved up %d/%d subordinates of %+v. New main: %+v", len(res), len(subordinates), *instanceKey, instance.MainKey)) return res, instance, err, errs } @@ -436,7 +436,7 @@ func MoveBelow(instanceKey, siblingKey *InstanceKey) (*Instance, error) { } if sibling.IsBinlogServer() { - // Binlog server has same coordinates as master + // Binlog server has same coordinates as main // Easy solution! 
return Repoint(instanceKey, &sibling.Key, GTIDHintDeny) } @@ -472,36 +472,36 @@ func MoveBelow(instanceKey, siblingKey *InstanceKey) (*Instance, error) { defer EndMaintenance(maintenanceToken) } - instance, err = StopSlave(instanceKey) + instance, err = StopSubordinate(instanceKey) if err != nil { goto Cleanup } - sibling, err = StopSlave(siblingKey) + sibling, err = StopSubordinate(siblingKey) if err != nil { goto Cleanup } if instance.ExecBinlogCoordinates.SmallerThan(&sibling.ExecBinlogCoordinates) { - instance, err = StartSlaveUntilMasterCoordinates(instanceKey, &sibling.ExecBinlogCoordinates) + instance, err = StartSubordinateUntilMainCoordinates(instanceKey, &sibling.ExecBinlogCoordinates) if err != nil { goto Cleanup } } else if sibling.ExecBinlogCoordinates.SmallerThan(&instance.ExecBinlogCoordinates) { - sibling, err = StartSlaveUntilMasterCoordinates(siblingKey, &instance.ExecBinlogCoordinates) + sibling, err = StartSubordinateUntilMainCoordinates(siblingKey, &instance.ExecBinlogCoordinates) if err != nil { goto Cleanup } } // At this point both siblings have executed exact same statements and are identical - instance, err = ChangeMasterTo(instanceKey, &sibling.Key, &sibling.SelfBinlogCoordinates, false, GTIDHintDeny) + instance, err = ChangeMainTo(instanceKey, &sibling.Key, &sibling.SelfBinlogCoordinates, false, GTIDHintDeny) if err != nil { goto Cleanup } Cleanup: - instance, _ = StartSlave(instanceKey) - sibling, _ = StartSlave(siblingKey) + instance, _ = StartSubordinate(instanceKey) + sibling, _ = StartSubordinate(siblingKey) if err != nil { return instance, log.Errore(err) @@ -548,17 +548,17 @@ func moveInstanceBelowViaGTID(instance, otherInstance *Instance) (*Instance, err defer EndMaintenance(maintenanceToken) } - instance, err = StopSlave(instanceKey) + instance, err = StopSubordinate(instanceKey) if err != nil { goto Cleanup } - instance, err = ChangeMasterTo(instanceKey, &otherInstance.Key, &otherInstance.SelfBinlogCoordinates, false, GTIDHintForce) + instance, err = ChangeMainTo(instanceKey, &otherInstance.Key, &otherInstance.SelfBinlogCoordinates, false, GTIDHintForce) if err != nil { goto Cleanup } Cleanup: - instance, _ = StartSlave(instanceKey) + instance, _ = StartSubordinate(instanceKey) if err != nil { return instance, log.Errore(err) } @@ -581,125 +581,125 @@ func MoveBelowGTID(instanceKey, otherKey *InstanceKey) (*Instance, error) { return moveInstanceBelowViaGTID(instance, other) } -// moveSlavesViaGTID moves a list of slaves under another instance via GTID, returning those slaves +// moveSubordinatesViaGTID moves a list of subordinates under another instance via GTID, returning those subordinates // that could not be moved (do not use GTID) -func moveSlavesViaGTID(slaves [](*Instance), other *Instance) (movedSlaves [](*Instance), unmovedSlaves [](*Instance), err error, errs []error) { - slaves = RemoveInstance(slaves, &other.Key) - if len(slaves) == 0 { +func moveSubordinatesViaGTID(subordinates [](*Instance), other *Instance) (movedSubordinates [](*Instance), unmovedSubordinates [](*Instance), err error, errs []error) { + subordinates = RemoveInstance(subordinates, &other.Key) + if len(subordinates) == 0 { // Nothing to do - return movedSlaves, unmovedSlaves, nil, errs + return movedSubordinates, unmovedSubordinates, nil, errs } - log.Infof("Will move %+v slaves below %+v via GTID", len(slaves), other.Key) + log.Infof("Will move %+v subordinates below %+v via GTID", len(subordinates), other.Key) barrier := make(chan *InstanceKey) - slaveMutex := make(chan 
bool, 1) - for _, slave := range slaves { - slave := slave + subordinateMutex := make(chan bool, 1) + for _, subordinate := range subordinates { + subordinate := subordinate // Parallelize repoints go func() { - defer func() { barrier <- &slave.Key }() + defer func() { barrier <- &subordinate.Key }() ExecuteOnTopology(func() { - var slaveErr error - if _, _, canMove := canMoveViaGTID(slave, other); canMove { - slave, slaveErr = moveInstanceBelowViaGTID(slave, other) + var subordinateErr error + if _, _, canMove := canMoveViaGTID(subordinate, other); canMove { + subordinate, subordinateErr = moveInstanceBelowViaGTID(subordinate, other) } else { - slaveErr = fmt.Errorf("%+v cannot move below %+v via GTID", slave.Key, other.Key) + subordinateErr = fmt.Errorf("%+v cannot move below %+v via GTID", subordinate.Key, other.Key) } func() { // Instantaneous mutex. - slaveMutex <- true - defer func() { <-slaveMutex }() - if slaveErr == nil { - movedSlaves = append(movedSlaves, slave) + subordinateMutex <- true + defer func() { <-subordinateMutex }() + if subordinateErr == nil { + movedSubordinates = append(movedSubordinates, subordinate) } else { - unmovedSlaves = append(unmovedSlaves, slave) - errs = append(errs, slaveErr) + unmovedSubordinates = append(unmovedSubordinates, subordinate) + errs = append(errs, subordinateErr) } }() }) }() } - for range slaves { + for range subordinates { <-barrier } - if len(errs) == len(slaves) { + if len(errs) == len(subordinates) { // All returned with error - return movedSlaves, unmovedSlaves, fmt.Errorf("moveSlavesViaGTID: Error on all %+v operations", len(errs)), errs + return movedSubordinates, unmovedSubordinates, fmt.Errorf("moveSubordinatesViaGTID: Error on all %+v operations", len(errs)), errs } - AuditOperation("move-slaves-gtid", &other.Key, fmt.Sprintf("moved %d/%d slaves below %+v via GTID", len(movedSlaves), len(slaves), other.Key)) + AuditOperation("move-subordinates-gtid", &other.Key, fmt.Sprintf("moved %d/%d subordinates below %+v via GTID", len(movedSubordinates), len(subordinates), other.Key)) - return movedSlaves, unmovedSlaves, err, errs + return movedSubordinates, unmovedSubordinates, err, errs } -// MoveSlavesGTID will (attempt to) move all slaves of given master below given instance. -func MoveSlavesGTID(masterKey *InstanceKey, belowKey *InstanceKey, pattern string) (movedSlaves [](*Instance), unmovedSlaves [](*Instance), err error, errs []error) { +// MoveSubordinatesGTID will (attempt to) move all subordinates of given main below given instance. 
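The single-instance helpers in this hunk (moveInstanceBelowViaGTID above, Repoint and the detach/reattach operations below) share one error-handling shape: stop replication, perform the topology change, and fall through to a Cleanup label that always attempts to restart replication before returning. A minimal sketch of that goto-Cleanup idiom, with hypothetical stopReplica/changeSource/startReplica helpers standing in for StopSubordinate/ChangeMainTo/StartSubordinate:

package main

import (
	"errors"
	"fmt"
)

func relocate(instance, newSource string) (err error) {
	if err = stopReplica(instance); err != nil {
		goto Cleanup
	}
	if err = changeSource(instance, newSource); err != nil {
		goto Cleanup
	}

Cleanup:
	// Best-effort restart, mirroring the `StartSubordinate(instanceKey)` calls above,
	// which deliberately ignore the restart error and return the original one.
	_ = startReplica(instance)
	if err != nil {
		return fmt.Errorf("relocate %s: %w", instance, err)
	}
	return nil
}

func stopReplica(instance string) error          { return nil }
func changeSource(instance, source string) error { return errors.New("cannot reach " + source) }
func startReplica(instance string) error         { return nil }

func main() {
	fmt.Println(relocate("db1:3306", "db2:3306"))
}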
+func MoveSubordinatesGTID(mainKey *InstanceKey, belowKey *InstanceKey, pattern string) (movedSubordinates [](*Instance), unmovedSubordinates [](*Instance), err error, errs []error) { belowInstance, err := ReadTopologyInstanceUnbuffered(belowKey) if err != nil { - // Can't access "below" ==> can't move slaves beneath it - return movedSlaves, unmovedSlaves, err, errs + // Can't access "below" ==> can't move subordinates beneath it + return movedSubordinates, unmovedSubordinates, err, errs } - // slaves involved - slaves, err := ReadSlaveInstancesIncludingBinlogServerSubSlaves(masterKey) + // subordinates involved + subordinates, err := ReadSubordinateInstancesIncludingBinlogServerSubSubordinates(mainKey) if err != nil { - return movedSlaves, unmovedSlaves, err, errs + return movedSubordinates, unmovedSubordinates, err, errs } - slaves = filterInstancesByPattern(slaves, pattern) - movedSlaves, unmovedSlaves, err, errs = moveSlavesViaGTID(slaves, belowInstance) + subordinates = filterInstancesByPattern(subordinates, pattern) + movedSubordinates, unmovedSubordinates, err, errs = moveSubordinatesViaGTID(subordinates, belowInstance) if err != nil { log.Errore(err) } - if len(unmovedSlaves) > 0 { - err = fmt.Errorf("MoveSlavesGTID: only moved %d out of %d slaves of %+v; error is: %+v", len(movedSlaves), len(slaves), *masterKey, err) + if len(unmovedSubordinates) > 0 { + err = fmt.Errorf("MoveSubordinatesGTID: only moved %d out of %d subordinates of %+v; error is: %+v", len(movedSubordinates), len(subordinates), *mainKey, err) } - return movedSlaves, unmovedSlaves, err, errs + return movedSubordinates, unmovedSubordinates, err, errs } -// Repoint connects a slave to a master using its exact same executing coordinates. -// The given masterKey can be null, in which case the existing master is used. +// Repoint connects a subordinate to a main using its exact same executing coordinates. +// The given mainKey can be null, in which case the existing main is used. // Two use cases: -// - masterKey is nil: use case is corrupted relay logs on slave -// - masterKey is not nil: using Binlog servers (coordinates remain the same) -func Repoint(instanceKey *InstanceKey, masterKey *InstanceKey, gtidHint OperationGTIDHint) (*Instance, error) { +// - mainKey is nil: use case is corrupted relay logs on subordinate +// - mainKey is not nil: using Binlog servers (coordinates remain the same) +func Repoint(instanceKey *InstanceKey, mainKey *InstanceKey, gtidHint OperationGTIDHint) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, err } - if !instance.IsSlave() { - return instance, fmt.Errorf("instance is not a slave: %+v", *instanceKey) + if !instance.IsSubordinate() { + return instance, fmt.Errorf("instance is not a subordinate: %+v", *instanceKey) } - if masterKey == nil { - masterKey = &instance.MasterKey + if mainKey == nil { + mainKey = &instance.MainKey } - // With repoint we *prefer* the master to be alive, but we don't strictly require it. - // The use case for the master being alive is with hostname-resolve or hostname-unresolve: asking the slave - // to reconnect to its same master while changing the MASTER_HOST in CHANGE MASTER TO due to DNS changes etc. - master, err := ReadTopologyInstanceUnbuffered(masterKey) - masterIsAccessible := (err == nil) - if !masterIsAccessible { - master, _, err = ReadInstance(masterKey) + // With repoint we *prefer* the main to be alive, but we don't strictly require it. 
+ // The use case for the main being alive is with hostname-resolve or hostname-unresolve: asking the subordinate + // to reconnect to its same main while changing the MASTER_HOST in CHANGE MASTER TO due to DNS changes etc. + main, err := ReadTopologyInstanceUnbuffered(mainKey) + mainIsAccessible := (err == nil) + if !mainIsAccessible { + main, _, err = ReadInstance(mainKey) if err != nil { return instance, err } } - if canReplicate, err := instance.CanReplicateFrom(master); !canReplicate { + if canReplicate, err := instance.CanReplicateFrom(main); !canReplicate { return instance, err } // if a binlog server check it is sufficiently up to date - if master.IsBinlogServer() { + if main.IsBinlogServer() { // "Repoint" operation trusts the user. But only so much. Repoiting to a binlog server which is not yet there is strictly wrong. - if !instance.ExecBinlogCoordinates.SmallerThanOrEquals(&master.SelfBinlogCoordinates) { - return instance, fmt.Errorf("repoint: binlog server %+v is not sufficiently up to date to repoint %+v below it", *masterKey, *instanceKey) + if !instance.ExecBinlogCoordinates.SmallerThanOrEquals(&main.SelfBinlogCoordinates) { + return instance, fmt.Errorf("repoint: binlog server %+v is not sufficiently up to date to repoint %+v below it", *mainKey, *instanceKey) } } - log.Infof("Will repoint %+v to master %+v", *instanceKey, *masterKey) + log.Infof("Will repoint %+v to main %+v", *instanceKey, *mainKey) if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "repoint"); merr != nil { err = fmt.Errorf("Cannot begin maintenance on %+v", *instanceKey) @@ -708,42 +708,42 @@ func Repoint(instanceKey *InstanceKey, masterKey *InstanceKey, gtidHint Operatio defer EndMaintenance(maintenanceToken) } - instance, err = StopSlave(instanceKey) + instance, err = StopSubordinate(instanceKey) if err != nil { goto Cleanup } - // See above, we are relaxed about the master being accessible/inaccessible. + // See above, we are relaxed about the main being accessible/inaccessible. // If accessible, we wish to do hostname-unresolve. If inaccessible, we can skip the test and not fail the - // ChangeMasterTo operation. This is why we pass "!masterIsAccessible" below. + // ChangeMainTo operation. This is why we pass "!mainIsAccessible" below. if instance.ExecBinlogCoordinates.IsEmpty() { instance.ExecBinlogCoordinates.LogFile = "orchestrator-unknown-log-file" } - instance, err = ChangeMasterTo(instanceKey, masterKey, &instance.ExecBinlogCoordinates, !masterIsAccessible, gtidHint) + instance, err = ChangeMainTo(instanceKey, mainKey, &instance.ExecBinlogCoordinates, !mainIsAccessible, gtidHint) if err != nil { goto Cleanup } Cleanup: - instance, _ = StartSlave(instanceKey) + instance, _ = StartSubordinate(instanceKey) if err != nil { return instance, log.Errore(err) } // and we're done (pending deferred functions) - AuditOperation("repoint", instanceKey, fmt.Sprintf("slave %+v repointed to master: %+v", *instanceKey, *masterKey)) + AuditOperation("repoint", instanceKey, fmt.Sprintf("subordinate %+v repointed to main: %+v", *instanceKey, *mainKey)) return instance, err } -// RepointTo repoints list of slaves onto another master. +// RepointTo repoints list of subordinates onto another main. 
// Binlog Server is the major use case -func RepointTo(slaves [](*Instance), belowKey *InstanceKey) ([](*Instance), error, []error) { +func RepointTo(subordinates [](*Instance), belowKey *InstanceKey) ([](*Instance), error, []error) { res := [](*Instance){} errs := []error{} - slaves = RemoveInstance(slaves, belowKey) - if len(slaves) == 0 { + subordinates = RemoveInstance(subordinates, belowKey) + if len(subordinates) == 0 { // Nothing to do return res, nil, errs } @@ -751,76 +751,76 @@ func RepointTo(slaves [](*Instance), belowKey *InstanceKey) ([](*Instance), erro return res, log.Errorf("RepointTo received nil belowKey"), errs } - log.Infof("Will repoint %+v slaves below %+v", len(slaves), *belowKey) + log.Infof("Will repoint %+v subordinates below %+v", len(subordinates), *belowKey) barrier := make(chan *InstanceKey) - slaveMutex := make(chan bool, 1) - for _, slave := range slaves { - slave := slave + subordinateMutex := make(chan bool, 1) + for _, subordinate := range subordinates { + subordinate := subordinate // Parallelize repoints go func() { - defer func() { barrier <- &slave.Key }() + defer func() { barrier <- &subordinate.Key }() ExecuteOnTopology(func() { - slave, slaveErr := Repoint(&slave.Key, belowKey, GTIDHintNeutral) + subordinate, subordinateErr := Repoint(&subordinate.Key, belowKey, GTIDHintNeutral) func() { // Instantaneous mutex. - slaveMutex <- true - defer func() { <-slaveMutex }() - if slaveErr == nil { - res = append(res, slave) + subordinateMutex <- true + defer func() { <-subordinateMutex }() + if subordinateErr == nil { + res = append(res, subordinate) } else { - errs = append(errs, slaveErr) + errs = append(errs, subordinateErr) } }() }) }() } - for range slaves { + for range subordinates { <-barrier } - if len(errs) == len(slaves) { + if len(errs) == len(subordinates) { // All returned with error return res, log.Error("Error on all operations"), errs } - AuditOperation("repoint-to", belowKey, fmt.Sprintf("repointed %d/%d slaves to %+v", len(res), len(slaves), *belowKey)) + AuditOperation("repoint-to", belowKey, fmt.Sprintf("repointed %d/%d subordinates to %+v", len(res), len(subordinates), *belowKey)) return res, nil, errs } -// RepointSlavesTo repoints slaves of a given instance (possibly filtered) onto another master. +// RepointSubordinatesTo repoints subordinates of a given instance (possibly filtered) onto another main. // Binlog Server is the major use case -func RepointSlavesTo(instanceKey *InstanceKey, pattern string, belowKey *InstanceKey) ([](*Instance), error, []error) { +func RepointSubordinatesTo(instanceKey *InstanceKey, pattern string, belowKey *InstanceKey) ([](*Instance), error, []error) { res := [](*Instance){} errs := []error{} - slaves, err := ReadSlaveInstances(instanceKey) + subordinates, err := ReadSubordinateInstances(instanceKey) if err != nil { return res, err, errs } - slaves = RemoveInstance(slaves, belowKey) - slaves = filterInstancesByPattern(slaves, pattern) - if len(slaves) == 0 { + subordinates = RemoveInstance(subordinates, belowKey) + subordinates = filterInstancesByPattern(subordinates, pattern) + if len(subordinates) == 0 { // Nothing to do return res, nil, errs } if belowKey == nil { - // Default to existing master. All slaves are of the same master, hence just pick one. - belowKey = &slaves[0].MasterKey + // Default to existing main. All subordinates are of the same main, hence just pick one. 
+ belowKey = &subordinates[0].MainKey } - log.Infof("Will repoint slaves of %+v to %+v", *instanceKey, *belowKey) - return RepointTo(slaves, belowKey) + log.Infof("Will repoint subordinates of %+v to %+v", *instanceKey, *belowKey) + return RepointTo(subordinates, belowKey) } -// RepointSlaves repoints all slaves of a given instance onto its existing master. -func RepointSlaves(instanceKey *InstanceKey, pattern string) ([](*Instance), error, []error) { - return RepointSlavesTo(instanceKey, pattern, nil) +// RepointSubordinates repoints all subordinates of a given instance onto its existing main. +func RepointSubordinates(instanceKey *InstanceKey, pattern string) ([](*Instance), error, []error) { + return RepointSubordinatesTo(instanceKey, pattern, nil) } -// MakeCoMaster will attempt to make an instance co-master with its master, by making its master a slave of its own. -// This only works out if the master is not replicating; the master does not have a known master (it may have an unknown master). -func MakeCoMaster(instanceKey *InstanceKey) (*Instance, error) { +// MakeCoMain will attempt to make an instance co-main with its main, by making its main a subordinate of its own. +// This only works out if the main is not replicating; the main does not have a known main (it may have an unknown main). +func MakeCoMain(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, err @@ -828,138 +828,138 @@ func MakeCoMaster(instanceKey *InstanceKey) (*Instance, error) { if canMove, merr := instance.CanMove(); !canMove { return instance, merr } - master, err := GetInstanceMaster(instance) + main, err := GetInstanceMain(instance) if err != nil { return instance, err } - log.Debugf("Will check whether %+v's master (%+v) can become its co-master", instance.Key, master.Key) - if canMove, merr := master.CanMoveAsCoMaster(); !canMove { + log.Debugf("Will check whether %+v's main (%+v) can become its co-main", instance.Key, main.Key) + if canMove, merr := main.CanMoveAsCoMain(); !canMove { return instance, merr } - if instanceKey.Equals(&master.MasterKey) { - return instance, fmt.Errorf("instance %+v is already co master of %+v", instance.Key, master.Key) + if instanceKey.Equals(&main.MainKey) { + return instance, fmt.Errorf("instance %+v is already co main of %+v", instance.Key, main.Key) } if !instance.ReadOnly { - return instance, fmt.Errorf("instance %+v is not read-only; first make it read-only before making it co-master", instance.Key) + return instance, fmt.Errorf("instance %+v is not read-only; first make it read-only before making it co-main", instance.Key) } - if master.IsCoMaster { - // We allow breaking of an existing co-master replication. Here's the breakdown: - // Ideally, this would not eb allowed, and we would first require the user to RESET SLAVE on 'master' - // prior to making it participate as co-master with our 'instance'. + if main.IsCoMain { + // We allow breaking of an existing co-main replication. Here's the breakdown: + // Ideally, this would not eb allowed, and we would first require the user to RESET SLAVE on 'main' + // prior to making it participate as co-main with our 'instance'. // However there's the problem that upon RESET SLAVE we lose the replication's user/password info. 
// Thus, we come up with the following rule: - // If S replicates from M1, and M1<->M2 are co masters, we allow S to become co-master of M1 (S<->M1) if: + // If S replicates from M1, and M1<->M2 are co mains, we allow S to become co-main of M1 (S<->M1) if: // - M1 is writeable // - M2 is read-only or is unreachable/invalid // - S is read-only - // And so we will be replacing one read-only co-master with another. - otherCoMaster, found, _ := ReadInstance(&master.MasterKey) - if found && otherCoMaster.IsLastCheckValid && !otherCoMaster.ReadOnly { - return instance, fmt.Errorf("master %+v is already co-master with %+v, and %+v is alive, and not read-only; cowardly refusing to demote it. Please set it as read-only beforehand", master.Key, otherCoMaster.Key, otherCoMaster.Key) + // And so we will be replacing one read-only co-main with another. + otherCoMain, found, _ := ReadInstance(&main.MainKey) + if found && otherCoMain.IsLastCheckValid && !otherCoMain.ReadOnly { + return instance, fmt.Errorf("main %+v is already co-main with %+v, and %+v is alive, and not read-only; cowardly refusing to demote it. Please set it as read-only beforehand", main.Key, otherCoMain.Key, otherCoMain.Key) } // OK, good to go. - } else if _, found, _ := ReadInstance(&master.MasterKey); found { - return instance, fmt.Errorf("%+v is not a real master; it replicates from: %+v", master.Key, master.MasterKey) + } else if _, found, _ := ReadInstance(&main.MainKey); found { + return instance, fmt.Errorf("%+v is not a real main; it replicates from: %+v", main.Key, main.MainKey) } - if canReplicate, err := master.CanReplicateFrom(instance); !canReplicate { + if canReplicate, err := main.CanReplicateFrom(instance); !canReplicate { return instance, err } - log.Infof("Will make %+v co-master of %+v", instanceKey, master.Key) + log.Infof("Will make %+v co-main of %+v", instanceKey, main.Key) - if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), fmt.Sprintf("make co-master of %+v", master.Key)); merr != nil { + if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), fmt.Sprintf("make co-main of %+v", main.Key)); merr != nil { err = fmt.Errorf("Cannot begin maintenance on %+v", *instanceKey) goto Cleanup } else { defer EndMaintenance(maintenanceToken) } - if maintenanceToken, merr := BeginMaintenance(&master.Key, GetMaintenanceOwner(), fmt.Sprintf("%+v turns into co-master of this", *instanceKey)); merr != nil { - err = fmt.Errorf("Cannot begin maintenance on %+v", master.Key) + if maintenanceToken, merr := BeginMaintenance(&main.Key, GetMaintenanceOwner(), fmt.Sprintf("%+v turns into co-main of this", *instanceKey)); merr != nil { + err = fmt.Errorf("Cannot begin maintenance on %+v", main.Key) goto Cleanup } else { defer EndMaintenance(maintenanceToken) } - // the coMaster used to be merely a slave. Just point master into *some* position - // within coMaster... - if master.IsSlave() { - // this is the case of a co-master. For masters, the StopSlave operation throws an error, and + // the coMain used to be merely a subordinate. Just point main into *some* position + // within coMain... + if main.IsSubordinate() { + // this is the case of a co-main. For mains, the StopSubordinate operation throws an error, and // there's really no point in doing it. - master, err = StopSlave(&master.Key) + main, err = StopSubordinate(&main.Key) if err != nil { goto Cleanup } } - if instance.ReplicationCredentialsAvailable && !master.HasReplicationCredentials { - // Yay! 
We can get credentials from the slave! + if instance.ReplicationCredentialsAvailable && !main.HasReplicationCredentials { + // Yay! We can get credentials from the subordinate! replicationUser, replicationPassword, err := ReadReplicationCredentials(&instance.Key) if err != nil { goto Cleanup } log.Debugf("Got credentials from a replica. will now apply") - _, err = ChangeMasterCredentials(&master.Key, replicationUser, replicationPassword) + _, err = ChangeMainCredentials(&main.Key, replicationUser, replicationPassword) if err != nil { goto Cleanup } } - master, err = ChangeMasterTo(&master.Key, instanceKey, &instance.SelfBinlogCoordinates, false, GTIDHintNeutral) + main, err = ChangeMainTo(&main.Key, instanceKey, &instance.SelfBinlogCoordinates, false, GTIDHintNeutral) if err != nil { goto Cleanup } Cleanup: - master, _ = StartSlave(&master.Key) + main, _ = StartSubordinate(&main.Key) if err != nil { return instance, log.Errore(err) } // and we're done (pending deferred functions) - AuditOperation("make-co-master", instanceKey, fmt.Sprintf("%+v made co-master of %+v", *instanceKey, master.Key)) + AuditOperation("make-co-main", instanceKey, fmt.Sprintf("%+v made co-main of %+v", *instanceKey, main.Key)) return instance, err } -// ResetSlaveOperation will reset a slave -func ResetSlaveOperation(instanceKey *InstanceKey) (*Instance, error) { +// ResetSubordinateOperation will reset a subordinate +func ResetSubordinateOperation(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, err } - log.Infof("Will reset slave on %+v", instanceKey) + log.Infof("Will reset subordinate on %+v", instanceKey) - if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "reset slave"); merr != nil { + if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "reset subordinate"); merr != nil { err = fmt.Errorf("Cannot begin maintenance on %+v", *instanceKey) goto Cleanup } else { defer EndMaintenance(maintenanceToken) } - if instance.IsSlave() { - instance, err = StopSlave(instanceKey) + if instance.IsSubordinate() { + instance, err = StopSubordinate(instanceKey) if err != nil { goto Cleanup } } - instance, err = ResetSlave(instanceKey) + instance, err = ResetSubordinate(instanceKey) if err != nil { goto Cleanup } Cleanup: - instance, _ = StartSlave(instanceKey) + instance, _ = StartSubordinate(instanceKey) if err != nil { return instance, log.Errore(err) } // and we're done (pending deferred functions) - AuditOperation("reset-slave", instanceKey, fmt.Sprintf("%+v replication reset", *instanceKey)) + AuditOperation("reset-subordinate", instanceKey, fmt.Sprintf("%+v replication reset", *instanceKey)) return instance, err } -// DetachSlaveOperation will detach a slave from its master by forcibly corrupting its replication coordinates -func DetachSlaveOperation(instanceKey *InstanceKey) (*Instance, error) { +// DetachSubordinateOperation will detach a subordinate from its main by forcibly corrupting its replication coordinates +func DetachSubordinateOperation(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, err @@ -967,40 +967,40 @@ func DetachSlaveOperation(instanceKey *InstanceKey) (*Instance, error) { log.Infof("Will detach %+v", instanceKey) - if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "detach slave"); merr != nil { + if maintenanceToken, merr := 
BeginMaintenance(instanceKey, GetMaintenanceOwner(), "detach subordinate"); merr != nil { err = fmt.Errorf("Cannot begin maintenance on %+v", *instanceKey) goto Cleanup } else { defer EndMaintenance(maintenanceToken) } - if instance.IsSlave() { - instance, err = StopSlave(instanceKey) + if instance.IsSubordinate() { + instance, err = StopSubordinate(instanceKey) if err != nil { goto Cleanup } } - instance, err = DetachSlave(instanceKey) + instance, err = DetachSubordinate(instanceKey) if err != nil { goto Cleanup } Cleanup: - instance, _ = StartSlave(instanceKey) + instance, _ = StartSubordinate(instanceKey) if err != nil { return instance, log.Errore(err) } // and we're done (pending deferred functions) - AuditOperation("detach-slave", instanceKey, fmt.Sprintf("%+v replication detached", *instanceKey)) + AuditOperation("detach-subordinate", instanceKey, fmt.Sprintf("%+v replication detached", *instanceKey)) return instance, err } -// ReattachSlaveOperation will detach a slave from its master by forcibly corrupting its replication coordinates -func ReattachSlaveOperation(instanceKey *InstanceKey) (*Instance, error) { +// ReattachSubordinateOperation will detach a subordinate from its main by forcibly corrupting its replication coordinates +func ReattachSubordinateOperation(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, err @@ -1008,125 +1008,125 @@ func ReattachSlaveOperation(instanceKey *InstanceKey) (*Instance, error) { log.Infof("Will reattach %+v", instanceKey) - if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "detach slave"); merr != nil { + if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "detach subordinate"); merr != nil { err = fmt.Errorf("Cannot begin maintenance on %+v", *instanceKey) goto Cleanup } else { defer EndMaintenance(maintenanceToken) } - if instance.IsSlave() { - instance, err = StopSlave(instanceKey) + if instance.IsSubordinate() { + instance, err = StopSubordinate(instanceKey) if err != nil { goto Cleanup } } - instance, err = ReattachSlave(instanceKey) + instance, err = ReattachSubordinate(instanceKey) if err != nil { goto Cleanup } Cleanup: - instance, _ = StartSlave(instanceKey) + instance, _ = StartSubordinate(instanceKey) if err != nil { return instance, log.Errore(err) } // and we're done (pending deferred functions) - AuditOperation("reattach-slave", instanceKey, fmt.Sprintf("%+v replication reattached", *instanceKey)) + AuditOperation("reattach-subordinate", instanceKey, fmt.Sprintf("%+v replication reattached", *instanceKey)) return instance, err } -// DetachSlaveMasterHost detaches a slave from its master by corrupting the Master_Host (in such way that is reversible) -func DetachSlaveMasterHost(instanceKey *InstanceKey) (*Instance, error) { +// DetachSubordinateMainHost detaches a subordinate from its main by corrupting the Main_Host (in such way that is reversible) +func DetachSubordinateMainHost(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, err } - if !instance.IsSlave() { - return instance, fmt.Errorf("instance is not a slave: %+v", *instanceKey) + if !instance.IsSubordinate() { + return instance, fmt.Errorf("instance is not a subordinate: %+v", *instanceKey) } - if instance.MasterKey.IsDetached() { + if instance.MainKey.IsDetached() { return instance, fmt.Errorf("instance already detached: %+v", 
*instanceKey) } - detachedMasterKey := instance.MasterKey.DetachedKey() + detachedMainKey := instance.MainKey.DetachedKey() - log.Infof("Will detach master host on %+v. Detached key is %+v", *instanceKey, *detachedMasterKey) + log.Infof("Will detach main host on %+v. Detached key is %+v", *instanceKey, *detachedMainKey) - if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "detach-slave-master-host"); merr != nil { + if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "detach-subordinate-main-host"); merr != nil { err = fmt.Errorf("Cannot begin maintenance on %+v", *instanceKey) goto Cleanup } else { defer EndMaintenance(maintenanceToken) } - instance, err = StopSlave(instanceKey) + instance, err = StopSubordinate(instanceKey) if err != nil { goto Cleanup } - instance, err = ChangeMasterTo(instanceKey, detachedMasterKey, &instance.ExecBinlogCoordinates, true, GTIDHintNeutral) + instance, err = ChangeMainTo(instanceKey, detachedMainKey, &instance.ExecBinlogCoordinates, true, GTIDHintNeutral) if err != nil { goto Cleanup } Cleanup: - instance, _ = StartSlave(instanceKey) + instance, _ = StartSubordinate(instanceKey) if err != nil { return instance, log.Errore(err) } // and we're done (pending deferred functions) - AuditOperation("repoint", instanceKey, fmt.Sprintf("slave %+v detached from master into %+v", *instanceKey, *detachedMasterKey)) + AuditOperation("repoint", instanceKey, fmt.Sprintf("subordinate %+v detached from main into %+v", *instanceKey, *detachedMainKey)) return instance, err } -// ReattachSlaveMasterHost reattaches a slave back onto its master by undoing a DetachSlaveMasterHost operation -func ReattachSlaveMasterHost(instanceKey *InstanceKey) (*Instance, error) { +// ReattachSubordinateMainHost reattaches a subordinate back onto its main by undoing a DetachSubordinateMainHost operation +func ReattachSubordinateMainHost(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, err } - if !instance.IsSlave() { - return instance, fmt.Errorf("instance is not a slave: %+v", *instanceKey) + if !instance.IsSubordinate() { + return instance, fmt.Errorf("instance is not a subordinate: %+v", *instanceKey) } - if !instance.MasterKey.IsDetached() { + if !instance.MainKey.IsDetached() { return instance, fmt.Errorf("instance does not seem to be detached: %+v", *instanceKey) } - reattachedMasterKey := instance.MasterKey.ReattachedKey() + reattachedMainKey := instance.MainKey.ReattachedKey() - log.Infof("Will reattach master host on %+v. Reattached key is %+v", *instanceKey, *reattachedMasterKey) + log.Infof("Will reattach main host on %+v. 
Reattached key is %+v", *instanceKey, *reattachedMainKey) - if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "reattach-slave-master-host"); merr != nil { + if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "reattach-subordinate-main-host"); merr != nil { err = fmt.Errorf("Cannot begin maintenance on %+v", *instanceKey) goto Cleanup } else { defer EndMaintenance(maintenanceToken) } - instance, err = StopSlave(instanceKey) + instance, err = StopSubordinate(instanceKey) if err != nil { goto Cleanup } - instance, err = ChangeMasterTo(instanceKey, reattachedMasterKey, &instance.ExecBinlogCoordinates, true, GTIDHintNeutral) + instance, err = ChangeMainTo(instanceKey, reattachedMainKey, &instance.ExecBinlogCoordinates, true, GTIDHintNeutral) if err != nil { goto Cleanup } - // Just in case this instance used to be a master: - ReplaceAliasClusterName(instanceKey.StringCode(), reattachedMasterKey.StringCode()) + // Just in case this instance used to be a main: + ReplaceAliasClusterName(instanceKey.StringCode(), reattachedMainKey.StringCode()) Cleanup: - instance, _ = StartSlave(instanceKey) + instance, _ = StartSubordinate(instanceKey) if err != nil { return instance, log.Errore(err) } // and we're done (pending deferred functions) - AuditOperation("repoint", instanceKey, fmt.Sprintf("slave %+v reattached to master %+v", *instanceKey, *reattachedMasterKey)) + AuditOperation("repoint", instanceKey, fmt.Sprintf("subordinate %+v reattached to main %+v", *instanceKey, *reattachedMainKey)) return instance, err } @@ -1181,34 +1181,34 @@ func DisableGTID(instanceKey *InstanceKey) (*Instance, error) { return instance, err } -// ResetMasterGTIDOperation will issue a safe RESET MASTER on a slave that replicates via GTID: +// ResetMainGTIDOperation will issue a safe RESET MASTER on a subordinate that replicates via GTID: // It will make sure the gtid_purged set matches the executed set value as read just before the RESET. -// this will enable new slaves to be attached to given instance without complaints about missing/purged entries. -// This function requires that the instance does not have slaves. -func ResetMasterGTIDOperation(instanceKey *InstanceKey, removeSelfUUID bool, uuidToRemove string) (*Instance, error) { +// this will enable new subordinates to be attached to given instance without complaints about missing/purged entries. +// This function requires that the instance does not have subordinates. +func ResetMainGTIDOperation(instanceKey *InstanceKey, removeSelfUUID bool, uuidToRemove string) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, err } if !instance.SupportsOracleGTID { - return instance, log.Errorf("reset-master-gtid requested for %+v but it is not using oracle-gtid", *instanceKey) + return instance, log.Errorf("reset-main-gtid requested for %+v but it is not using oracle-gtid", *instanceKey) } - if len(instance.SlaveHosts) > 0 { - return instance, log.Errorf("reset-master-gtid will not operate on %+v because it has %+v slaves. Expecting no slaves", *instanceKey, len(instance.SlaveHosts)) + if len(instance.SubordinateHosts) > 0 { + return instance, log.Errorf("reset-main-gtid will not operate on %+v because it has %+v subordinates. 
Expecting no subordinates", *instanceKey, len(instance.SubordinateHosts)) } - log.Infof("Will reset master on %+v", instanceKey) + log.Infof("Will reset main on %+v", instanceKey) var oracleGtidSet *OracleGtidSet - if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "reset-master-gtid"); merr != nil { + if maintenanceToken, merr := BeginMaintenance(instanceKey, GetMaintenanceOwner(), "reset-main-gtid"); merr != nil { err = fmt.Errorf("Cannot begin maintenance on %+v", *instanceKey) goto Cleanup } else { defer EndMaintenance(maintenanceToken) } - if instance.IsSlave() { - instance, err = StopSlave(instanceKey) + if instance.IsSubordinate() { + instance, err = StopSubordinate(instanceKey) if err != nil { goto Cleanup } @@ -1230,7 +1230,7 @@ func ResetMasterGTIDOperation(instanceKey *InstanceKey, removeSelfUUID bool, uui } } - instance, err = ResetMaster(instanceKey) + instance, err = ResetMain(instanceKey) if err != nil { goto Cleanup } @@ -1240,14 +1240,14 @@ func ResetMasterGTIDOperation(instanceKey *InstanceKey, removeSelfUUID bool, uui } Cleanup: - instance, _ = StartSlave(instanceKey) + instance, _ = StartSubordinate(instanceKey) if err != nil { return instance, log.Errore(err) } // and we're done (pending deferred functions) - AuditOperation("reset-master-gtid", instanceKey, fmt.Sprintf("%+v master reset", *instanceKey)) + AuditOperation("reset-main-gtid", instanceKey, fmt.Sprintf("%+v main reset", *instanceKey)) return instance, err } @@ -1261,20 +1261,20 @@ func FindLastPseudoGTIDEntry(instance *Instance, recordedInstanceRelayLogCoordin } minBinlogCoordinates, minRelaylogCoordinates, err := GetHeuristiclyRecentCoordinatesForInstance(&instance.Key) - if instance.LogBinEnabled && instance.LogSlaveUpdatesEnabled && (expectedBinlogFormat == nil || instance.Binlog_format == *expectedBinlogFormat) { + if instance.LogBinEnabled && instance.LogSubordinateUpdatesEnabled && (expectedBinlogFormat == nil || instance.Binlog_format == *expectedBinlogFormat) { // Well no need to search this instance's binary logs if it doesn't have any... - // With regard log-slave-updates, some edge cases are possible, like having this instance's log-slave-updates + // With regard log-subordinate-updates, some edge cases are possible, like having this instance's log-subordinate-updates // enabled/disabled (of course having restarted it) - // The approach is not to take chances. If log-slave-updates is disabled, fail and go for relay-logs. - // If log-slave-updates was just enabled then possibly no pseudo-gtid is found, and so again we will go + // The approach is not to take chances. If log-subordinate-updates is disabled, fail and go for relay-logs. + // If log-subordinate-updates was just enabled then possibly no pseudo-gtid is found, and so again we will go // for relay logs. - // Also, if master has STATEMENT binlog format, and the slave has ROW binlog format, then comparing binlog entries would urely fail if based on the slave's binary logs. + // Also, if main has STATEMENT binlog format, and the subordinate has ROW binlog format, then comparing binlog entries would urely fail if based on the subordinate's binary logs. // Instead, we revert to the relay logs. instancePseudoGtidCoordinates, instancePseudoGtidText, err = getLastPseudoGTIDEntryInInstance(instance, minBinlogCoordinates, maxBinlogCoordinates, exhaustiveSearch) } if err != nil || instancePseudoGtidCoordinates == nil { // Unable to find pseudo GTID in binary logs. 
- // Then MAYBE we are lucky enough (chances are we are, if this slave did not crash) that we can + // Then MAYBE we are lucky enough (chances are we are, if this subordinate did not crash) that we can // extract the Pseudo GTID entry from the last (current) relay log file. instancePseudoGtidCoordinates, instancePseudoGtidText, err = getLastPseudoGTIDEntryInRelayLogs(instance, minRelaylogCoordinates, recordedInstanceRelayLogCoordinates, exhaustiveSearch) } @@ -1373,8 +1373,8 @@ func MatchBelow(instanceKey, otherKey *InstanceKey, requireInstanceMaintenance b } } - log.Debugf("Stopping slave on %+v", *instanceKey) - instance, err = StopSlave(instanceKey) + log.Debugf("Stopping subordinate on %+v", *instanceKey) + instance, err = StopSubordinate(instanceKey) if err != nil { goto Cleanup } @@ -1388,13 +1388,13 @@ func MatchBelow(instanceKey, otherKey *InstanceKey, requireInstanceMaintenance b log.Debugf("%+v will match below %+v at %+v; validated events: %d", *instanceKey, *otherKey, *nextBinlogCoordinatesToMatch, countMatchedEvents) // Drum roll...... - instance, err = ChangeMasterTo(instanceKey, otherKey, nextBinlogCoordinatesToMatch, false, GTIDHintDeny) + instance, err = ChangeMainTo(instanceKey, otherKey, nextBinlogCoordinatesToMatch, false, GTIDHintDeny) if err != nil { goto Cleanup } Cleanup: - instance, _ = StartSlave(instanceKey) + instance, _ = StartSubordinate(instanceKey) if err != nil { return instance, nextBinlogCoordinatesToMatch, log.Errore(err) } @@ -1404,48 +1404,48 @@ Cleanup: return instance, nextBinlogCoordinatesToMatch, err } -// RematchSlave will re-match a slave to its master, using pseudo-gtid -func RematchSlave(instanceKey *InstanceKey, requireInstanceMaintenance bool) (*Instance, *BinlogCoordinates, error) { +// RematchSubordinate will re-match a subordinate to its main, using pseudo-gtid +func RematchSubordinate(instanceKey *InstanceKey, requireInstanceMaintenance bool) (*Instance, *BinlogCoordinates, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, nil, err } - masterInstance, found, err := ReadInstance(&instance.MasterKey) + mainInstance, found, err := ReadInstance(&instance.MainKey) if err != nil || !found { return instance, nil, err } - return MatchBelow(instanceKey, &masterInstance.Key, requireInstanceMaintenance) + return MatchBelow(instanceKey, &mainInstance.Key, requireInstanceMaintenance) } -// MakeMaster will take an instance, make all its siblings its slaves (via pseudo-GTID) and make it master +// MakeMain will take an instance, make all its siblings its subordinates (via pseudo-GTID) and make it main // (stop its replicaiton, make writeable). -func MakeMaster(instanceKey *InstanceKey) (*Instance, error) { +func MakeMain(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, err } - masterInstance, err := ReadTopologyInstanceUnbuffered(&instance.MasterKey) + mainInstance, err := ReadTopologyInstanceUnbuffered(&instance.MainKey) if err == nil { - // If the read succeeded, check the master status. - if masterInstance.IsSlave() { - return instance, fmt.Errorf("MakeMaster: instance's master %+v seems to be replicating", masterInstance.Key) + // If the read succeeded, check the main status. 
+ if mainInstance.IsSubordinate() { + return instance, fmt.Errorf("MakeMain: instance's main %+v seems to be replicating", mainInstance.Key) } - if masterInstance.IsLastCheckValid { - return instance, fmt.Errorf("MakeMaster: instance's master %+v seems to be accessible", masterInstance.Key) + if mainInstance.IsLastCheckValid { + return instance, fmt.Errorf("MakeMain: instance's main %+v seems to be accessible", mainInstance.Key) } } - // Continue anyway if the read failed, because that means the master is + // Continue anyway if the read failed, because that means the main is // inaccessible... So it's OK to do the promotion. if !instance.SQLThreadUpToDate() { - return instance, fmt.Errorf("MakeMaster: instance's SQL thread must be up-to-date with I/O thread for %+v", *instanceKey) + return instance, fmt.Errorf("MakeMain: instance's SQL thread must be up-to-date with I/O thread for %+v", *instanceKey) } - siblings, err := ReadSlaveInstances(&masterInstance.Key) + siblings, err := ReadSubordinateInstances(&mainInstance.Key) if err != nil { return instance, err } for _, sibling := range siblings { if instance.ExecBinlogCoordinates.SmallerThan(&sibling.ExecBinlogCoordinates) { - return instance, fmt.Errorf("MakeMaster: instance %+v has more advanced sibling: %+v", *instanceKey, sibling.Key) + return instance, fmt.Errorf("MakeMain: instance %+v has more advanced sibling: %+v", *instanceKey, sibling.Key) } } @@ -1468,124 +1468,124 @@ Cleanup: return instance, log.Errore(err) } // and we're done (pending deferred functions) - AuditOperation("make-master", instanceKey, fmt.Sprintf("made master of %+v", *instanceKey)) + AuditOperation("make-main", instanceKey, fmt.Sprintf("made main of %+v", *instanceKey)) return instance, err } -// EnslaveSiblings is a convenience method for turning sublings of a slave to be its subordinates. +// EnsubordinateSiblings is a convenience method for turning sublings of a subordinate to be its subordinates. // This uses normal connected replication (does not utilize Pseudo-GTID) -func EnslaveSiblings(instanceKey *InstanceKey) (*Instance, int, error) { +func EnsubordinateSiblings(instanceKey *InstanceKey) (*Instance, int, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, 0, err } - masterInstance, found, err := ReadInstance(&instance.MasterKey) + mainInstance, found, err := ReadInstance(&instance.MainKey) if err != nil || !found { return instance, 0, err } - siblings, err := ReadSlaveInstances(&masterInstance.Key) + siblings, err := ReadSubordinateInstances(&mainInstance.Key) if err != nil { return instance, 0, err } - enslavedSiblings := 0 + ensubordinatedSiblings := 0 for _, sibling := range siblings { if _, err := MoveBelow(&sibling.Key, &instance.Key); err == nil { - enslavedSiblings++ + ensubordinatedSiblings++ } } - return instance, enslavedSiblings, err + return instance, ensubordinatedSiblings, err } -// EnslaveMaster will move an instance up the chain and cause its master to become its slave. -// It's almost a role change, just that other slaves of either 'instance' or its master are currently unaffected +// EnsubordinateMain will move an instance up the chain and cause its main to become its subordinate. 
+// It's almost a role change, just that other subordinates of either 'instance' or its main are currently unaffected // (they continue replicate without change) -// Note that the master must itself be a slave; however the grandparent does not necessarily have to be reachable +// Note that the main must itself be a subordinate; however the grandparent does not necessarily have to be reachable // and can in fact be dead. -func EnslaveMaster(instanceKey *InstanceKey) (*Instance, error) { +func EnsubordinateMain(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, err } - masterInstance, found, err := ReadInstance(&instance.MasterKey) + mainInstance, found, err := ReadInstance(&instance.MainKey) if err != nil || !found { return instance, err } - log.Debugf("EnslaveMaster: will attempt making %+v enslave its master %+v, now resolved as %+v", *instanceKey, instance.MasterKey, masterInstance.Key) + log.Debugf("EnsubordinateMain: will attempt making %+v ensubordinate its main %+v, now resolved as %+v", *instanceKey, instance.MainKey, mainInstance.Key) - if canReplicate, err := masterInstance.CanReplicateFrom(instance); canReplicate == false { + if canReplicate, err := mainInstance.CanReplicateFrom(instance); canReplicate == false { return instance, err } // We begin - masterInstance, err = StopSlave(&masterInstance.Key) + mainInstance, err = StopSubordinate(&mainInstance.Key) if err != nil { goto Cleanup } - instance, err = StopSlave(&instance.Key) + instance, err = StopSubordinate(&instance.Key) if err != nil { goto Cleanup } - instance, err = StartSlaveUntilMasterCoordinates(&instance.Key, &masterInstance.SelfBinlogCoordinates) + instance, err = StartSubordinateUntilMainCoordinates(&instance.Key, &mainInstance.SelfBinlogCoordinates) if err != nil { goto Cleanup } - // instance and masterInstance are equal - // We skip name unresolve. It is OK if the master's master is dead, unreachable, does not resolve properly. - // We just copy+paste info from the master. - // In particular, this is commonly calledin DeadMaster recovery - instance, err = ChangeMasterTo(&instance.Key, &masterInstance.MasterKey, &masterInstance.ExecBinlogCoordinates, true, GTIDHintNeutral) + // instance and mainInstance are equal + // We skip name unresolve. It is OK if the main's main is dead, unreachable, does not resolve properly. + // We just copy+paste info from the main. + // In particular, this is commonly calledin DeadMain recovery + instance, err = ChangeMainTo(&instance.Key, &mainInstance.MainKey, &mainInstance.ExecBinlogCoordinates, true, GTIDHintNeutral) if err != nil { goto Cleanup } - // instance is now sibling of master - masterInstance, err = ChangeMasterTo(&masterInstance.Key, &instance.Key, &instance.SelfBinlogCoordinates, false, GTIDHintNeutral) + // instance is now sibling of main + mainInstance, err = ChangeMainTo(&mainInstance.Key, &instance.Key, &instance.SelfBinlogCoordinates, false, GTIDHintNeutral) if err != nil { goto Cleanup } // swap is done! 
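MakeMain above and MakeLocalMain below both refuse to promote an instance when a sibling has executed further, using a comparison of executed binlog coordinates. A hedged sketch of that check, assuming coordinates order by binary log file name first and then by position within the file; the coords type here is a simplified stand-in, not the orchestrator BinlogCoordinates struct:

package main

import "fmt"

type coords struct {
	LogFile string
	LogPos  int64
}

func (c coords) smallerThan(other coords) bool {
	if c.LogFile != other.LogFile {
		return c.LogFile < other.LogFile // e.g. "mysql-bin.000041" sorts before "mysql-bin.000042"
	}
	return c.LogPos < other.LogPos
}

func main() {
	candidate := coords{"mysql-bin.000042", 1024}
	siblings := []coords{
		{"mysql-bin.000041", 9000},
		{"mysql-bin.000042", 2048}, // more advanced than the candidate
	}
	for _, s := range siblings {
		if candidate.smallerThan(s) {
			fmt.Println("refusing promotion: a sibling is more advanced:", s)
		}
	}
}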
Cleanup: - instance, _ = StartSlave(&instance.Key) - masterInstance, _ = StartSlave(&masterInstance.Key) + instance, _ = StartSubordinate(&instance.Key) + mainInstance, _ = StartSubordinate(&mainInstance.Key) if err != nil { return instance, err } - AuditOperation("enslave-master", instanceKey, fmt.Sprintf("enslaved master: %+v", masterInstance.Key)) + AuditOperation("ensubordinate-main", instanceKey, fmt.Sprintf("ensubordinated main: %+v", mainInstance.Key)) return instance, err } -// MakeLocalMaster promotes a slave above its master, making it slave of its grandparent, while also enslaving its siblings. -// This serves as a convenience method to recover replication when a local master fails; the instance promoted is one of its slaves, +// MakeLocalMain promotes a subordinate above its main, making it subordinate of its grandparent, while also enslaving its siblings. +// This serves as a convenience method to recover replication when a local main fails; the instance promoted is one of its subordinates, // which is most advanced among its siblings. // This method utilizes Pseudo GTID -func MakeLocalMaster(instanceKey *InstanceKey) (*Instance, error) { +func MakeLocalMain(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, err } - masterInstance, found, err := ReadInstance(&instance.MasterKey) + mainInstance, found, err := ReadInstance(&instance.MainKey) if err != nil || !found { return instance, err } - grandparentInstance, err := ReadTopologyInstanceUnbuffered(&masterInstance.MasterKey) + grandparentInstance, err := ReadTopologyInstanceUnbuffered(&mainInstance.MainKey) if err != nil { return instance, err } - siblings, err := ReadSlaveInstances(&masterInstance.Key) + siblings, err := ReadSubordinateInstances(&mainInstance.Key) if err != nil { return instance, err } for _, sibling := range siblings { if instance.ExecBinlogCoordinates.SmallerThan(&sibling.ExecBinlogCoordinates) { - return instance, fmt.Errorf("MakeMaster: instance %+v has more advanced sibling: %+v", *instanceKey, sibling.Key) + return instance, fmt.Errorf("MakeMain: instance %+v has more advanced sibling: %+v", *instanceKey, sibling.Key) } } - instance, err = StopSlaveNicely(instanceKey, 0) + instance, err = StopSubordinateNicely(instanceKey, 0) if err != nil { goto Cleanup } @@ -1605,7 +1605,7 @@ Cleanup: return instance, log.Errore(err) } // and we're done (pending deferred functions) - AuditOperation("make-local-master", instanceKey, fmt.Sprintf("made master of %+v", *instanceKey)) + AuditOperation("make-local-main", instanceKey, fmt.Sprintf("made main of %+v", *instanceKey)) return instance, err } @@ -1615,56 +1615,56 @@ func sortInstances(instances [](*Instance)) { sort.Sort(sort.Reverse(InstancesByExecBinlogCoordinates(instances))) } -// getSlavesForSorting returns a list of slaves of a given master potentially for candidate choosing -func getSlavesForSorting(masterKey *InstanceKey, includeBinlogServerSubSlaves bool) (slaves [](*Instance), err error) { - if includeBinlogServerSubSlaves { - slaves, err = ReadSlaveInstancesIncludingBinlogServerSubSlaves(masterKey) +// getSubordinatesForSorting returns a list of subordinates of a given main potentially for candidate choosing +func getSubordinatesForSorting(mainKey *InstanceKey, includeBinlogServerSubSubordinates bool) (subordinates [](*Instance), err error) { + if includeBinlogServerSubSubordinates { + subordinates, err = 
ReadSubordinateInstancesIncludingBinlogServerSubSubordinates(mainKey) } else { - slaves, err = ReadSlaveInstances(masterKey) + subordinates, err = ReadSubordinateInstances(mainKey) } - return slaves, err + return subordinates, err } -// sortedSlaves returns the list of slaves of some master, sorted by exec coordinates -// (most up-to-date slave first). -// This function assumes given `slaves` argument is indeed a list of instances all replicating -// from the same master (the result of `getSlavesForSorting()` is appropriate) -func sortedSlaves(slaves [](*Instance), shouldStopSlaves bool) [](*Instance) { - if len(slaves) == 0 { - return slaves +// sortedSubordinates returns the list of subordinates of some main, sorted by exec coordinates +// (most up-to-date subordinate first). +// This function assumes given `subordinates` argument is indeed a list of instances all replicating +// from the same main (the result of `getSubordinatesForSorting()` is appropriate) +func sortedSubordinates(subordinates [](*Instance), shouldStopSubordinates bool) [](*Instance) { + if len(subordinates) == 0 { + return subordinates } - if shouldStopSlaves { - log.Debugf("sortedSlaves: stopping %d slaves nicely", len(slaves)) - slaves = StopSlavesNicely(slaves, time.Duration(config.Config.InstanceBulkOperationsWaitTimeoutSeconds)*time.Second) + if shouldStopSubordinates { + log.Debugf("sortedSubordinates: stopping %d subordinates nicely", len(subordinates)) + subordinates = StopSubordinatesNicely(subordinates, time.Duration(config.Config.InstanceBulkOperationsWaitTimeoutSeconds)*time.Second) } - slaves = RemoveNilInstances(slaves) + subordinates = RemoveNilInstances(subordinates) - sortInstances(slaves) - for _, slave := range slaves { - log.Debugf("- sorted slave: %+v %+v", slave.Key, slave.ExecBinlogCoordinates) + sortInstances(subordinates) + for _, subordinate := range subordinates { + log.Debugf("- sorted subordinate: %+v %+v", subordinate.Key, subordinate.ExecBinlogCoordinates) } - return slaves + return subordinates } -// MultiMatchBelow will efficiently match multiple slaves below a given instance. -// It is assumed that all given slaves are siblings -func MultiMatchBelow(slaves [](*Instance), belowKey *InstanceKey, slavesAlreadyStopped bool, postponedFunctionsContainer *PostponedFunctionsContainer) ([](*Instance), *Instance, error, []error) { +// MultiMatchBelow will efficiently match multiple subordinates below a given instance. 
+// It is assumed that all given subordinates are siblings +func MultiMatchBelow(subordinates [](*Instance), belowKey *InstanceKey, subordinatesAlreadyStopped bool, postponedFunctionsContainer *PostponedFunctionsContainer) ([](*Instance), *Instance, error, []error) { res := [](*Instance){} errs := []error{} - slaveMutex := make(chan bool, 1) + subordinateMutex := make(chan bool, 1) if config.Config.PseudoGTIDPattern == "" { return res, nil, fmt.Errorf("PseudoGTIDPattern not configured; cannot use Pseudo-GTID"), errs } - slaves = RemoveInstance(slaves, belowKey) - slaves = RemoveBinlogServerInstances(slaves) + subordinates = RemoveInstance(subordinates, belowKey) + subordinates = RemoveBinlogServerInstances(subordinates) - for _, slave := range slaves { - if maintenanceToken, merr := BeginMaintenance(&slave.Key, GetMaintenanceOwner(), fmt.Sprintf("%+v match below %+v as part of MultiMatchBelow", slave.Key, *belowKey)); merr != nil { - errs = append(errs, fmt.Errorf("Cannot begin maintenance on %+v", slave.Key)) - slaves = RemoveInstance(slaves, &slave.Key) + for _, subordinate := range subordinates { + if maintenanceToken, merr := BeginMaintenance(&subordinate.Key, GetMaintenanceOwner(), fmt.Sprintf("%+v match below %+v as part of MultiMatchBelow", subordinate.Key, *belowKey)); merr != nil { + errs = append(errs, fmt.Errorf("Cannot begin maintenance on %+v", subordinate.Key)) + subordinates = RemoveInstance(subordinates, &subordinate.Key) } else { defer EndMaintenance(maintenanceToken) } @@ -1672,7 +1672,7 @@ func MultiMatchBelow(slaves [](*Instance), belowKey *InstanceKey, slavesAlreadyS belowInstance, err := ReadTopologyInstanceUnbuffered(belowKey) if err != nil { - // Can't access the server below which we need to match ==> can't move slaves + // Can't access the server below which we need to match ==> can't move subordinates return res, belowInstance, err, errs } if belowInstance.IsBinlogServer() { @@ -1681,269 +1681,269 @@ func MultiMatchBelow(slaves [](*Instance), belowKey *InstanceKey, slavesAlreadyS return res, belowInstance, err, errs } - // slaves involved - if len(slaves) == 0 { + // subordinates involved + if len(subordinates) == 0 { return res, belowInstance, nil, errs } - if !slavesAlreadyStopped { - log.Debugf("MultiMatchBelow: stopping %d slaves nicely", len(slaves)) - // We want the slaves to have SQL thread up to date with IO thread. + if !subordinatesAlreadyStopped { + log.Debugf("MultiMatchBelow: stopping %d subordinates nicely", len(subordinates)) + // We want the subordinates to have SQL thread up to date with IO thread. // We will wait for them (up to a timeout) to do so. 
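// [Editor's sketch -- not part of this patch] "Stopping nicely" here means polling each
// subordinate until its SQL thread has applied everything its IO thread fetched, and giving up
// after InstanceBulkOperationsWaitTimeoutSeconds. A generic, standalone version of that
// wait-with-timeout loop (the upToDate predicate is a stand-in; stdlib only):
//
//	package main
//
//	import (
//		"fmt"
//		"time"
//	)
//
//	// waitUntil polls check() every interval until it returns true or the timeout expires.
//	func waitUntil(check func() bool, interval, timeout time.Duration) bool {
//		deadline := time.Now().Add(timeout)
//		for time.Now().Before(deadline) {
//			if check() {
//				return true
//			}
//			time.Sleep(interval)
//		}
//		return check()
//	}
//
//	func main() {
//		start := time.Now()
//		upToDate := func() bool { return time.Since(start) > 300*time.Millisecond }
//		fmt.Println(waitUntil(upToDate, 50*time.Millisecond, time.Second)) // true
//	}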
- slaves = StopSlavesNicely(slaves, time.Duration(config.Config.InstanceBulkOperationsWaitTimeoutSeconds)*time.Second) + subordinates = StopSubordinatesNicely(subordinates, time.Duration(config.Config.InstanceBulkOperationsWaitTimeoutSeconds)*time.Second) } - slaves = RemoveNilInstances(slaves) - sort.Sort(sort.Reverse(InstancesByExecBinlogCoordinates(slaves))) + subordinates = RemoveNilInstances(subordinates) + sort.Sort(sort.Reverse(InstancesByExecBinlogCoordinates(subordinates))) // Optimizations: - // Slaves which broke on the same Exec-coordinates can be handled in the exact same way: - // we only need to figure out one slave of each group/bucket of exec-coordinates; then apply the CHANGE MASTER TO + // Subordinates which broke on the same Exec-coordinates can be handled in the exact same way: + // we only need to figure out one subordinate of each group/bucket of exec-coordinates; then apply the CHANGE MASTER TO // on all its fellow members using same coordinates. - slaveBuckets := make(map[BinlogCoordinates][](*Instance)) - for _, slave := range slaves { - slave := slave - slaveBuckets[slave.ExecBinlogCoordinates] = append(slaveBuckets[slave.ExecBinlogCoordinates], slave) + subordinateBuckets := make(map[BinlogCoordinates][](*Instance)) + for _, subordinate := range subordinates { + subordinate := subordinate + subordinateBuckets[subordinate.ExecBinlogCoordinates] = append(subordinateBuckets[subordinate.ExecBinlogCoordinates], subordinate) } - log.Debugf("MultiMatchBelow: %d slaves merged into %d buckets", len(slaves), len(slaveBuckets)) - for bucket, bucketSlaves := range slaveBuckets { - log.Debugf("+- bucket: %+v, %d slaves", bucket, len(bucketSlaves)) + log.Debugf("MultiMatchBelow: %d subordinates merged into %d buckets", len(subordinates), len(subordinateBuckets)) + for bucket, bucketSubordinates := range subordinateBuckets { + log.Debugf("+- bucket: %+v, %d subordinates", bucket, len(bucketSubordinates)) } - matchedSlaves := make(map[InstanceKey]bool) + matchedSubordinates := make(map[InstanceKey]bool) bucketsBarrier := make(chan *BinlogCoordinates) - // Now go over the buckets, and try a single slave from each bucket - // (though if one results with an error, synchronuously-for-that-bucket continue to the next slave in bucket) + // Now go over the buckets, and try a single subordinate from each bucket + // (though if one results with an error, synchronuously-for-that-bucket continue to the next subordinate in bucket) - for execCoordinates, bucketSlaves := range slaveBuckets { + for execCoordinates, bucketSubordinates := range subordinateBuckets { execCoordinates := execCoordinates - bucketSlaves := bucketSlaves + bucketSubordinates := bucketSubordinates var bucketMatchedCoordinates *BinlogCoordinates // Buckets concurrent go func() { - // find coordinates for a single bucket based on a slave in said bucket + // find coordinates for a single bucket based on a subordinate in said bucket defer func() { bucketsBarrier <- &execCoordinates }() func() { - for _, slave := range bucketSlaves { - slave := slave - var slaveErr error + for _, subordinate := range bucketSubordinates { + subordinate := subordinate + var subordinateErr error var matchedCoordinates *BinlogCoordinates - log.Debugf("MultiMatchBelow: attempting slave %+v in bucket %+v", slave.Key, execCoordinates) + log.Debugf("MultiMatchBelow: attempting subordinate %+v in bucket %+v", subordinate.Key, execCoordinates) matchFunc := func() error { ExecuteOnTopology(func() { - _, matchedCoordinates, slaveErr = 
MatchBelow(&slave.Key, &belowInstance.Key, false) + _, matchedCoordinates, subordinateErr = MatchBelow(&subordinate.Key, &belowInstance.Key, false) }) return nil } if postponedFunctionsContainer != nil && - config.Config.PostponeSlaveRecoveryOnLagMinutes > 0 && - slave.SQLDelay > config.Config.PostponeSlaveRecoveryOnLagMinutes*60 && - len(bucketSlaves) == 1 { - // This slave is the only one in the bucket, AND it's lagging very much, AND - // we're configured to postpone operation on this slave so as not to delay everyone else. + config.Config.PostponeSubordinateRecoveryOnLagMinutes > 0 && + subordinate.SQLDelay > config.Config.PostponeSubordinateRecoveryOnLagMinutes*60 && + len(bucketSubordinates) == 1 { + // This subordinate is the only one in the bucket, AND it's lagging very much, AND + // we're configured to postpone operation on this subordinate so as not to delay everyone else. (*postponedFunctionsContainer).AddPostponedFunction(matchFunc) return // We bail out and trust our invoker to later call upon this postponed function } matchFunc() - log.Debugf("MultiMatchBelow: match result: %+v, %+v", matchedCoordinates, slaveErr) + log.Debugf("MultiMatchBelow: match result: %+v, %+v", matchedCoordinates, subordinateErr) - if slaveErr == nil { - // Success! We matched a slave of this bucket + if subordinateErr == nil { + // Success! We matched a subordinate of this bucket func() { // Instantaneous mutex. - slaveMutex <- true - defer func() { <-slaveMutex }() + subordinateMutex <- true + defer func() { <-subordinateMutex }() bucketMatchedCoordinates = matchedCoordinates - matchedSlaves[slave.Key] = true + matchedSubordinates[subordinate.Key] = true }() - log.Debugf("MultiMatchBelow: matched slave %+v in bucket %+v", slave.Key, execCoordinates) + log.Debugf("MultiMatchBelow: matched subordinate %+v in bucket %+v", subordinate.Key, execCoordinates) return } // Got here? Error! func() { // Instantaneous mutex. - slaveMutex <- true - defer func() { <-slaveMutex }() - errs = append(errs, slaveErr) + subordinateMutex <- true + defer func() { <-subordinateMutex }() + errs = append(errs, subordinateErr) }() - log.Errore(slaveErr) - // Failure: some unknown problem with bucket slave. Let's try the next one (continue loop) + log.Errore(subordinateErr) + // Failure: some unknown problem with bucket subordinate. Let's try the next one (continue loop) } }() if bucketMatchedCoordinates == nil { - log.Errorf("MultiMatchBelow: Cannot match up %d slaves since their bucket %+v is failed", len(bucketSlaves), execCoordinates) + log.Errorf("MultiMatchBelow: Cannot match up %d subordinates since their bucket %+v is failed", len(bucketSubordinates), execCoordinates) return } - log.Debugf("MultiMatchBelow: bucket %+v coordinates are: %+v. Proceeding to match all bucket slaves", execCoordinates, *bucketMatchedCoordinates) - // At this point our bucket has a known salvaged slave. - // We don't wait for the other buckets -- we immediately work out all the other slaves in this bucket. + log.Debugf("MultiMatchBelow: bucket %+v coordinates are: %+v. Proceeding to match all bucket subordinates", execCoordinates, *bucketMatchedCoordinates) + // At this point our bucket has a known salvaged subordinate. + // We don't wait for the other buckets -- we immediately work out all the other subordinates in this bucket. 
// (perhaps another bucket is busy matching a 24h delayed-replica; we definitely don't want to hold on that) func() { barrier := make(chan *InstanceKey) - // We point all this bucket's slaves into the same coordinates, concurrently - // We are already doing concurrent buckets; but for each bucket we also want to do concurrent slaves, + // We point all this bucket's subordinates into the same coordinates, concurrently + // We are already doing concurrent buckets; but for each bucket we also want to do concurrent subordinates, // otherwise one large bucket would make for a sequential work... - for _, slave := range bucketSlaves { - slave := slave + for _, subordinate := range bucketSubordinates { + subordinate := subordinate go func() { - defer func() { barrier <- &slave.Key }() + defer func() { barrier <- &subordinate.Key }() var err error - if _, found := matchedSlaves[slave.Key]; found { - // Already matched this slave + if _, found := matchedSubordinates[subordinate.Key]; found { + // Already matched this subordinate return } - log.Debugf("MultiMatchBelow: Will match up %+v to previously matched master coordinates %+v", slave.Key, *bucketMatchedCoordinates) - slaveMatchSuccess := false + log.Debugf("MultiMatchBelow: Will match up %+v to previously matched main coordinates %+v", subordinate.Key, *bucketMatchedCoordinates) + subordinateMatchSuccess := false ExecuteOnTopology(func() { - if _, err = ChangeMasterTo(&slave.Key, &belowInstance.Key, bucketMatchedCoordinates, false, GTIDHintDeny); err == nil { - StartSlave(&slave.Key) - slaveMatchSuccess = true + if _, err = ChangeMainTo(&subordinate.Key, &belowInstance.Key, bucketMatchedCoordinates, false, GTIDHintDeny); err == nil { + StartSubordinate(&subordinate.Key) + subordinateMatchSuccess = true } }) func() { // Quickly update lists; mutext is instantenous - slaveMutex <- true - defer func() { <-slaveMutex }() - if slaveMatchSuccess { - matchedSlaves[slave.Key] = true + subordinateMutex <- true + defer func() { <-subordinateMutex }() + if subordinateMatchSuccess { + matchedSubordinates[subordinate.Key] = true } else { errs = append(errs, err) - log.Errorf("MultiMatchBelow: Cannot match up %+v: error is %+v", slave.Key, err) + log.Errorf("MultiMatchBelow: Cannot match up %+v: error is %+v", subordinate.Key, err) } }() }() } - for range bucketSlaves { + for range bucketSubordinates { <-barrier } }() }() } - for range slaveBuckets { + for range subordinateBuckets { <-bucketsBarrier } - for _, slave := range slaves { - slave := slave - if _, found := matchedSlaves[slave.Key]; found { - res = append(res, slave) + for _, subordinate := range subordinates { + subordinate := subordinate + if _, found := matchedSubordinates[subordinate.Key]; found { + res = append(res, subordinate) } } return res, belowInstance, err, errs } -// MultiMatchSlaves will match (via pseudo-gtid) all slaves of given master below given instance. -func MultiMatchSlaves(masterKey *InstanceKey, belowKey *InstanceKey, pattern string) ([](*Instance), *Instance, error, []error) { +// MultiMatchSubordinates will match (via pseudo-gtid) all subordinates of given main below given instance. 
+func MultiMatchSubordinates(mainKey *InstanceKey, belowKey *InstanceKey, pattern string) ([](*Instance), *Instance, error, []error) { res := [](*Instance){} errs := []error{} belowInstance, err := ReadTopologyInstanceUnbuffered(belowKey) if err != nil { - // Can't access "below" ==> can't match slaves beneath it + // Can't access "below" ==> can't match subordinates beneath it return res, nil, err, errs } - masterInstance, found, err := ReadInstance(masterKey) + mainInstance, found, err := ReadInstance(mainKey) if err != nil || !found { return res, nil, err, errs } // See if we have a binlog server case (special handling): binlogCase := false - if masterInstance.IsBinlogServer() && masterInstance.MasterKey.Equals(belowKey) { + if mainInstance.IsBinlogServer() && mainInstance.MainKey.Equals(belowKey) { // repoint-up - log.Debugf("MultiMatchSlaves: pointing slaves up from binlog server") + log.Debugf("MultiMatchSubordinates: pointing subordinates up from binlog server") binlogCase = true - } else if belowInstance.IsBinlogServer() && belowInstance.MasterKey.Equals(masterKey) { + } else if belowInstance.IsBinlogServer() && belowInstance.MainKey.Equals(mainKey) { // repoint-down - log.Debugf("MultiMatchSlaves: pointing slaves down to binlog server") + log.Debugf("MultiMatchSubordinates: pointing subordinates down to binlog server") binlogCase = true - } else if masterInstance.IsBinlogServer() && belowInstance.IsBinlogServer() && masterInstance.MasterKey.Equals(&belowInstance.MasterKey) { + } else if mainInstance.IsBinlogServer() && belowInstance.IsBinlogServer() && mainInstance.MainKey.Equals(&belowInstance.MainKey) { // Both BLS, siblings - log.Debugf("MultiMatchSlaves: pointing slaves to binlong sibling") + log.Debugf("MultiMatchSubordinates: pointing subordinates to binlong sibling") binlogCase = true } if binlogCase { - slaves, err, errors := RepointSlavesTo(masterKey, pattern, belowKey) + subordinates, err, errors := RepointSubordinatesTo(mainKey, pattern, belowKey) // Bail out! 
- return slaves, masterInstance, err, errors + return subordinates, mainInstance, err, errors } // Not binlog server - // slaves involved - slaves, err := ReadSlaveInstancesIncludingBinlogServerSubSlaves(masterKey) + // subordinates involved + subordinates, err := ReadSubordinateInstancesIncludingBinlogServerSubSubordinates(mainKey) if err != nil { return res, belowInstance, err, errs } - slaves = filterInstancesByPattern(slaves, pattern) - matchedSlaves, belowInstance, err, errs := MultiMatchBelow(slaves, &belowInstance.Key, false, nil) + subordinates = filterInstancesByPattern(subordinates, pattern) + matchedSubordinates, belowInstance, err, errs := MultiMatchBelow(subordinates, &belowInstance.Key, false, nil) - if len(matchedSlaves) != len(slaves) { - err = fmt.Errorf("MultiMatchSlaves: only matched %d out of %d slaves of %+v; error is: %+v", len(matchedSlaves), len(slaves), *masterKey, err) + if len(matchedSubordinates) != len(subordinates) { + err = fmt.Errorf("MultiMatchSubordinates: only matched %d out of %d subordinates of %+v; error is: %+v", len(matchedSubordinates), len(subordinates), *mainKey, err) } - AuditOperation("multi-match-slaves", masterKey, fmt.Sprintf("matched %d slaves under %+v", len(matchedSlaves), *belowKey)) + AuditOperation("multi-match-subordinates", mainKey, fmt.Sprintf("matched %d subordinates under %+v", len(matchedSubordinates), *belowKey)) - return matchedSlaves, belowInstance, err, errs + return matchedSubordinates, belowInstance, err, errs } -// MatchUp will move a slave up the replication chain, so that it becomes sibling of its master, via Pseudo-GTID +// MatchUp will move a subordinate up the replication chain, so that it becomes sibling of its main, via Pseudo-GTID func MatchUp(instanceKey *InstanceKey, requireInstanceMaintenance bool) (*Instance, *BinlogCoordinates, error) { instance, found, err := ReadInstance(instanceKey) if err != nil || !found { return nil, nil, err } - if !instance.IsSlave() { - return instance, nil, fmt.Errorf("instance is not a slave: %+v", instanceKey) + if !instance.IsSubordinate() { + return instance, nil, fmt.Errorf("instance is not a subordinate: %+v", instanceKey) } - master, found, err := ReadInstance(&instance.MasterKey) + main, found, err := ReadInstance(&instance.MainKey) if err != nil || !found { - return instance, nil, log.Errorf("Cannot get master for %+v. error=%+v", instance.Key, err) + return instance, nil, log.Errorf("Cannot get main for %+v. error=%+v", instance.Key, err) } - if !master.IsSlave() { - return instance, nil, fmt.Errorf("master is not a slave itself: %+v", master.Key) + if !main.IsSubordinate() { + return instance, nil, fmt.Errorf("main is not a subordinate itself: %+v", main.Key) } - return MatchBelow(instanceKey, &master.MasterKey, requireInstanceMaintenance) + return MatchBelow(instanceKey, &main.MainKey, requireInstanceMaintenance) } -// MatchUpSlaves will move all slaves of given master up the replication chain, -// so that they become siblings of their master. -// This should be called when the local master dies, and all its slaves are to be resurrected via Pseudo-GTID -func MatchUpSlaves(masterKey *InstanceKey, pattern string) ([](*Instance), *Instance, error, []error) { +// MatchUpSubordinates will move all subordinates of given main up the replication chain, +// so that they become siblings of their main. 
+// This should be called when the local main dies, and all its subordinates are to be resurrected via Pseudo-GTID +func MatchUpSubordinates(mainKey *InstanceKey, pattern string) ([](*Instance), *Instance, error, []error) { res := [](*Instance){} errs := []error{} - masterInstance, found, err := ReadInstance(masterKey) + mainInstance, found, err := ReadInstance(mainKey) if err != nil || !found { return res, nil, err, errs } - return MultiMatchSlaves(masterKey, &masterInstance.MasterKey, pattern) + return MultiMatchSubordinates(mainKey, &mainInstance.MainKey, pattern) } -func isGenerallyValidAsBinlogSource(slave *Instance) bool { - if !slave.IsLastCheckValid { - // something wrong with this slave right now. We shouldn't hope to be able to promote it +func isGenerallyValidAsBinlogSource(subordinate *Instance) bool { + if !subordinate.IsLastCheckValid { + // something wrong with this subordinate right now. We shouldn't hope to be able to promote it return false } - if !slave.LogBinEnabled { + if !subordinate.LogBinEnabled { return false } - if !slave.LogSlaveUpdatesEnabled { + if !subordinate.LogSubordinateUpdatesEnabled { return false } return true } -func isGenerallyValidAsCandidateSlave(slave *Instance) bool { - if !isGenerallyValidAsBinlogSource(slave) { +func isGenerallyValidAsCandidateSubordinate(subordinate *Instance) bool { + if !isGenerallyValidAsBinlogSource(subordinate) { // does not have binary logs return false } - if slave.IsBinlogServer() { + if subordinate.IsBinlogServer() { // Can't regroup under a binlog server because it does not support pseudo-gtid related queries such as SHOW BINLOG EVENTS return false } @@ -1951,34 +1951,34 @@ func isGenerallyValidAsCandidateSlave(slave *Instance) bool { return true } -// isValidAsCandidateMasterInBinlogServerTopology let's us know whether a given slave is generally -// valid to promote to be master. -func isValidAsCandidateMasterInBinlogServerTopology(slave *Instance) bool { - if !slave.IsLastCheckValid { - // something wrong with this slave right now. We shouldn't hope to be able to promote it +// isValidAsCandidateMainInBinlogServerTopology let's us know whether a given subordinate is generally +// valid to promote to be main. +func isValidAsCandidateMainInBinlogServerTopology(subordinate *Instance) bool { + if !subordinate.IsLastCheckValid { + // something wrong with this subordinate right now. 
We shouldn't hope to be able to promote it return false } - if !slave.LogBinEnabled { + if !subordinate.LogBinEnabled { return false } - if slave.LogSlaveUpdatesEnabled { - // That's right: we *disallow* log-slave-updates + if subordinate.LogSubordinateUpdatesEnabled { + // That's right: we *disallow* log-subordinate-updates return false } - if slave.IsBinlogServer() { + if subordinate.IsBinlogServer() { return false } return true } -func isBannedFromBeingCandidateSlave(slave *Instance) bool { - if slave.PromotionRule == MustNotPromoteRule { - log.Debugf("instance %+v is banned because of promotion rule", slave.Key) +func isBannedFromBeingCandidateSubordinate(subordinate *Instance) bool { + if subordinate.PromotionRule == MustNotPromoteRule { + log.Debugf("instance %+v is banned because of promotion rule", subordinate.Key) return true } for _, filter := range config.Config.PromotionIgnoreHostnameFilters { - if matched, _ := regexp.MatchString(filter, slave.Key.Hostname); matched { + if matched, _ := regexp.MatchString(filter, subordinate.Key.Hostname); matched { return true } } @@ -1987,17 +1987,17 @@ func isBannedFromBeingCandidateSlave(slave *Instance) bool { // getPriorityMajorVersionForCandidate returns the primary (most common) major version found // among given instances. This will be used for choosing best candidate for promotion. -func getPriorityMajorVersionForCandidate(slaves [](*Instance)) (priorityMajorVersion string, err error) { - if len(slaves) == 0 { - return "", log.Errorf("empty slaves list in getPriorityMajorVersionForCandidate") +func getPriorityMajorVersionForCandidate(subordinates [](*Instance)) (priorityMajorVersion string, err error) { + if len(subordinates) == 0 { + return "", log.Errorf("empty subordinates list in getPriorityMajorVersionForCandidate") } majorVersionsCount := make(map[string]int) - for _, slave := range slaves { - majorVersionsCount[slave.MajorVersionString()] = majorVersionsCount[slave.MajorVersionString()] + 1 + for _, subordinate := range subordinates { + majorVersionsCount[subordinate.MajorVersionString()] = majorVersionsCount[subordinate.MajorVersionString()] + 1 } if len(majorVersionsCount) == 1 { // all same version, simple case - return slaves[0].MajorVersionString(), nil + return subordinates[0].MajorVersionString(), nil } currentMaxMajorVersionCount := 0 @@ -2012,17 +2012,17 @@ func getPriorityMajorVersionForCandidate(slaves [](*Instance)) (priorityMajorVer // getPriorityBinlogFormatForCandidate returns the primary (most common) binlog format found // among given instances. This will be used for choosing best candidate for promotion. 
-func getPriorityBinlogFormatForCandidate(slaves [](*Instance)) (priorityBinlogFormat string, err error) { - if len(slaves) == 0 { - return "", log.Errorf("empty slaves list in getPriorityBinlogFormatForCandidate") +func getPriorityBinlogFormatForCandidate(subordinates [](*Instance)) (priorityBinlogFormat string, err error) { + if len(subordinates) == 0 { + return "", log.Errorf("empty subordinates list in getPriorityBinlogFormatForCandidate") } binlogFormatsCount := make(map[string]int) - for _, slave := range slaves { - binlogFormatsCount[slave.Binlog_format] = binlogFormatsCount[slave.Binlog_format] + 1 + for _, subordinate := range subordinates { + binlogFormatsCount[subordinate.Binlog_format] = binlogFormatsCount[subordinate.Binlog_format] + 1 } if len(binlogFormatsCount) == 1 { // all same binlog format, simple case - return slaves[0].Binlog_format, nil + return subordinates[0].Binlog_format, nil } currentMaxBinlogFormatCount := 0 @@ -2035,178 +2035,178 @@ func getPriorityBinlogFormatForCandidate(slaves [](*Instance)) (priorityBinlogFo return priorityBinlogFormat, nil } -// chooseCandidateSlave -func chooseCandidateSlave(slaves [](*Instance)) (candidateSlave *Instance, aheadSlaves, equalSlaves, laterSlaves, cannotReplicateSlaves [](*Instance), err error) { - if len(slaves) == 0 { - return candidateSlave, aheadSlaves, equalSlaves, laterSlaves, cannotReplicateSlaves, fmt.Errorf("No slaves found given in chooseCandidateSlave") +// chooseCandidateSubordinate +func chooseCandidateSubordinate(subordinates [](*Instance)) (candidateSubordinate *Instance, aheadSubordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates [](*Instance), err error) { + if len(subordinates) == 0 { + return candidateSubordinate, aheadSubordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates, fmt.Errorf("No subordinates found given in chooseCandidateSubordinate") } - priorityMajorVersion, _ := getPriorityMajorVersionForCandidate(slaves) - priorityBinlogFormat, _ := getPriorityBinlogFormatForCandidate(slaves) + priorityMajorVersion, _ := getPriorityMajorVersionForCandidate(subordinates) + priorityBinlogFormat, _ := getPriorityBinlogFormatForCandidate(subordinates) - for _, slave := range slaves { - slave := slave - if isGenerallyValidAsCandidateSlave(slave) && - !isBannedFromBeingCandidateSlave(slave) && - !IsSmallerMajorVersion(priorityMajorVersion, slave.MajorVersionString()) && - !IsSmallerBinlogFormat(priorityBinlogFormat, slave.Binlog_format) { + for _, subordinate := range subordinates { + subordinate := subordinate + if isGenerallyValidAsCandidateSubordinate(subordinate) && + !isBannedFromBeingCandidateSubordinate(subordinate) && + !IsSmallerMajorVersion(priorityMajorVersion, subordinate.MajorVersionString()) && + !IsSmallerBinlogFormat(priorityBinlogFormat, subordinate.Binlog_format) { // this is the one - candidateSlave = slave + candidateSubordinate = subordinate break } } - if candidateSlave == nil { - // Unable to find a candidate that will master others. - // Instead, pick a (single) slave which is not banned. - for _, slave := range slaves { - slave := slave - if !isBannedFromBeingCandidateSlave(slave) { + if candidateSubordinate == nil { + // Unable to find a candidate that will main others. + // Instead, pick a (single) subordinate which is not banned. 
+ for _, subordinate := range subordinates { + subordinate := subordinate + if !isBannedFromBeingCandidateSubordinate(subordinate) { // this is the one - candidateSlave = slave + candidateSubordinate = subordinate break } } - if candidateSlave != nil { - slaves = RemoveInstance(slaves, &candidateSlave.Key) + if candidateSubordinate != nil { + subordinates = RemoveInstance(subordinates, &candidateSubordinate.Key) } - return candidateSlave, slaves, equalSlaves, laterSlaves, cannotReplicateSlaves, fmt.Errorf("chooseCandidateSlave: no candidate slave found") - } - slaves = RemoveInstance(slaves, &candidateSlave.Key) - for _, slave := range slaves { - slave := slave - if canReplicate, _ := slave.CanReplicateFrom(candidateSlave); !canReplicate { - cannotReplicateSlaves = append(cannotReplicateSlaves, slave) - } else if slave.ExecBinlogCoordinates.SmallerThan(&candidateSlave.ExecBinlogCoordinates) { - laterSlaves = append(laterSlaves, slave) - } else if slave.ExecBinlogCoordinates.Equals(&candidateSlave.ExecBinlogCoordinates) { - equalSlaves = append(equalSlaves, slave) + return candidateSubordinate, subordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates, fmt.Errorf("chooseCandidateSubordinate: no candidate subordinate found") + } + subordinates = RemoveInstance(subordinates, &candidateSubordinate.Key) + for _, subordinate := range subordinates { + subordinate := subordinate + if canReplicate, _ := subordinate.CanReplicateFrom(candidateSubordinate); !canReplicate { + cannotReplicateSubordinates = append(cannotReplicateSubordinates, subordinate) + } else if subordinate.ExecBinlogCoordinates.SmallerThan(&candidateSubordinate.ExecBinlogCoordinates) { + laterSubordinates = append(laterSubordinates, subordinate) + } else if subordinate.ExecBinlogCoordinates.Equals(&candidateSubordinate.ExecBinlogCoordinates) { + equalSubordinates = append(equalSubordinates, subordinate) } else { - aheadSlaves = append(aheadSlaves, slave) + aheadSubordinates = append(aheadSubordinates, subordinate) } } - return candidateSlave, aheadSlaves, equalSlaves, laterSlaves, cannotReplicateSlaves, err + return candidateSubordinate, aheadSubordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates, err } -// GetCandidateSlave chooses the best slave to promote given a (possibly dead) master -func GetCandidateSlave(masterKey *InstanceKey, forRematchPurposes bool) (*Instance, [](*Instance), [](*Instance), [](*Instance), [](*Instance), error) { - var candidateSlave *Instance - aheadSlaves := [](*Instance){} - equalSlaves := [](*Instance){} - laterSlaves := [](*Instance){} - cannotReplicateSlaves := [](*Instance){} +// GetCandidateSubordinate chooses the best subordinate to promote given a (possibly dead) main +func GetCandidateSubordinate(mainKey *InstanceKey, forRematchPurposes bool) (*Instance, [](*Instance), [](*Instance), [](*Instance), [](*Instance), error) { + var candidateSubordinate *Instance + aheadSubordinates := [](*Instance){} + equalSubordinates := [](*Instance){} + laterSubordinates := [](*Instance){} + cannotReplicateSubordinates := [](*Instance){} - slaves, err := getSlavesForSorting(masterKey, false) + subordinates, err := getSubordinatesForSorting(mainKey, false) if err != nil { - return candidateSlave, aheadSlaves, equalSlaves, laterSlaves, cannotReplicateSlaves, err + return candidateSubordinate, aheadSubordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates, err } - slaves = sortedSlaves(slaves, forRematchPurposes) + subordinates = 
sortedSubordinates(subordinates, forRematchPurposes) if err != nil { - return candidateSlave, aheadSlaves, equalSlaves, laterSlaves, cannotReplicateSlaves, err + return candidateSubordinate, aheadSubordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates, err } - if len(slaves) == 0 { - return candidateSlave, aheadSlaves, equalSlaves, laterSlaves, cannotReplicateSlaves, fmt.Errorf("No slaves found for %+v", *masterKey) + if len(subordinates) == 0 { + return candidateSubordinate, aheadSubordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates, fmt.Errorf("No subordinates found for %+v", *mainKey) } - candidateSlave, aheadSlaves, equalSlaves, laterSlaves, cannotReplicateSlaves, err = chooseCandidateSlave(slaves) + candidateSubordinate, aheadSubordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates, err = chooseCandidateSubordinate(subordinates) if err != nil { - return candidateSlave, aheadSlaves, equalSlaves, laterSlaves, cannotReplicateSlaves, err + return candidateSubordinate, aheadSubordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates, err } - log.Debugf("GetCandidateSlave: candidate: %+v, ahead: %d, equal: %d, late: %d, break: %d", candidateSlave.Key, len(aheadSlaves), len(equalSlaves), len(laterSlaves), len(cannotReplicateSlaves)) - return candidateSlave, aheadSlaves, equalSlaves, laterSlaves, cannotReplicateSlaves, nil + log.Debugf("GetCandidateSubordinate: candidate: %+v, ahead: %d, equal: %d, late: %d, break: %d", candidateSubordinate.Key, len(aheadSubordinates), len(equalSubordinates), len(laterSubordinates), len(cannotReplicateSubordinates)) + return candidateSubordinate, aheadSubordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates, nil } -// GetCandidateSlaveOfBinlogServerTopology chooses the best slave to promote given a (possibly dead) master -func GetCandidateSlaveOfBinlogServerTopology(masterKey *InstanceKey) (candidateSlave *Instance, err error) { - slaves, err := getSlavesForSorting(masterKey, true) +// GetCandidateSubordinateOfBinlogServerTopology chooses the best subordinate to promote given a (possibly dead) main +func GetCandidateSubordinateOfBinlogServerTopology(mainKey *InstanceKey) (candidateSubordinate *Instance, err error) { + subordinates, err := getSubordinatesForSorting(mainKey, true) if err != nil { - return candidateSlave, err + return candidateSubordinate, err } - slaves = sortedSlaves(slaves, false) - if len(slaves) == 0 { - return candidateSlave, fmt.Errorf("No slaves found for %+v", *masterKey) + subordinates = sortedSubordinates(subordinates, false) + if len(subordinates) == 0 { + return candidateSubordinate, fmt.Errorf("No subordinates found for %+v", *mainKey) } - for _, slave := range slaves { - slave := slave - if candidateSlave != nil { + for _, subordinate := range subordinates { + subordinate := subordinate + if candidateSubordinate != nil { break } - if isValidAsCandidateMasterInBinlogServerTopology(slave) && !isBannedFromBeingCandidateSlave(slave) { + if isValidAsCandidateMainInBinlogServerTopology(subordinate) && !isBannedFromBeingCandidateSubordinate(subordinate) { // this is the one - candidateSlave = slave + candidateSubordinate = subordinate } } - if candidateSlave != nil { - log.Debugf("GetCandidateSlaveOfBinlogServerTopology: returning %+v as candidate slave for %+v", candidateSlave.Key, *masterKey) + if candidateSubordinate != nil { + log.Debugf("GetCandidateSubordinateOfBinlogServerTopology: returning %+v as candidate 
subordinate for %+v", candidateSubordinate.Key, *mainKey) } else { - log.Debugf("GetCandidateSlaveOfBinlogServerTopology: no candidate slave found for %+v", *masterKey) + log.Debugf("GetCandidateSubordinateOfBinlogServerTopology: no candidate subordinate found for %+v", *mainKey) } - return candidateSlave, err + return candidateSubordinate, err } -// RegroupSlavesPseudoGTID will choose a candidate slave of a given instance, and enslave its siblings using pseudo-gtid -func RegroupSlavesPseudoGTID(masterKey *InstanceKey, returnSlaveEvenOnFailureToRegroup bool, onCandidateSlaveChosen func(*Instance), postponedFunctionsContainer *PostponedFunctionsContainer) ([](*Instance), [](*Instance), [](*Instance), [](*Instance), *Instance, error) { - candidateSlave, aheadSlaves, equalSlaves, laterSlaves, cannotReplicateSlaves, err := GetCandidateSlave(masterKey, true) +// RegroupSubordinatesPseudoGTID will choose a candidate subordinate of a given instance, and ensubordinate its siblings using pseudo-gtid +func RegroupSubordinatesPseudoGTID(mainKey *InstanceKey, returnSubordinateEvenOnFailureToRegroup bool, onCandidateSubordinateChosen func(*Instance), postponedFunctionsContainer *PostponedFunctionsContainer) ([](*Instance), [](*Instance), [](*Instance), [](*Instance), *Instance, error) { + candidateSubordinate, aheadSubordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates, err := GetCandidateSubordinate(mainKey, true) if err != nil { - if !returnSlaveEvenOnFailureToRegroup { - candidateSlave = nil + if !returnSubordinateEvenOnFailureToRegroup { + candidateSubordinate = nil } - return aheadSlaves, equalSlaves, laterSlaves, cannotReplicateSlaves, candidateSlave, err + return aheadSubordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates, candidateSubordinate, err } if config.Config.PseudoGTIDPattern == "" { - return aheadSlaves, equalSlaves, laterSlaves, cannotReplicateSlaves, candidateSlave, fmt.Errorf("PseudoGTIDPattern not configured; cannot use Pseudo-GTID") + return aheadSubordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates, candidateSubordinate, fmt.Errorf("PseudoGTIDPattern not configured; cannot use Pseudo-GTID") } - if onCandidateSlaveChosen != nil { - onCandidateSlaveChosen(candidateSlave) + if onCandidateSubordinateChosen != nil { + onCandidateSubordinateChosen(candidateSubordinate) } - log.Debugf("RegroupSlaves: working on %d equals slaves", len(equalSlaves)) + log.Debugf("RegroupSubordinates: working on %d equals subordinates", len(equalSubordinates)) barrier := make(chan *InstanceKey) - for _, slave := range equalSlaves { - slave := slave - // This slave has the exact same executing coordinates as the candidate slave. This slave - // is *extremely* easy to attach below the candidate slave! + for _, subordinate := range equalSubordinates { + subordinate := subordinate + // This subordinate has the exact same executing coordinates as the candidate subordinate. This subordinate + // is *extremely* easy to attach below the candidate subordinate! 
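// [Editor's sketch -- not part of this patch] The loop that follows uses the fan-out/barrier
// idiom seen throughout this file: one goroutine per subordinate, each signalling a channel when
// it finishes, and the caller draining the channel once per goroutine instead of using
// sync.WaitGroup. A standalone version of the idiom (replica names are placeholders):
//
//	package main
//
//	import "fmt"
//
//	func main() {
//		keys := []string{"replica-1", "replica-2", "replica-3"}
//		barrier := make(chan string)
//		for _, key := range keys {
//			key := key // capture the loop variable for the goroutine
//			go func() {
//				defer func() { barrier <- key }()
//				// per-subordinate work goes here
//			}()
//		}
//		for range keys {
//			fmt.Println("done:", <-barrier) // blocks until every goroutine has signalled
//		}
//	}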
go func() { - defer func() { barrier <- &candidateSlave.Key }() + defer func() { barrier <- &candidateSubordinate.Key }() ExecuteOnTopology(func() { - ChangeMasterTo(&slave.Key, &candidateSlave.Key, &candidateSlave.SelfBinlogCoordinates, false, GTIDHintDeny) + ChangeMainTo(&subordinate.Key, &candidateSubordinate.Key, &candidateSubordinate.SelfBinlogCoordinates, false, GTIDHintDeny) }) }() } - for range equalSlaves { + for range equalSubordinates { <-barrier } - log.Debugf("RegroupSlaves: multi matching %d later slaves", len(laterSlaves)) - // As for the laterSlaves, we'll have to apply pseudo GTID - laterSlaves, instance, err, _ := MultiMatchBelow(laterSlaves, &candidateSlave.Key, true, postponedFunctionsContainer) + log.Debugf("RegroupSubordinates: multi matching %d later subordinates", len(laterSubordinates)) + // As for the laterSubordinates, we'll have to apply pseudo GTID + laterSubordinates, instance, err, _ := MultiMatchBelow(laterSubordinates, &candidateSubordinate.Key, true, postponedFunctionsContainer) - operatedSlaves := append(equalSlaves, candidateSlave) - operatedSlaves = append(operatedSlaves, laterSlaves...) - log.Debugf("RegroupSlaves: starting %d slaves", len(operatedSlaves)) + operatedSubordinates := append(equalSubordinates, candidateSubordinate) + operatedSubordinates = append(operatedSubordinates, laterSubordinates...) + log.Debugf("RegroupSubordinates: starting %d subordinates", len(operatedSubordinates)) barrier = make(chan *InstanceKey) - for _, slave := range operatedSlaves { - slave := slave + for _, subordinate := range operatedSubordinates { + subordinate := subordinate go func() { - defer func() { barrier <- &candidateSlave.Key }() + defer func() { barrier <- &candidateSubordinate.Key }() ExecuteOnTopology(func() { - StartSlave(&slave.Key) + StartSubordinate(&subordinate.Key) }) }() } - for range operatedSlaves { + for range operatedSubordinates { <-barrier } - log.Debugf("RegroupSlaves: done") - AuditOperation("regroup-slaves", masterKey, fmt.Sprintf("regrouped %+v slaves below %+v", len(operatedSlaves), *masterKey)) - // aheadSlaves are lost (they were ahead in replication as compared to promoted slave) - return aheadSlaves, equalSlaves, laterSlaves, cannotReplicateSlaves, instance, err + log.Debugf("RegroupSubordinates: done") + AuditOperation("regroup-subordinates", mainKey, fmt.Sprintf("regrouped %+v subordinates below %+v", len(operatedSubordinates), *mainKey)) + // aheadSubordinates are lost (they were ahead in replication as compared to promoted subordinate) + return aheadSubordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates, instance, err } -func getMostUpToDateActiveBinlogServer(masterKey *InstanceKey) (mostAdvancedBinlogServer *Instance, binlogServerSlaves [](*Instance), err error) { - if binlogServerSlaves, err = ReadBinlogServerSlaveInstances(masterKey); err == nil && len(binlogServerSlaves) > 0 { +func getMostUpToDateActiveBinlogServer(mainKey *InstanceKey) (mostAdvancedBinlogServer *Instance, binlogServerSubordinates [](*Instance), err error) { + if binlogServerSubordinates, err = ReadBinlogServerSubordinateInstances(mainKey); err == nil && len(binlogServerSubordinates) > 0 { // Pick the most advanced binlog sever that is good to go - for _, binlogServer := range binlogServerSlaves { + for _, binlogServer := range binlogServerSubordinates { if binlogServer.IsLastCheckValid { if mostAdvancedBinlogServer == nil { mostAdvancedBinlogServer = binlogServer @@ -2217,116 +2217,116 @@ func 
getMostUpToDateActiveBinlogServer(masterKey *InstanceKey) (mostAdvancedBinl } } } - return mostAdvancedBinlogServer, binlogServerSlaves, err + return mostAdvancedBinlogServer, binlogServerSubordinates, err } -// RegroupSlavesPseudoGTIDIncludingSubSlavesOfBinlogServers uses Pseugo-GTID to regroup slaves -// of given instance. The function also drill in to slaves of binlog servers that are replicating from given instance, +// RegroupSubordinatesPseudoGTIDIncludingSubSubordinatesOfBinlogServers uses Pseugo-GTID to regroup subordinates +// of given instance. The function also drill in to subordinates of binlog servers that are replicating from given instance, // and other recursive binlog servers, as long as they're in the same binlog-server-family. -func RegroupSlavesPseudoGTIDIncludingSubSlavesOfBinlogServers(masterKey *InstanceKey, returnSlaveEvenOnFailureToRegroup bool, onCandidateSlaveChosen func(*Instance), postponedFunctionsContainer *PostponedFunctionsContainer) ([](*Instance), [](*Instance), [](*Instance), [](*Instance), *Instance, error) { +func RegroupSubordinatesPseudoGTIDIncludingSubSubordinatesOfBinlogServers(mainKey *InstanceKey, returnSubordinateEvenOnFailureToRegroup bool, onCandidateSubordinateChosen func(*Instance), postponedFunctionsContainer *PostponedFunctionsContainer) ([](*Instance), [](*Instance), [](*Instance), [](*Instance), *Instance, error) { // First, handle binlog server issues: func() error { - log.Debugf("RegroupSlavesIncludingSubSlavesOfBinlogServers: starting on slaves of %+v", *masterKey) + log.Debugf("RegroupSubordinatesIncludingSubSubordinatesOfBinlogServers: starting on subordinates of %+v", *mainKey) // Find the most up to date binlog server: - mostUpToDateBinlogServer, binlogServerSlaves, err := getMostUpToDateActiveBinlogServer(masterKey) + mostUpToDateBinlogServer, binlogServerSubordinates, err := getMostUpToDateActiveBinlogServer(mainKey) if err != nil { return log.Errore(err) } if mostUpToDateBinlogServer == nil { - log.Debugf("RegroupSlavesIncludingSubSlavesOfBinlogServers: no binlog server replicates from %+v", *masterKey) + log.Debugf("RegroupSubordinatesIncludingSubSubordinatesOfBinlogServers: no binlog server replicates from %+v", *mainKey) // No binlog server; proceed as normal return nil } - log.Debugf("RegroupSlavesIncludingSubSlavesOfBinlogServers: most up to date binlog server of %+v: %+v", *masterKey, mostUpToDateBinlogServer.Key) + log.Debugf("RegroupSubordinatesIncludingSubSubordinatesOfBinlogServers: most up to date binlog server of %+v: %+v", *mainKey, mostUpToDateBinlogServer.Key) - // Find the most up to date candidate slave: - candidateSlave, _, _, _, _, err := GetCandidateSlave(masterKey, true) + // Find the most up to date candidate subordinate: + candidateSubordinate, _, _, _, _, err := GetCandidateSubordinate(mainKey, true) if err != nil { return log.Errore(err) } - if candidateSlave == nil { - log.Debugf("RegroupSlavesIncludingSubSlavesOfBinlogServers: no candidate slave for %+v", *masterKey) + if candidateSubordinate == nil { + log.Debugf("RegroupSubordinatesIncludingSubSubordinatesOfBinlogServers: no candidate subordinate for %+v", *mainKey) // Let the followup code handle that return nil } - log.Debugf("RegroupSlavesIncludingSubSlavesOfBinlogServers: candidate slave of %+v: %+v", *masterKey, candidateSlave.Key) + log.Debugf("RegroupSubordinatesIncludingSubSubordinatesOfBinlogServers: candidate subordinate of %+v: %+v", *mainKey, candidateSubordinate.Key) - if 
candidateSlave.ExecBinlogCoordinates.SmallerThan(&mostUpToDateBinlogServer.ExecBinlogCoordinates) { - log.Debugf("RegroupSlavesIncludingSubSlavesOfBinlogServers: candidate slave %+v coordinates smaller than binlog server %+v", candidateSlave.Key, mostUpToDateBinlogServer.Key) + if candidateSubordinate.ExecBinlogCoordinates.SmallerThan(&mostUpToDateBinlogServer.ExecBinlogCoordinates) { + log.Debugf("RegroupSubordinatesIncludingSubSubordinatesOfBinlogServers: candidate subordinate %+v coordinates smaller than binlog server %+v", candidateSubordinate.Key, mostUpToDateBinlogServer.Key) // Need to align under binlog server... - candidateSlave, err = Repoint(&candidateSlave.Key, &mostUpToDateBinlogServer.Key, GTIDHintDeny) + candidateSubordinate, err = Repoint(&candidateSubordinate.Key, &mostUpToDateBinlogServer.Key, GTIDHintDeny) if err != nil { return log.Errore(err) } - log.Debugf("RegroupSlavesIncludingSubSlavesOfBinlogServers: repointed candidate slave %+v under binlog server %+v", candidateSlave.Key, mostUpToDateBinlogServer.Key) - candidateSlave, err = StartSlaveUntilMasterCoordinates(&candidateSlave.Key, &mostUpToDateBinlogServer.ExecBinlogCoordinates) + log.Debugf("RegroupSubordinatesIncludingSubSubordinatesOfBinlogServers: repointed candidate subordinate %+v under binlog server %+v", candidateSubordinate.Key, mostUpToDateBinlogServer.Key) + candidateSubordinate, err = StartSubordinateUntilMainCoordinates(&candidateSubordinate.Key, &mostUpToDateBinlogServer.ExecBinlogCoordinates) if err != nil { return log.Errore(err) } - log.Debugf("RegroupSlavesIncludingSubSlavesOfBinlogServers: aligned candidate slave %+v under binlog server %+v", candidateSlave.Key, mostUpToDateBinlogServer.Key) + log.Debugf("RegroupSubordinatesIncludingSubSubordinatesOfBinlogServers: aligned candidate subordinate %+v under binlog server %+v", candidateSubordinate.Key, mostUpToDateBinlogServer.Key) // and move back - candidateSlave, err = Repoint(&candidateSlave.Key, masterKey, GTIDHintDeny) + candidateSubordinate, err = Repoint(&candidateSubordinate.Key, mainKey, GTIDHintDeny) if err != nil { return log.Errore(err) } - log.Debugf("RegroupSlavesIncludingSubSlavesOfBinlogServers: repointed candidate slave %+v under master %+v", candidateSlave.Key, *masterKey) + log.Debugf("RegroupSubordinatesIncludingSubSubordinatesOfBinlogServers: repointed candidate subordinate %+v under main %+v", candidateSubordinate.Key, *mainKey) return nil } // Either because it _was_ like that, or we _made_ it so, - // candidate slave is as/more up to date than all binlog servers - for _, binlogServer := range binlogServerSlaves { - log.Debugf("RegroupSlavesIncludingSubSlavesOfBinlogServers: matching slaves of binlog server %+v below %+v", binlogServer.Key, candidateSlave.Key) + // candidate subordinate is as/more up to date than all binlog servers + for _, binlogServer := range binlogServerSubordinates { + log.Debugf("RegroupSubordinatesIncludingSubSubordinatesOfBinlogServers: matching subordinates of binlog server %+v below %+v", binlogServer.Key, candidateSubordinate.Key) // Right now sequentially. 
// At this point just do what you can, don't return an error - MultiMatchSlaves(&binlogServer.Key, &candidateSlave.Key, "") - log.Debugf("RegroupSlavesIncludingSubSlavesOfBinlogServers: done matching slaves of binlog server %+v below %+v", binlogServer.Key, candidateSlave.Key) + MultiMatchSubordinates(&binlogServer.Key, &candidateSubordinate.Key, "") + log.Debugf("RegroupSubordinatesIncludingSubSubordinatesOfBinlogServers: done matching subordinates of binlog server %+v below %+v", binlogServer.Key, candidateSubordinate.Key) } - log.Debugf("RegroupSlavesIncludingSubSlavesOfBinlogServers: done handling binlog regrouping for %+v; will proceed with normal RegroupSlaves", *masterKey) - AuditOperation("regroup-slaves-including-bls", masterKey, fmt.Sprintf("matched slaves of binlog server slaves of %+v under %+v", *masterKey, candidateSlave.Key)) + log.Debugf("RegroupSubordinatesIncludingSubSubordinatesOfBinlogServers: done handling binlog regrouping for %+v; will proceed with normal RegroupSubordinates", *mainKey) + AuditOperation("regroup-subordinates-including-bls", mainKey, fmt.Sprintf("matched subordinates of binlog server subordinates of %+v under %+v", *mainKey, candidateSubordinate.Key)) return nil }() // Proceed to normal regroup: - return RegroupSlavesPseudoGTID(masterKey, returnSlaveEvenOnFailureToRegroup, onCandidateSlaveChosen, postponedFunctionsContainer) + return RegroupSubordinatesPseudoGTID(mainKey, returnSubordinateEvenOnFailureToRegroup, onCandidateSubordinateChosen, postponedFunctionsContainer) } -// RegroupSlavesGTID will choose a candidate slave of a given instance, and enslave its siblings using GTID -func RegroupSlavesGTID(masterKey *InstanceKey, returnSlaveEvenOnFailureToRegroup bool, onCandidateSlaveChosen func(*Instance)) ([](*Instance), [](*Instance), [](*Instance), *Instance, error) { - var emptySlaves [](*Instance) - candidateSlave, aheadSlaves, equalSlaves, laterSlaves, cannotReplicateSlaves, err := GetCandidateSlave(masterKey, true) +// RegroupSubordinatesGTID will choose a candidate subordinate of a given instance, and ensubordinate its siblings using GTID +func RegroupSubordinatesGTID(mainKey *InstanceKey, returnSubordinateEvenOnFailureToRegroup bool, onCandidateSubordinateChosen func(*Instance)) ([](*Instance), [](*Instance), [](*Instance), *Instance, error) { + var emptySubordinates [](*Instance) + candidateSubordinate, aheadSubordinates, equalSubordinates, laterSubordinates, cannotReplicateSubordinates, err := GetCandidateSubordinate(mainKey, true) if err != nil { - if !returnSlaveEvenOnFailureToRegroup { - candidateSlave = nil + if !returnSubordinateEvenOnFailureToRegroup { + candidateSubordinate = nil } - return emptySlaves, emptySlaves, emptySlaves, candidateSlave, err + return emptySubordinates, emptySubordinates, emptySubordinates, candidateSubordinate, err } - if onCandidateSlaveChosen != nil { - onCandidateSlaveChosen(candidateSlave) + if onCandidateSubordinateChosen != nil { + onCandidateSubordinateChosen(candidateSubordinate) } - slavesToMove := append(equalSlaves, laterSlaves...) - log.Debugf("RegroupSlavesGTID: working on %d slaves", len(slavesToMove)) + subordinatesToMove := append(equalSubordinates, laterSubordinates...) 
+ log.Debugf("RegroupSubordinatesGTID: working on %d subordinates", len(subordinatesToMove)) - movedSlaves, unmovedSlaves, err, _ := moveSlavesViaGTID(slavesToMove, candidateSlave) + movedSubordinates, unmovedSubordinates, err, _ := moveSubordinatesViaGTID(subordinatesToMove, candidateSubordinate) if err != nil { log.Errore(err) } - unmovedSlaves = append(unmovedSlaves, aheadSlaves...) - StartSlave(&candidateSlave.Key) + unmovedSubordinates = append(unmovedSubordinates, aheadSubordinates...) + StartSubordinate(&candidateSubordinate.Key) - log.Debugf("RegroupSlavesGTID: done") - AuditOperation("regroup-slaves-gtid", masterKey, fmt.Sprintf("regrouped slaves of %+v via GTID; promoted %+v", *masterKey, candidateSlave.Key)) - return unmovedSlaves, movedSlaves, cannotReplicateSlaves, candidateSlave, err + log.Debugf("RegroupSubordinatesGTID: done") + AuditOperation("regroup-subordinates-gtid", mainKey, fmt.Sprintf("regrouped subordinates of %+v via GTID; promoted %+v", *mainKey, candidateSubordinate.Key)) + return unmovedSubordinates, movedSubordinates, cannotReplicateSubordinates, candidateSubordinate, err } -// RegroupSlavesBinlogServers works on a binlog-servers topology. It picks the most up-to-date BLS and repoints all other +// RegroupSubordinatesBinlogServers works on a binlog-servers topology. It picks the most up-to-date BLS and repoints all other // BLS below it -func RegroupSlavesBinlogServers(masterKey *InstanceKey, returnSlaveEvenOnFailureToRegroup bool) (repointedBinlogServers [](*Instance), promotedBinlogServer *Instance, err error) { - var binlogServerSlaves [](*Instance) - promotedBinlogServer, binlogServerSlaves, err = getMostUpToDateActiveBinlogServer(masterKey) +func RegroupSubordinatesBinlogServers(mainKey *InstanceKey, returnSubordinateEvenOnFailureToRegroup bool) (repointedBinlogServers [](*Instance), promotedBinlogServer *Instance, err error) { + var binlogServerSubordinates [](*Instance) + promotedBinlogServer, binlogServerSubordinates, err = getMostUpToDateActiveBinlogServer(mainKey) resultOnError := func(err error) ([](*Instance), *Instance, error) { - if !returnSlaveEvenOnFailureToRegroup { + if !returnSubordinateEvenOnFailureToRegroup { promotedBinlogServer = nil } return repointedBinlogServers, promotedBinlogServer, err @@ -2336,65 +2336,65 @@ func RegroupSlavesBinlogServers(masterKey *InstanceKey, returnSlaveEvenOnFailure return resultOnError(err) } - repointedBinlogServers, err, _ = RepointTo(binlogServerSlaves, &promotedBinlogServer.Key) + repointedBinlogServers, err, _ = RepointTo(binlogServerSubordinates, &promotedBinlogServer.Key) if err != nil { return resultOnError(err) } - AuditOperation("regroup-slaves-bls", masterKey, fmt.Sprintf("regrouped binlog server slaves of %+v; promoted %+v", *masterKey, promotedBinlogServer.Key)) + AuditOperation("regroup-subordinates-bls", mainKey, fmt.Sprintf("regrouped binlog server subordinates of %+v; promoted %+v", *mainKey, promotedBinlogServer.Key)) return repointedBinlogServers, promotedBinlogServer, nil } -// RegroupSlaves is a "smart" method of promoting one slave over the others ("promoting" it on top of its siblings) +// RegroupSubordinates is a "smart" method of promoting one subordinate over the others ("promoting" it on top of its siblings) // This method decides which strategy to use: GTID, Pseudo-GTID, Binlog Servers. 
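// [Editor's sketch -- not part of this patch] The strategy selection in RegroupSubordinates below
// reduces to three "do all subordinates support it?" flags computed in one pass, checked in the
// order GTID, binlog servers, Pseudo-GTID, with a mixed-topology fallback. A standalone sketch of
// that dispatch over a hypothetical replica type:
//
//	package main
//
//	import "fmt"
//
//	type replica struct{ usingGTID, binlogServer, usingPseudoGTID bool }
//
//	func pickStrategy(replicas []replica) string {
//		allGTID, allBinlogServers, allPseudoGTID := true, true, true
//		for _, r := range replicas {
//			allGTID = allGTID && r.usingGTID
//			allBinlogServers = allBinlogServers && r.binlogServer
//			allPseudoGTID = allPseudoGTID && r.usingPseudoGTID
//		}
//		switch {
//		case allGTID:
//			return "gtid"
//		case allBinlogServers:
//			return "binlog-servers"
//		case allPseudoGTID:
//			return "pseudo-gtid"
//		default:
//			return "pseudo-gtid+binlog-servers" // last resort for mixed topologies
//		}
//	}
//
//	func main() {
//		fmt.Println(pickStrategy([]replica{{usingPseudoGTID: true}, {usingPseudoGTID: true}})) // pseudo-gtid
//	}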
-func RegroupSlaves(masterKey *InstanceKey, returnSlaveEvenOnFailureToRegroup bool, - onCandidateSlaveChosen func(*Instance), +func RegroupSubordinates(mainKey *InstanceKey, returnSubordinateEvenOnFailureToRegroup bool, + onCandidateSubordinateChosen func(*Instance), postponedFunctionsContainer *PostponedFunctionsContainer) ( - aheadSlaves [](*Instance), equalSlaves [](*Instance), laterSlaves [](*Instance), cannotReplicateSlaves [](*Instance), instance *Instance, err error) { + aheadSubordinates [](*Instance), equalSubordinates [](*Instance), laterSubordinates [](*Instance), cannotReplicateSubordinates [](*Instance), instance *Instance, err error) { // - var emptySlaves [](*Instance) + var emptySubordinates [](*Instance) - slaves, err := ReadSlaveInstances(masterKey) + subordinates, err := ReadSubordinateInstances(mainKey) if err != nil { - return emptySlaves, emptySlaves, emptySlaves, emptySlaves, instance, err + return emptySubordinates, emptySubordinates, emptySubordinates, emptySubordinates, instance, err } - if len(slaves) == 0 { - return emptySlaves, emptySlaves, emptySlaves, emptySlaves, instance, err + if len(subordinates) == 0 { + return emptySubordinates, emptySubordinates, emptySubordinates, emptySubordinates, instance, err } - if len(slaves) == 1 { - return emptySlaves, emptySlaves, emptySlaves, emptySlaves, slaves[0], err + if len(subordinates) == 1 { + return emptySubordinates, emptySubordinates, emptySubordinates, emptySubordinates, subordinates[0], err } allGTID := true allBinlogServers := true allPseudoGTID := true - for _, slave := range slaves { - if !slave.UsingGTID() { + for _, subordinate := range subordinates { + if !subordinate.UsingGTID() { allGTID = false } - if !slave.IsBinlogServer() { + if !subordinate.IsBinlogServer() { allBinlogServers = false } - if !slave.UsingPseudoGTID { + if !subordinate.UsingPseudoGTID { allPseudoGTID = false } } if allGTID { - log.Debugf("RegroupSlaves: using GTID to regroup slaves of %+v", *masterKey) - unmovedSlaves, movedSlaves, cannotReplicateSlaves, candidateSlave, err := RegroupSlavesGTID(masterKey, returnSlaveEvenOnFailureToRegroup, onCandidateSlaveChosen) - return unmovedSlaves, emptySlaves, movedSlaves, cannotReplicateSlaves, candidateSlave, err + log.Debugf("RegroupSubordinates: using GTID to regroup subordinates of %+v", *mainKey) + unmovedSubordinates, movedSubordinates, cannotReplicateSubordinates, candidateSubordinate, err := RegroupSubordinatesGTID(mainKey, returnSubordinateEvenOnFailureToRegroup, onCandidateSubordinateChosen) + return unmovedSubordinates, emptySubordinates, movedSubordinates, cannotReplicateSubordinates, candidateSubordinate, err } if allBinlogServers { - log.Debugf("RegroupSlaves: using binlog servers to regroup slaves of %+v", *masterKey) - movedSlaves, candidateSlave, err := RegroupSlavesBinlogServers(masterKey, returnSlaveEvenOnFailureToRegroup) - return emptySlaves, emptySlaves, movedSlaves, cannotReplicateSlaves, candidateSlave, err + log.Debugf("RegroupSubordinates: using binlog servers to regroup subordinates of %+v", *mainKey) + movedSubordinates, candidateSubordinate, err := RegroupSubordinatesBinlogServers(mainKey, returnSubordinateEvenOnFailureToRegroup) + return emptySubordinates, emptySubordinates, movedSubordinates, cannotReplicateSubordinates, candidateSubordinate, err } if allPseudoGTID { - log.Debugf("RegroupSlaves: using Pseudo-GTID to regroup slaves of %+v", *masterKey) - return RegroupSlavesPseudoGTID(masterKey, returnSlaveEvenOnFailureToRegroup, onCandidateSlaveChosen, 
postponedFunctionsContainer) + log.Debugf("RegroupSubordinates: using Pseudo-GTID to regroup subordinates of %+v", *mainKey) + return RegroupSubordinatesPseudoGTID(mainKey, returnSubordinateEvenOnFailureToRegroup, onCandidateSubordinateChosen, postponedFunctionsContainer) } // And, as last resort, we do PseudoGTID & binlog servers - log.Warningf("RegroupSlaves: unsure what method to invoke for %+v; trying Pseudo-GTID+Binlog Servers", *masterKey) - return RegroupSlavesPseudoGTIDIncludingSubSlavesOfBinlogServers(masterKey, returnSlaveEvenOnFailureToRegroup, onCandidateSlaveChosen, postponedFunctionsContainer) + log.Warningf("RegroupSubordinates: unsure what method to invoke for %+v; trying Pseudo-GTID+Binlog Servers", *mainKey) + return RegroupSubordinatesPseudoGTIDIncludingSubSubordinatesOfBinlogServers(mainKey, returnSubordinateEvenOnFailureToRegroup, onCandidateSubordinateChosen, postponedFunctionsContainer) } // relocateBelowInternal is a protentially recursive function which chooses how to relocate an instance below another. @@ -2405,7 +2405,7 @@ func relocateBelowInternal(instance, other *Instance) (*Instance, error) { return instance, log.Errorf("%+v cannot replicate from %+v. Reason: %+v", instance.Key, other.Key, err) } // simplest: - if InstanceIsMasterOf(other, instance) { + if InstanceIsMainOf(other, instance) { // already the desired setup. return Repoint(&instance.Key, &other.Key, GTIDHintNeutral) } @@ -2419,41 +2419,41 @@ func relocateBelowInternal(instance, other *Instance) (*Instance, error) { if InstancesAreSiblings(instance, other) && other.IsBinlogServer() { return MoveBelow(&instance.Key, &other.Key) } - instanceMaster, _, err := ReadInstance(&instance.MasterKey) + instanceMain, _, err := ReadInstance(&instance.MainKey) if err != nil { return instance, err } - if instanceMaster != nil && instanceMaster.MasterKey.Equals(&other.Key) && instanceMaster.IsBinlogServer() { + if instanceMain != nil && instanceMain.MainKey.Equals(&other.Key) && instanceMain.IsBinlogServer() { // Moving to grandparent via binlog server - return Repoint(&instance.Key, &instanceMaster.MasterKey, GTIDHintDeny) + return Repoint(&instance.Key, &instanceMain.MainKey, GTIDHintDeny) } if other.IsBinlogServer() { - if instanceMaster != nil && instanceMaster.IsBinlogServer() && InstancesAreSiblings(instanceMaster, other) { + if instanceMain != nil && instanceMain.IsBinlogServer() && InstancesAreSiblings(instanceMain, other) { // Special case: this is a binlog server family; we move under the uncle, in one single step return Repoint(&instance.Key, &other.Key, GTIDHintDeny) } - // Relocate to its master, then repoint to the binlog server - otherMaster, found, err := ReadInstance(&other.MasterKey) + // Relocate to its main, then repoint to the binlog server + otherMain, found, err := ReadInstance(&other.MainKey) if err != nil { return instance, err } if !found { - return instance, log.Errorf("Cannot find master %+v", other.MasterKey) + return instance, log.Errorf("Cannot find main %+v", other.MainKey) } if !other.IsLastCheckValid { return instance, log.Errorf("Binlog server %+v is not reachable. 
It would take two steps to relocate %+v below it, and I won't even do the first step.", other.Key, instance.Key) } - log.Debugf("Relocating to a binlog server; will first attempt to relocate to the binlog server's master: %+v, and then repoint down", otherMaster.Key) - if _, err := relocateBelowInternal(instance, otherMaster); err != nil { + log.Debugf("Relocating to a binlog server; will first attempt to relocate to the binlog server's main: %+v, and then repoint down", otherMain.Key) + if _, err := relocateBelowInternal(instance, otherMain); err != nil { return instance, err } return Repoint(&instance.Key, &other.Key, GTIDHintDeny) } if instance.IsBinlogServer() { // Can only move within the binlog-server family tree - // And these have been covered just now: move up from a master binlog server, move below a binling binlog server. + // And these have been covered just now: move up from a main binlog server, move below a binling binlog server. // sure, the family can be more complex, but we keep these operations atomic return nil, log.Errorf("Relocating binlog server %+v below %+v turns to be too complex; please do it manually", instance.Key, other.Key) } @@ -2471,17 +2471,17 @@ func relocateBelowInternal(instance, other *Instance) (*Instance, error) { } // No Pseudo-GTID; cehck simple binlog file/pos operations: if InstancesAreSiblings(instance, other) { - // If comastering, only move below if it's read-only - if !other.IsCoMaster || other.ReadOnly { + // If comaining, only move below if it's read-only + if !other.IsCoMain || other.ReadOnly { return MoveBelow(&instance.Key, &other.Key) } } // See if we need to MoveUp - if instanceMaster != nil && instanceMaster.MasterKey.Equals(&other.Key) { - // Moving to grandparent--handles co-mastering writable case + if instanceMain != nil && instanceMain.MainKey.Equals(&other.Key) { + // Moving to grandparent--handles co-maining writable case return MoveUp(&instance.Key) } - if instanceMaster != nil && instanceMaster.IsBinlogServer() { + if instanceMain != nil && instanceMain.IsBinlogServer() { // Break operation into two: move (repoint) up, then continue if _, err := MoveUp(&instance.Key); err != nil { return instance, err @@ -2511,108 +2511,108 @@ func RelocateBelow(instanceKey, otherKey *InstanceKey) (*Instance, error) { return instance, err } -// relocateSlavesInternal is a protentially recursive function which chooses how to relocate -// slaves of an instance below another. +// relocateSubordinatesInternal is a protentially recursive function which chooses how to relocate +// subordinates of an instance below another. // It may choose to use Pseudo-GTID, or normal binlog positions, or take advantage of binlog servers, // or it may combine any of the above in a multi-step operation. -func relocateSlavesInternal(slaves [](*Instance), instance, other *Instance) ([](*Instance), error, []error) { +func relocateSubordinatesInternal(subordinates [](*Instance), instance, other *Instance) ([](*Instance), error, []error) { errs := []error{} var err error // simplest: if instance.Key.Equals(&other.Key) { // already the desired setup. 
- return RepointTo(slaves, &other.Key) + return RepointTo(subordinates, &other.Key) } // Try and take advantage of binlog servers: - if InstanceIsMasterOf(other, instance) && instance.IsBinlogServer() { + if InstanceIsMainOf(other, instance) && instance.IsBinlogServer() { // Up from a binlog server - return RepointTo(slaves, &other.Key) + return RepointTo(subordinates, &other.Key) } - if InstanceIsMasterOf(instance, other) && other.IsBinlogServer() { + if InstanceIsMainOf(instance, other) && other.IsBinlogServer() { // Down under a binlog server - return RepointTo(slaves, &other.Key) + return RepointTo(subordinates, &other.Key) } if InstancesAreSiblings(instance, other) && instance.IsBinlogServer() && other.IsBinlogServer() { // Between siblings - return RepointTo(slaves, &other.Key) + return RepointTo(subordinates, &other.Key) } if other.IsBinlogServer() { // Relocate to binlog server's parent (recursive call), then repoint down - otherMaster, found, err := ReadInstance(&other.MasterKey) + otherMain, found, err := ReadInstance(&other.MainKey) if err != nil || !found { return nil, err, errs } - slaves, err, errs = relocateSlavesInternal(slaves, instance, otherMaster) + subordinates, err, errs = relocateSubordinatesInternal(subordinates, instance, otherMain) if err != nil { - return slaves, err, errs + return subordinates, err, errs } - return RepointTo(slaves, &other.Key) + return RepointTo(subordinates, &other.Key) } // GTID { - movedSlaves, unmovedSlaves, err, errs := moveSlavesViaGTID(slaves, other) + movedSubordinates, unmovedSubordinates, err, errs := moveSubordinatesViaGTID(subordinates, other) - if len(movedSlaves) == len(slaves) { + if len(movedSubordinates) == len(subordinates) { // Moved (or tried moving) everything via GTID - return movedSlaves, err, errs - } else if len(movedSlaves) > 0 { + return movedSubordinates, err, errs + } else if len(movedSubordinates) > 0 { // something was moved via GTID; let's try further on - return relocateSlavesInternal(unmovedSlaves, instance, other) + return relocateSubordinatesInternal(unmovedSubordinates, instance, other) } // Otherwise nothing was moved via GTID. Maybe we don't have any GTIDs, we continue. } // Pseudo GTID if other.UsingPseudoGTID { - // Which slaves are using Pseudo GTID? - var pseudoGTIDSlaves [](*Instance) - for _, slave := range slaves { - if slave.UsingPseudoGTID { - pseudoGTIDSlaves = append(pseudoGTIDSlaves, slave) + // Which subordinates are using Pseudo GTID? + var pseudoGTIDSubordinates [](*Instance) + for _, subordinate := range subordinates { + if subordinate.UsingPseudoGTID { + pseudoGTIDSubordinates = append(pseudoGTIDSubordinates, subordinate) } } - pseudoGTIDSlaves, _, err, errs = MultiMatchBelow(pseudoGTIDSlaves, &other.Key, false, nil) - return pseudoGTIDSlaves, err, errs + pseudoGTIDSubordinates, _, err, errs = MultiMatchBelow(pseudoGTIDSubordinates, &other.Key, false, nil) + return pseudoGTIDSubordinates, err, errs } // Normal binlog file:pos - if InstanceIsMasterOf(other, instance) { - // moveUpSlaves -- but not supporting "slaves" argument at this time. + if InstanceIsMainOf(other, instance) { + // moveUpSubordinates -- but not supporting "subordinates" argument at this time. 
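// Intentionally empty: there is no move-up variant that accepts an explicit replica list yet,
// so this case falls through to the "too complex" error below.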
} // Too complex - return nil, log.Errorf("Relocating %+v slaves of %+v below %+v turns to be too complex; please do it manually", len(slaves), instance.Key, other.Key), errs + return nil, log.Errorf("Relocating %+v subordinates of %+v below %+v turns to be too complex; please do it manually", len(subordinates), instance.Key, other.Key), errs } -// RelocateSlaves will attempt moving slaves of an instance indicated by instanceKey below another instance. +// RelocateSubordinates will attempt moving subordinates of an instance indicated by instanceKey below another instance. // Orchestrator will try and figure out the best way to relocate the servers. This could span normal // binlog-position, pseudo-gtid, repointing, binlog servers... -func RelocateSlaves(instanceKey, otherKey *InstanceKey, pattern string) (slaves [](*Instance), other *Instance, err error, errs []error) { +func RelocateSubordinates(instanceKey, otherKey *InstanceKey, pattern string) (subordinates [](*Instance), other *Instance, err error, errs []error) { instance, found, err := ReadInstance(instanceKey) if err != nil || !found { - return slaves, other, log.Errorf("Error reading %+v", *instanceKey), errs + return subordinates, other, log.Errorf("Error reading %+v", *instanceKey), errs } other, found, err = ReadInstance(otherKey) if err != nil || !found { - return slaves, other, log.Errorf("Error reading %+v", *otherKey), errs + return subordinates, other, log.Errorf("Error reading %+v", *otherKey), errs } - slaves, err = ReadSlaveInstances(instanceKey) + subordinates, err = ReadSubordinateInstances(instanceKey) if err != nil { - return slaves, other, err, errs + return subordinates, other, err, errs } - slaves = RemoveInstance(slaves, otherKey) - slaves = filterInstancesByPattern(slaves, pattern) - if len(slaves) == 0 { + subordinates = RemoveInstance(subordinates, otherKey) + subordinates = filterInstancesByPattern(subordinates, pattern) + if len(subordinates) == 0 { // Nothing to do - return slaves, other, nil, errs + return subordinates, other, nil, errs } - slaves, err, errs = relocateSlavesInternal(slaves, instance, other) + subordinates, err, errs = relocateSubordinatesInternal(subordinates, instance, other) if err == nil { - AuditOperation("relocate-slaves", instanceKey, fmt.Sprintf("relocated %+v slaves of %+v below %+v", len(slaves), *instanceKey, *otherKey)) + AuditOperation("relocate-subordinates", instanceKey, fmt.Sprintf("relocated %+v subordinates of %+v below %+v", len(subordinates), *instanceKey, *otherKey)) } - return slaves, other, err, errs + return subordinates, other, err, errs } diff --git a/go/inst/instance_topology_dao.go b/go/inst/instance_topology_dao.go index 3a8f61df..c940c83c 100644 --- a/go/inst/instance_topology_dao.go +++ b/go/inst/instance_topology_dao.go @@ -119,9 +119,9 @@ func RefreshTopologyInstances(instances [](*Instance)) { for _, instance := range instances { instance := instance go func() { - // Signal completed slave + // Signal completed subordinate defer func() { barrier <- instance.Key }() - // Wait your turn to read a slave + // Wait your turn to read a subordinate ExecuteOnTopology(func() { log.Debugf("... reading instance: %+v", instance.Key) ReadTopologyInstanceUnbuffered(&instance.Key) @@ -133,10 +133,10 @@ func RefreshTopologyInstances(instances [](*Instance)) { } } -// RefreshInstanceSlaveHosts is a workaround for a bug in MySQL where -// SHOW SLAVE HOSTS continues to present old, long disconnected slaves. 
+// RefreshInstanceSubordinateHosts is a workaround for a bug in MySQL where +// SHOW SLAVE HOSTS continues to present old, long disconnected subordinates. // It turns out issuing a couple FLUSH commands mitigates the problem. -func RefreshInstanceSlaveHosts(instanceKey *InstanceKey) (*Instance, error) { +func RefreshInstanceSubordinateHosts(instanceKey *InstanceKey) (*Instance, error) { _, _ = ExecInstance(instanceKey, `flush error logs`) _, _ = ExecInstance(instanceKey, `flush error logs`) @@ -144,32 +144,32 @@ func RefreshInstanceSlaveHosts(instanceKey *InstanceKey) (*Instance, error) { return instance, err } -// GetSlaveRestartPreserveStatements returns a sequence of statements that make sure a slave is stopped -// and then returned to the same state. For example, if the slave was fully running, this will issue +// GetSubordinateRestartPreserveStatements returns a sequence of statements that make sure a subordinate is stopped +// and then returned to the same state. For example, if the subordinate was fully running, this will issue // a STOP on both io_thread and sql_thread, followed by START on both. If one of them is not running // at the time this function is called, said thread will be neither stopped nor started. -// The caller may provide an injected statememt, to be executed while the slave is stopped. -// This is useful for CHANGE MASTER TO commands, that unfortunately must take place while the slave +// The caller may provide an injected statememt, to be executed while the subordinate is stopped. +// This is useful for CHANGE MASTER TO commands, that unfortunately must take place while the subordinate // is completely stopped. -func GetSlaveRestartPreserveStatements(instanceKey *InstanceKey, injectedStatement string) (statements []string, err error) { +func GetSubordinateRestartPreserveStatements(instanceKey *InstanceKey, injectedStatement string) (statements []string, err error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return statements, err } - if instance.Slave_IO_Running { - statements = append(statements, SemicolonTerminated(`stop slave io_thread`)) + if instance.Subordinate_IO_Running { + statements = append(statements, SemicolonTerminated(`stop subordinate io_thread`)) } - if instance.Slave_SQL_Running { - statements = append(statements, SemicolonTerminated(`stop slave sql_thread`)) + if instance.Subordinate_SQL_Running { + statements = append(statements, SemicolonTerminated(`stop subordinate sql_thread`)) } if injectedStatement != "" { statements = append(statements, SemicolonTerminated(injectedStatement)) } - if instance.Slave_SQL_Running { - statements = append(statements, SemicolonTerminated(`start slave sql_thread`)) + if instance.Subordinate_SQL_Running { + statements = append(statements, SemicolonTerminated(`start subordinate sql_thread`)) } - if instance.Slave_IO_Running { - statements = append(statements, SemicolonTerminated(`start slave io_thread`)) + if instance.Subordinate_IO_Running { + statements = append(statements, SemicolonTerminated(`start subordinate io_thread`)) } return statements, err } @@ -233,21 +233,21 @@ func PurgeBinaryLogsToCurrent(instanceKey *InstanceKey) (*Instance, error) { return PurgeBinaryLogsTo(instanceKey, instance.SelfBinlogCoordinates.LogFile) } -// StopSlaveNicely stops a slave such that SQL_thread and IO_thread are aligned (i.e. +// StopSubordinateNicely stops a subordinate such that SQL_thread and IO_thread are aligned (i.e. 
// SQL_thread consumes all relay log entries) -// It will actually START the sql_thread even if the slave is completely stopped. -func StopSlaveNicely(instanceKey *InstanceKey, timeout time.Duration) (*Instance, error) { +// It will actually START the sql_thread even if the subordinate is completely stopped. +func StopSubordinateNicely(instanceKey *InstanceKey, timeout time.Duration) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, log.Errore(err) } - if !instance.IsSlave() { - return instance, fmt.Errorf("instance is not a slave: %+v", instanceKey) + if !instance.IsSubordinate() { + return instance, fmt.Errorf("instance is not a subordinate: %+v", instanceKey) } - _, err = ExecInstanceNoPrepare(instanceKey, `stop slave io_thread`) - _, err = ExecInstanceNoPrepare(instanceKey, `start slave sql_thread`) + _, err = ExecInstanceNoPrepare(instanceKey, `stop subordinate io_thread`) + _, err = ExecInstanceNoPrepare(instanceKey, `start subordinate sql_thread`) if instance.SQLDelay == 0 { // Otherwise we don't bother. @@ -255,7 +255,7 @@ func StopSlaveNicely(instanceKey *InstanceKey, timeout time.Duration) (*Instance for upToDate := false; !upToDate; { if timeout > 0 && time.Since(startTime) >= timeout { // timeout - return nil, log.Errorf("StopSlaveNicely timeout on %+v", *instanceKey) + return nil, log.Errorf("StopSubordinateNicely timeout on %+v", *instanceKey) } instance, err = ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { @@ -269,10 +269,10 @@ func StopSlaveNicely(instanceKey *InstanceKey, timeout time.Duration) (*Instance } } } - _, err = ExecInstanceNoPrepare(instanceKey, `stop slave`) + _, err = ExecInstanceNoPrepare(instanceKey, `stop subordinate`) if err != nil { - // Patch; current MaxScale behavior for STOP SLAVE is to throw an error if slave already stopped. - if instance.isMaxScale() && err.Error() == "Error 1199: Slave connection is not running" { + // Patch; current MaxScale behavior for STOP SLAVE is to throw an error if subordinate already stopped. 
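// Note: the check below compares the error text verbatim against what the server returns;
// MaxScale reports this condition as "Error 1199: Slave connection is not running", so the
// string literal must mirror the server's wording exactly for the workaround to apply.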
+ if instance.isMaxScale() && err.Error() == "Error 1199: Subordinate connection is not running" { err = nil } } @@ -281,51 +281,51 @@ func StopSlaveNicely(instanceKey *InstanceKey, timeout time.Duration) (*Instance } instance, err = ReadTopologyInstanceUnbuffered(instanceKey) - log.Infof("Stopped slave nicely on %+v, Self:%+v, Exec:%+v", *instanceKey, instance.SelfBinlogCoordinates, instance.ExecBinlogCoordinates) + log.Infof("Stopped subordinate nicely on %+v, Self:%+v, Exec:%+v", *instanceKey, instance.SelfBinlogCoordinates, instance.ExecBinlogCoordinates) return instance, err } -// StopSlavesNicely will attemt to stop all given slaves nicely, up to timeout -func StopSlavesNicely(slaves [](*Instance), timeout time.Duration) [](*Instance) { - refreshedSlaves := [](*Instance){} +// StopSubordinatesNicely will attemt to stop all given subordinates nicely, up to timeout +func StopSubordinatesNicely(subordinates [](*Instance), timeout time.Duration) [](*Instance) { + refreshedSubordinates := [](*Instance){} - log.Debugf("Stopping %d slaves nicely", len(slaves)) + log.Debugf("Stopping %d subordinates nicely", len(subordinates)) // use concurrency but wait for all to complete barrier := make(chan *Instance) - for _, slave := range slaves { - slave := slave + for _, subordinate := range subordinates { + subordinate := subordinate go func() { - updatedSlave := &slave - // Signal completed slave - defer func() { barrier <- *updatedSlave }() - // Wait your turn to read a slave + updatedSubordinate := &subordinate + // Signal completed subordinate + defer func() { barrier <- *updatedSubordinate }() + // Wait your turn to read a subordinate ExecuteOnTopology(func() { - StopSlaveNicely(&slave.Key, timeout) - slave, _ = StopSlave(&slave.Key) - updatedSlave = &slave + StopSubordinateNicely(&subordinate.Key, timeout) + subordinate, _ = StopSubordinate(&subordinate.Key) + updatedSubordinate = &subordinate }) }() } - for range slaves { - refreshedSlaves = append(refreshedSlaves, <-barrier) + for range subordinates { + refreshedSubordinates = append(refreshedSubordinates, <-barrier) } - return refreshedSlaves + return refreshedSubordinates } -// StopSlave stops replication on a given instance -func StopSlave(instanceKey *InstanceKey) (*Instance, error) { +// StopSubordinate stops replication on a given instance +func StopSubordinate(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, log.Errore(err) } - if !instance.IsSlave() { - return instance, fmt.Errorf("instance is not a slave: %+v", instanceKey) + if !instance.IsSubordinate() { + return instance, fmt.Errorf("instance is not a subordinate: %+v", instanceKey) } - _, err = ExecInstanceNoPrepare(instanceKey, `stop slave`) + _, err = ExecInstanceNoPrepare(instanceKey, `stop subordinate`) if err != nil { - // Patch; current MaxScale behavior for STOP SLAVE is to throw an error if slave already stopped. - if instance.isMaxScale() && err.Error() == "Error 1199: Slave connection is not running" { + // Patch; current MaxScale behavior for STOP SLAVE is to throw an error if subordinate already stopped. 
+ if instance.isMaxScale() && err.Error() == "Error 1199: Subordinate connection is not running" { err = nil } } @@ -335,55 +335,55 @@ func StopSlave(instanceKey *InstanceKey) (*Instance, error) { } instance, err = ReadTopologyInstanceUnbuffered(instanceKey) - log.Infof("Stopped slave on %+v, Self:%+v, Exec:%+v", *instanceKey, instance.SelfBinlogCoordinates, instance.ExecBinlogCoordinates) + log.Infof("Stopped subordinate on %+v, Self:%+v, Exec:%+v", *instanceKey, instance.SelfBinlogCoordinates, instance.ExecBinlogCoordinates) return instance, err } -// StartSlave starts replication on a given instance. -func StartSlave(instanceKey *InstanceKey) (*Instance, error) { +// StartSubordinate starts replication on a given instance. +func StartSubordinate(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, log.Errore(err) } - if !instance.IsSlave() { - return instance, fmt.Errorf("instance is not a slave: %+v", instanceKey) + if !instance.IsSubordinate() { + return instance, fmt.Errorf("instance is not a subordinate: %+v", instanceKey) } - // If async fallback is disallowed, we'd better make sure to enable slaves to - // send ACKs before START SLAVE. Slave ACKing is off at mysqld startup because - // some slaves (those that must never be promoted) should never ACK. - // Note: We assume that slaves use 'skip-slave-start' so they won't + // If async fallback is disallowed, we'd better make sure to enable subordinates to + // send ACKs before START SLAVE. Subordinate ACKing is off at mysqld startup because + // some subordinates (those that must never be promoted) should never ACK. + // Note: We assume that subordinates use 'skip-subordinate-start' so they won't // START SLAVE on their own upon restart. if instance.SemiSyncEnforced { // Send ACK only from promotable instances. sendACK := instance.PromotionRule != MustNotPromoteRule - // Always disable master setting, in case we're converting a former master. + // Always disable main setting, in case we're converting a former main. 
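// The call below therefore passes false for the main-side semi-sync flag, while the
// subordinate-side ACK flag follows the promotion rule computed above (sendACK).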
if err := EnableSemiSync(instanceKey, false, sendACK); err != nil { return instance, log.Errore(err) } } - _, err = ExecInstanceNoPrepare(instanceKey, `start slave`) + _, err = ExecInstanceNoPrepare(instanceKey, `start subordinate`) if err != nil { return instance, log.Errore(err) } - log.Infof("Started slave on %+v", instanceKey) - if config.Config.SlaveStartPostWaitMilliseconds > 0 { - time.Sleep(time.Duration(config.Config.SlaveStartPostWaitMilliseconds) * time.Millisecond) + log.Infof("Started subordinate on %+v", instanceKey) + if config.Config.SubordinateStartPostWaitMilliseconds > 0 { + time.Sleep(time.Duration(config.Config.SubordinateStartPostWaitMilliseconds) * time.Millisecond) } instance, err = ReadTopologyInstanceUnbuffered(instanceKey) return instance, err } -// RestartSlave stops & starts replication on a given instance -func RestartSlave(instanceKey *InstanceKey) (instance *Instance, err error) { - instance, err = StopSlave(instanceKey) +// RestartSubordinate stops & starts replication on a given instance +func RestartSubordinate(instanceKey *InstanceKey) (instance *Instance, err error) { + instance, err = StopSubordinate(instanceKey) if err != nil { return instance, log.Errore(err) } - instance, err = StartSlave(instanceKey) + instance, err = StartSubordinate(instanceKey) if err != nil { return instance, log.Errore(err) } @@ -391,45 +391,45 @@ func RestartSlave(instanceKey *InstanceKey) (instance *Instance, err error) { } -// StartSlaves will do concurrent start-slave -func StartSlaves(slaves [](*Instance)) { +// StartSubordinates will do concurrent start-subordinate +func StartSubordinates(subordinates [](*Instance)) { // use concurrency but wait for all to complete - log.Debugf("Starting %d slaves", len(slaves)) + log.Debugf("Starting %d subordinates", len(subordinates)) barrier := make(chan InstanceKey) - for _, instance := range slaves { + for _, instance := range subordinates { instance := instance go func() { - // Signal compelted slave + // Signal compelted subordinate defer func() { barrier <- instance.Key }() - // Wait your turn to read a slave - ExecuteOnTopology(func() { StartSlave(&instance.Key) }) + // Wait your turn to read a subordinate + ExecuteOnTopology(func() { StartSubordinate(&instance.Key) }) }() } - for range slaves { + for range subordinates { <-barrier } } -// StartSlaveUntilMasterCoordinates issuesa START SLAVE UNTIL... statement on given instance -func StartSlaveUntilMasterCoordinates(instanceKey *InstanceKey, masterCoordinates *BinlogCoordinates) (*Instance, error) { +// StartSubordinateUntilMainCoordinates issuesa START SLAVE UNTIL... 
statement on given instance +func StartSubordinateUntilMainCoordinates(instanceKey *InstanceKey, mainCoordinates *BinlogCoordinates) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, log.Errore(err) } - if !instance.IsSlave() { - return instance, fmt.Errorf("instance is not a slave: %+v", instanceKey) + if !instance.IsSubordinate() { + return instance, fmt.Errorf("instance is not a subordinate: %+v", instanceKey) } - if instance.SlaveRunning() { - return instance, fmt.Errorf("slave already running: %+v", instanceKey) + if instance.SubordinateRunning() { + return instance, fmt.Errorf("subordinate already running: %+v", instanceKey) } - log.Infof("Will start slave on %+v until coordinates: %+v", instanceKey, masterCoordinates) + log.Infof("Will start subordinate on %+v until coordinates: %+v", instanceKey, mainCoordinates) if instance.SemiSyncEnforced { // Send ACK only from promotable instances. sendACK := instance.PromotionRule != MustNotPromoteRule - // Always disable master setting, in case we're converting a former master. + // Always disable main setting, in case we're converting a former main. if err := EnableSemiSync(instanceKey, false, sendACK); err != nil { return instance, log.Errore(err) } @@ -438,8 +438,8 @@ func StartSlaveUntilMasterCoordinates(instanceKey *InstanceKey, masterCoordinate // MariaDB has a bug: a CHANGE MASTER TO statement does not work properly with prepared statement... :P // See https://mariadb.atlassian.net/browse/MDEV-7640 // This is the reason for ExecInstanceNoPrepare - _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("start slave until master_log_file='%s', master_log_pos=%d", - masterCoordinates.LogFile, masterCoordinates.LogPos)) + _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("start subordinate until main_log_file='%s', main_log_pos=%d", + mainCoordinates.LogFile, mainCoordinates.LogPos)) if err != nil { return instance, log.Errore(err) } @@ -451,16 +451,16 @@ func StartSlaveUntilMasterCoordinates(instanceKey *InstanceKey, masterCoordinate } switch { - case instance.ExecBinlogCoordinates.SmallerThan(masterCoordinates): + case instance.ExecBinlogCoordinates.SmallerThan(mainCoordinates): time.Sleep(sqlThreadPollDuration) - case instance.ExecBinlogCoordinates.Equals(masterCoordinates): + case instance.ExecBinlogCoordinates.Equals(mainCoordinates): upToDate = true - case masterCoordinates.SmallerThan(&instance.ExecBinlogCoordinates): + case mainCoordinates.SmallerThan(&instance.ExecBinlogCoordinates): return instance, fmt.Errorf("Start SLAVE UNTIL is past coordinates: %+v", instanceKey) } } - instance, err = StopSlave(instanceKey) + instance, err = StopSubordinate(instanceKey) if err != nil { return instance, log.Errore(err) } @@ -468,76 +468,76 @@ func StartSlaveUntilMasterCoordinates(instanceKey *InstanceKey, masterCoordinate return instance, err } -// EnableSemiSync sets the rpl_semi_sync_(master|slave)_enabled variables +// EnableSemiSync sets the rpl_semi_sync_(main|subordinate)_enabled variables // on a given instance. 
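// Both flags are written in a single SET GLOBAL statement, so the main-side and
// subordinate-side settings can be toggled independently in one round trip.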
-func EnableSemiSync(instanceKey *InstanceKey, master, slave bool) error { - log.Infof("instance %+v rpl_semi_sync_master_enabled: %t, rpl_semi_sync_slave_enabled: %t", instanceKey, master, slave) +func EnableSemiSync(instanceKey *InstanceKey, main, subordinate bool) error { + log.Infof("instance %+v rpl_semi_sync_main_enabled: %t, rpl_semi_sync_subordinate_enabled: %t", instanceKey, main, subordinate) _, err := ExecInstanceNoPrepare(instanceKey, - `set global rpl_semi_sync_master_enabled = ?, global rpl_semi_sync_slave_enabled = ?`, - master, slave) + `set global rpl_semi_sync_main_enabled = ?, global rpl_semi_sync_subordinate_enabled = ?`, + main, subordinate) return err } -// ChangeMasterCredentials issues a CHANGE MASTER TO... MASTER_USER=, MASTER_PASSWORD=... -func ChangeMasterCredentials(instanceKey *InstanceKey, masterUser string, masterPassword string) (*Instance, error) { +// ChangeMainCredentials issues a CHANGE MASTER TO... MASTER_USER=, MASTER_PASSWORD=... +func ChangeMainCredentials(instanceKey *InstanceKey, mainUser string, mainPassword string) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, log.Errore(err) } - if masterUser == "" { - return instance, log.Errorf("Empty user in ChangeMasterCredentials() for %+v", *instanceKey) + if mainUser == "" { + return instance, log.Errorf("Empty user in ChangeMainCredentials() for %+v", *instanceKey) } - if instance.SlaveRunning() { - return instance, fmt.Errorf("ChangeMasterTo: Cannot change master on: %+v because slave is running", *instanceKey) + if instance.SubordinateRunning() { + return instance, fmt.Errorf("ChangeMainTo: Cannot change main on: %+v because subordinate is running", *instanceKey) } - log.Debugf("ChangeMasterTo: will attempt changing master credentials on %+v", *instanceKey) + log.Debugf("ChangeMainTo: will attempt changing main credentials on %+v", *instanceKey) if *config.RuntimeCLIFlags.Noop { return instance, fmt.Errorf("noop: aborting CHANGE MASTER TO operation on %+v; signalling error but nothing went wrong.", *instanceKey) } - _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change master to master_user='%s', master_password='%s'", - masterUser, masterPassword)) + _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change main to main_user='%s', main_password='%s'", + mainUser, mainPassword)) if err != nil { return instance, log.Errore(err) } - log.Infof("ChangeMasterTo: Changed master credentials on %+v", *instanceKey) + log.Infof("ChangeMainTo: Changed main credentials on %+v", *instanceKey) instance, err = ReadTopologyInstanceUnbuffered(instanceKey) return instance, err } -// ChangeMasterTo changes the given instance's master according to given input. -func ChangeMasterTo(instanceKey *InstanceKey, masterKey *InstanceKey, masterBinlogCoordinates *BinlogCoordinates, skipUnresolve bool, gtidHint OperationGTIDHint) (*Instance, error) { +// ChangeMainTo changes the given instance's main according to given input. 
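// The gtidHint argument steers the CHANGE MASTER TO statement that gets issued: GTIDHintForce
// switches the replica onto GTID (MASTER_AUTO_POSITION for Oracle GTID, MASTER_USE_GTID for
// MariaDB), GTIDHintDeny pins it to explicit binlog file:pos coordinates, and GTIDHintNeutral
// keeps whatever replication mode is already in use.
// Illustrative call (the variable names are examples only):
//   instance, err := ChangeMainTo(&replicaKey, &newMainKey, &binlogCoords, false, GTIDHintNeutral)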
+func ChangeMainTo(instanceKey *InstanceKey, mainKey *InstanceKey, mainBinlogCoordinates *BinlogCoordinates, skipUnresolve bool, gtidHint OperationGTIDHint) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, log.Errore(err) } - if instance.SlaveRunning() { - return instance, fmt.Errorf("ChangeMasterTo: Cannot change master on: %+v because slave is running", *instanceKey) + if instance.SubordinateRunning() { + return instance, fmt.Errorf("ChangeMainTo: Cannot change main on: %+v because subordinate is running", *instanceKey) } - log.Debugf("ChangeMasterTo: will attempt changing master on %+v to %+v, %+v", *instanceKey, *masterKey, *masterBinlogCoordinates) - changeToMasterKey := masterKey + log.Debugf("ChangeMainTo: will attempt changing main on %+v to %+v, %+v", *instanceKey, *mainKey, *mainBinlogCoordinates) + changeToMainKey := mainKey if !skipUnresolve { - unresolvedMasterKey, nameUnresolved, err := UnresolveHostname(masterKey) + unresolvedMainKey, nameUnresolved, err := UnresolveHostname(mainKey) if err != nil { - log.Debugf("ChangeMasterTo: aborting operation on %+v due to resolving error on %+v: %+v", *instanceKey, *masterKey, err) + log.Debugf("ChangeMainTo: aborting operation on %+v due to resolving error on %+v: %+v", *instanceKey, *mainKey, err) return instance, err } if nameUnresolved { - log.Debugf("ChangeMasterTo: Unresolved %+v into %+v", *masterKey, unresolvedMasterKey) + log.Debugf("ChangeMainTo: Unresolved %+v into %+v", *mainKey, unresolvedMainKey) } - changeToMasterKey = &unresolvedMasterKey + changeToMainKey = &unresolvedMainKey } if *config.RuntimeCLIFlags.Noop { return instance, fmt.Errorf("noop: aborting CHANGE MASTER TO operation on %+v; signalling error but nothing went wrong.", *instanceKey) } - originalMasterKey := instance.MasterKey + originalMainKey := instance.MainKey originalExecBinlogCoordinates := instance.ExecBinlogCoordinates changedViaGTID := false @@ -546,51 +546,51 @@ func ChangeMasterTo(instanceKey *InstanceKey, masterKey *InstanceKey, masterBinl // See https://mariadb.atlassian.net/browse/MDEV-7640 // This is the reason for ExecInstanceNoPrepare // Keep on using GTID - _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change master to master_host='%s', master_port=%d", - changeToMasterKey.Hostname, changeToMasterKey.Port)) + _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change main to main_host='%s', main_port=%d", + changeToMainKey.Hostname, changeToMainKey.Port)) changedViaGTID = true } else if instance.UsingMariaDBGTID && gtidHint == GTIDHintDeny { // Make sure to not use GTID - _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change master to master_host='%s', master_port=%d, master_log_file='%s', master_log_pos=%d, master_use_gtid=no", - changeToMasterKey.Hostname, changeToMasterKey.Port, masterBinlogCoordinates.LogFile, masterBinlogCoordinates.LogPos)) + _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change main to main_host='%s', main_port=%d, main_log_file='%s', main_log_pos=%d, main_use_gtid=no", + changeToMainKey.Hostname, changeToMainKey.Port, mainBinlogCoordinates.LogFile, mainBinlogCoordinates.LogPos)) } else if instance.IsMariaDB() && gtidHint == GTIDHintForce { // Is MariaDB; not using GTID, turn into GTID - _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change master to master_host='%s', master_port=%d, master_use_gtid=slave_pos", - changeToMasterKey.Hostname, changeToMasterKey.Port)) + _, err = 
ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change main to main_host='%s', main_port=%d, main_use_gtid=subordinate_pos", + changeToMainKey.Hostname, changeToMainKey.Port)) changedViaGTID = true } else if instance.UsingOracleGTID && gtidHint != GTIDHintDeny { // Is Oracle; already uses GTID; keep using it. - _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change master to master_host='%s', master_port=%d", - changeToMasterKey.Hostname, changeToMasterKey.Port)) + _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change main to main_host='%s', main_port=%d", + changeToMainKey.Hostname, changeToMainKey.Port)) changedViaGTID = true } else if instance.UsingOracleGTID && gtidHint == GTIDHintDeny { // Is Oracle; already uses GTID - _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change master to master_host='%s', master_port=%d, master_log_file='%s', master_log_pos=%d, master_auto_position=0", - changeToMasterKey.Hostname, changeToMasterKey.Port, masterBinlogCoordinates.LogFile, masterBinlogCoordinates.LogPos)) + _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change main to main_host='%s', main_port=%d, main_log_file='%s', main_log_pos=%d, main_auto_position=0", + changeToMainKey.Hostname, changeToMainKey.Port, mainBinlogCoordinates.LogFile, mainBinlogCoordinates.LogPos)) } else if instance.SupportsOracleGTID && gtidHint == GTIDHintForce { // Is Oracle; not using GTID right now; turn into GTID - _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change master to master_host='%s', master_port=%d, master_auto_position=1", - changeToMasterKey.Hostname, changeToMasterKey.Port)) + _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change main to main_host='%s', main_port=%d, main_auto_position=1", + changeToMainKey.Hostname, changeToMainKey.Port)) changedViaGTID = true } else { // Normal binlog file:pos - _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change master to master_host='%s', master_port=%d, master_log_file='%s', master_log_pos=%d", - changeToMasterKey.Hostname, changeToMasterKey.Port, masterBinlogCoordinates.LogFile, masterBinlogCoordinates.LogPos)) + _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change main to main_host='%s', main_port=%d, main_log_file='%s', main_log_pos=%d", + changeToMainKey.Hostname, changeToMainKey.Port, mainBinlogCoordinates.LogFile, mainBinlogCoordinates.LogPos)) } if err != nil { return instance, log.Errore(err) } - WriteMasterPositionEquivalence(&originalMasterKey, &originalExecBinlogCoordinates, changeToMasterKey, masterBinlogCoordinates) + WriteMainPositionEquivalence(&originalMainKey, &originalExecBinlogCoordinates, changeToMainKey, mainBinlogCoordinates) - log.Infof("ChangeMasterTo: Changed master on %+v to: %+v, %+v. GTID: %+v", *instanceKey, masterKey, masterBinlogCoordinates, changedViaGTID) + log.Infof("ChangeMainTo: Changed main on %+v to: %+v, %+v. GTID: %+v", *instanceKey, mainKey, mainBinlogCoordinates, changedViaGTID) instance, err = ReadTopologyInstanceUnbuffered(instanceKey) return instance, err } -// SkipToNextBinaryLog changes master position to beginning of next binlog +// SkipToNextBinaryLog changes main position to beginning of next binlog // USE WITH CARE! -// Use case is binlog servers where the master was gone & replaced by another. +// Use case is binlog servers where the main was gone & replaced by another. 
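// The replica is pointed at the next binary log file of its current main, starting at
// position 4 (the first position past the binlog file header), and replication is then restarted.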
func SkipToNextBinaryLog(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { @@ -604,67 +604,67 @@ func SkipToNextBinaryLog(instanceKey *InstanceKey) (*Instance, error) { nextFileCoordinates.LogPos = 4 log.Debugf("Will skip replication on %+v to next binary log: %+v", instance.Key, nextFileCoordinates.LogFile) - instance, err = ChangeMasterTo(&instance.Key, &instance.MasterKey, &nextFileCoordinates, false, GTIDHintNeutral) + instance, err = ChangeMainTo(&instance.Key, &instance.MainKey, &nextFileCoordinates, false, GTIDHintNeutral) if err != nil { return instance, log.Errore(err) } AuditOperation("skip-binlog", instanceKey, fmt.Sprintf("Skipped replication to next binary log: %+v", nextFileCoordinates.LogFile)) - return StartSlave(instanceKey) + return StartSubordinate(instanceKey) } -// ResetSlave resets a slave, breaking the replication -func ResetSlave(instanceKey *InstanceKey) (*Instance, error) { +// ResetSubordinate resets a subordinate, breaking the replication +func ResetSubordinate(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, log.Errore(err) } - if instance.SlaveRunning() { - return instance, fmt.Errorf("Cannot reset slave on: %+v because slave is running", instanceKey) + if instance.SubordinateRunning() { + return instance, fmt.Errorf("Cannot reset subordinate on: %+v because subordinate is running", instanceKey) } if *config.RuntimeCLIFlags.Noop { - return instance, fmt.Errorf("noop: aborting reset-slave operation on %+v; signalling error but nothing went wrong.", *instanceKey) + return instance, fmt.Errorf("noop: aborting reset-subordinate operation on %+v; signalling error but nothing went wrong.", *instanceKey) } // MySQL's RESET SLAVE is done correctly; however SHOW SLAVE STATUS still returns old hostnames etc // and only resets till after next restart. This leads to orchestrator still thinking the instance replicates // from old host. We therefore forcibly modify the hostname. // RESET SLAVE ALL command solves this, but only as of 5.6.3 - _, err = ExecInstanceNoPrepare(instanceKey, `change master to master_host='_'`) + _, err = ExecInstanceNoPrepare(instanceKey, `change main to main_host='_'`) if err != nil { return instance, log.Errore(err) } - _, err = ExecInstanceNoPrepare(instanceKey, `reset slave /*!50603 all */`) + _, err = ExecInstanceNoPrepare(instanceKey, `reset subordinate /*!50603 all */`) if err != nil { return instance, log.Errore(err) } - log.Infof("Reset slave %+v", instanceKey) + log.Infof("Reset subordinate %+v", instanceKey) instance, err = ReadTopologyInstanceUnbuffered(instanceKey) return instance, err } -// ResetMaster issues a RESET MASTER statement on given instance. Use with extreme care! -func ResetMaster(instanceKey *InstanceKey) (*Instance, error) { +// ResetMain issues a RESET MASTER statement on given instance. Use with extreme care! 
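// RESET MASTER purges the instance's binary logs and resets its GTID execution history,
// which breaks any replica still reading from it; the function refuses to run while the
// instance itself is replicating.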
+func ResetMain(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, log.Errore(err) } - if instance.SlaveRunning() { - return instance, fmt.Errorf("Cannot reset master on: %+v because slave is running", instanceKey) + if instance.SubordinateRunning() { + return instance, fmt.Errorf("Cannot reset main on: %+v because subordinate is running", instanceKey) } if *config.RuntimeCLIFlags.Noop { - return instance, fmt.Errorf("noop: aborting reset-master operation on %+v; signalling error but nothing went wrong.", *instanceKey) + return instance, fmt.Errorf("noop: aborting reset-main operation on %+v; signalling error but nothing went wrong.", *instanceKey) } - _, err = ExecInstanceNoPrepare(instanceKey, `reset master`) + _, err = ExecInstanceNoPrepare(instanceKey, `reset main`) if err != nil { return instance, log.Errore(err) } - log.Infof("Reset master %+v", instanceKey) + log.Infof("Reset main %+v", instanceKey) instance, err = ReadTopologyInstanceUnbuffered(instanceKey) return instance, err @@ -682,11 +682,11 @@ func setGTIDPurged(instance *Instance, gtidPurged string) error { // skipQueryClassic skips a query in normal binlog file:pos replication func skipQueryClassic(instance *Instance) error { - _, err := ExecInstance(&instance.Key, `set global sql_slave_skip_counter := 1`) + _, err := ExecInstance(&instance.Key, `set global sql_subordinate_skip_counter := 1`) return err } -// skipQueryOracleGtid skips a single query in an Oracle GTID replicating slave, by injecting an empty transaction +// skipQueryOracleGtid skips a single query in an Oracle GTID replicating subordinate, by injecting an empty transaction func skipQueryOracleGtid(instance *Instance) error { nextGtid, err := instance.NextGTID() if err != nil { @@ -714,11 +714,11 @@ func SkipQuery(instanceKey *InstanceKey) (*Instance, error) { return instance, log.Errore(err) } - if !instance.IsSlave() { - return instance, fmt.Errorf("instance is not a slave: %+v", instanceKey) + if !instance.IsSubordinate() { + return instance, fmt.Errorf("instance is not a subordinate: %+v", instanceKey) } - if instance.Slave_SQL_Running { - return instance, fmt.Errorf("Slave SQL thread is running on %+v", instanceKey) + if instance.Subordinate_SQL_Running { + return instance, fmt.Errorf("Subordinate SQL thread is running on %+v", instanceKey) } if instance.LastSQLError == "" { return instance, fmt.Errorf("No SQL error on %+v", instanceKey) @@ -740,84 +740,84 @@ func SkipQuery(instanceKey *InstanceKey) (*Instance, error) { return instance, log.Errore(err) } AuditOperation("skip-query", instanceKey, "Skipped one query") - return StartSlave(instanceKey) + return StartSubordinate(instanceKey) } -// DetachSlave detaches a slave from replication; forcibly corrupting the binlog coordinates (though in such way +// DetachSubordinate detaches a subordinate from replication; forcibly corrupting the binlog coordinates (though in such way // that is reversible) -func DetachSlave(instanceKey *InstanceKey) (*Instance, error) { +func DetachSubordinate(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, log.Errore(err) } - if instance.SlaveRunning() { - return instance, fmt.Errorf("Cannot detach slave on: %+v because slave is running", instanceKey) + if instance.SubordinateRunning() { + return instance, fmt.Errorf("Cannot detach subordinate on: %+v because subordinate is running", instanceKey) } 
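// The detach below rewrites the replica's coordinates so that the real file:pos pair is embedded
// in the binlog file name itself (for example //mysql-bin.000123:456). Replication cannot run
// from such a name, but the original coordinates remain recoverable for ReattachSubordinate.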
isDetached, _, _ := instance.ExecBinlogCoordinates.DetachedCoordinates() if isDetached { - return instance, fmt.Errorf("Cannot (need not) detach slave on: %+v because slave is already detached", instanceKey) + return instance, fmt.Errorf("Cannot (need not) detach subordinate on: %+v because subordinate is already detached", instanceKey) } if *config.RuntimeCLIFlags.Noop { - return instance, fmt.Errorf("noop: aborting detach-slave operation on %+v; signalling error but nothing went wrong.", *instanceKey) + return instance, fmt.Errorf("noop: aborting detach-subordinate operation on %+v; signalling error but nothing went wrong.", *instanceKey) } detachedCoordinates := BinlogCoordinates{LogFile: fmt.Sprintf("//%s:%d", instance.ExecBinlogCoordinates.LogFile, instance.ExecBinlogCoordinates.LogPos), LogPos: instance.ExecBinlogCoordinates.LogPos} // Encode the current coordinates within the log file name, in such way that replication is broken, but info can still be resurrected - _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf(`change master to master_log_file='%s', master_log_pos=%d`, detachedCoordinates.LogFile, detachedCoordinates.LogPos)) + _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf(`change main to main_log_file='%s', main_log_pos=%d`, detachedCoordinates.LogFile, detachedCoordinates.LogPos)) if err != nil { return instance, log.Errore(err) } - log.Infof("Detach slave %+v", instanceKey) + log.Infof("Detach subordinate %+v", instanceKey) instance, err = ReadTopologyInstanceUnbuffered(instanceKey) return instance, err } -// ReattachSlave restores a detached slave back into replication -func ReattachSlave(instanceKey *InstanceKey) (*Instance, error) { +// ReattachSubordinate restores a detached subordinate back into replication +func ReattachSubordinate(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, log.Errore(err) } - if instance.SlaveRunning() { - return instance, fmt.Errorf("Cannot (need not) reattach slave on: %+v because slave is running", instanceKey) + if instance.SubordinateRunning() { + return instance, fmt.Errorf("Cannot (need not) reattach subordinate on: %+v because subordinate is running", instanceKey) } isDetached, detachedLogFile, detachedLogPos := instance.ExecBinlogCoordinates.DetachedCoordinates() if !isDetached { - return instance, fmt.Errorf("Cannot reattach slave on: %+v because slave is not detached", instanceKey) + return instance, fmt.Errorf("Cannot reattach subordinate on: %+v because subordinate is not detached", instanceKey) } if *config.RuntimeCLIFlags.Noop { - return instance, fmt.Errorf("noop: aborting reattach-slave operation on %+v; signalling error but nothing went wrong.", *instanceKey) + return instance, fmt.Errorf("noop: aborting reattach-subordinate operation on %+v; signalling error but nothing went wrong.", *instanceKey) } - _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf(`change master to master_log_file='%s', master_log_pos=%s`, detachedLogFile, detachedLogPos)) + _, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf(`change main to main_log_file='%s', main_log_pos=%s`, detachedLogFile, detachedLogPos)) if err != nil { return instance, log.Errore(err) } - log.Infof("Reattach slave %+v", instanceKey) + log.Infof("Reattach subordinate %+v", instanceKey) instance, err = ReadTopologyInstanceUnbuffered(instanceKey) return instance, err } -// MasterPosWait issues a MASTER_POS_WAIT() an given instance according to given coordinates. 
-func MasterPosWait(instanceKey *InstanceKey, binlogCoordinates *BinlogCoordinates) (*Instance, error) { +// MainPosWait issues a MASTER_POS_WAIT() an given instance according to given coordinates. +func MainPosWait(instanceKey *InstanceKey, binlogCoordinates *BinlogCoordinates) (*Instance, error) { instance, err := ReadTopologyInstanceUnbuffered(instanceKey) if err != nil { return instance, log.Errore(err) } - _, err = ExecInstance(instanceKey, `select master_pos_wait(?, ?)`, binlogCoordinates.LogFile, binlogCoordinates.LogPos) + _, err = ExecInstance(instanceKey, `select main_pos_wait(?, ?)`, binlogCoordinates.LogFile, binlogCoordinates.LogPos) if err != nil { return instance, log.Errore(err) } @@ -827,21 +827,21 @@ func MasterPosWait(instanceKey *InstanceKey, binlogCoordinates *BinlogCoordinate return instance, err } -// Attempt to read and return replication credentials from the mysql.slave_master_info system table +// Attempt to read and return replication credentials from the mysql.subordinate_main_info system table func ReadReplicationCredentials(instanceKey *InstanceKey) (replicationUser string, replicationPassword string, err error) { query := ` select ifnull(max(User_name), '') as user, ifnull(max(User_password), '') as password from - mysql.slave_master_info + mysql.subordinate_main_info ` err = ScanInstanceRow(instanceKey, query, &replicationUser, &replicationPassword) if err != nil { return replicationUser, replicationPassword, err } if replicationUser == "" { - err = fmt.Errorf("Cannot find credentials in mysql.slave_master_info") + err = fmt.Errorf("Cannot find credentials in mysql.subordinate_main_info") } return replicationUser, replicationPassword, err } @@ -857,7 +857,7 @@ func SetReadOnly(instanceKey *InstanceKey, readOnly bool) (*Instance, error) { return instance, fmt.Errorf("noop: aborting set-read-only operation on %+v; signalling error but nothing went wrong.", *instanceKey) } - // If async fallback is disallowed, we're responsible for flipping the master + // If async fallback is disallowed, we're responsible for flipping the main // semi-sync switch ON before accepting writes. The setting is off by default. if instance.SemiSyncEnforced && !readOnly { // Send ACK only from promotable instances. @@ -873,8 +873,8 @@ func SetReadOnly(instanceKey *InstanceKey, readOnly bool) (*Instance, error) { } instance, err = ReadTopologyInstanceUnbuffered(instanceKey) - // If we just went read-only, it's safe to flip the master semi-sync switch - // OFF, which is the default value so that slaves can make progress. + // If we just went read-only, it's safe to flip the main semi-sync switch + // OFF, which is the default value so that subordinates can make progress. if instance.SemiSyncEnforced && readOnly { // Send ACK only from promotable instances. 
sendACK := instance.PromotionRule != MustNotPromoteRule diff --git a/go/inst/instance_utils.go b/go/inst/instance_utils.go index 1692e9a9..a0ac723f 100644 --- a/go/inst/instance_utils.go +++ b/go/inst/instance_utils.go @@ -43,8 +43,8 @@ func (this InstancesByExecBinlogCoordinates) Less(i, j int) bool { return true } if this[i].ExecBinlogCoordinates.Equals(&this[j].ExecBinlogCoordinates) { - // Secondary sorting: "smaller" if not logging slave updates - if this[j].LogSlaveUpdatesEnabled && !this[i].LogSlaveUpdatesEnabled { + // Secondary sorting: "smaller" if not logging subordinate updates + if this[j].LogSubordinateUpdatesEnabled && !this[i].LogSubordinateUpdatesEnabled { return true } // Next sorting: "smaller" if of higher version (this will be reversed eventually) diff --git a/go/inst/master_equivalence_dao.go b/go/inst/master_equivalence_dao.go index da61cac0..59433047 100644 --- a/go/inst/master_equivalence_dao.go +++ b/go/inst/master_equivalence_dao.go @@ -23,56 +23,56 @@ import ( "github.com/outbrain/orchestrator/go/db" ) -func WriteMasterPositionEquivalence(master1Key *InstanceKey, master1BinlogCoordinates *BinlogCoordinates, - master2Key *InstanceKey, master2BinlogCoordinates *BinlogCoordinates) error { - if master1Key.Equals(master2Key) { +func WriteMainPositionEquivalence(main1Key *InstanceKey, main1BinlogCoordinates *BinlogCoordinates, + main2Key *InstanceKey, main2BinlogCoordinates *BinlogCoordinates) error { + if main1Key.Equals(main2Key) { // Not interesting return nil } writeFunc := func() error { _, err := db.ExecOrchestrator(` - insert into master_position_equivalence ( - master1_hostname, master1_port, master1_binary_log_file, master1_binary_log_pos, - master2_hostname, master2_port, master2_binary_log_file, master2_binary_log_pos, + insert into main_position_equivalence ( + main1_hostname, main1_port, main1_binary_log_file, main1_binary_log_pos, + main2_hostname, main2_port, main2_binary_log_file, main2_binary_log_pos, last_suggested) values (?, ?, ?, ?, ?, ?, ?, ?, NOW()) on duplicate key update last_suggested=values(last_suggested) - `, master1Key.Hostname, master1Key.Port, master1BinlogCoordinates.LogFile, master1BinlogCoordinates.LogPos, - master2Key.Hostname, master2Key.Port, master2BinlogCoordinates.LogFile, master2BinlogCoordinates.LogPos, + `, main1Key.Hostname, main1Key.Port, main1BinlogCoordinates.LogFile, main1BinlogCoordinates.LogPos, + main2Key.Hostname, main2Key.Port, main2BinlogCoordinates.LogFile, main2BinlogCoordinates.LogPos, ) return log.Errore(err) } return ExecDBWriteFunc(writeFunc) } -func GetEquivalentMasterCoordinates(instanceCoordinates *InstanceBinlogCoordinates) (result [](*InstanceBinlogCoordinates), err error) { +func GetEquivalentMainCoordinates(instanceCoordinates *InstanceBinlogCoordinates) (result [](*InstanceBinlogCoordinates), err error) { query := ` select - master1_hostname as hostname, - master1_port as port, - master1_binary_log_file as binlog_file, - master1_binary_log_pos as binlog_pos + main1_hostname as hostname, + main1_port as port, + main1_binary_log_file as binlog_file, + main1_binary_log_pos as binlog_pos from - master_position_equivalence + main_position_equivalence where - master2_hostname = ? - and master2_port = ? - and master2_binary_log_file = ? - and master2_binary_log_pos = ? + main2_hostname = ? + and main2_port = ? + and main2_binary_log_file = ? + and main2_binary_log_pos = ? 
union select - master2_hostname as hostname, - master2_port as port, - master2_binary_log_file as binlog_file, - master2_binary_log_pos as binlog_pos + main2_hostname as hostname, + main2_port as port, + main2_binary_log_file as binlog_file, + main2_binary_log_pos as binlog_pos from - master_position_equivalence + main_position_equivalence where - master1_hostname = ? - and master1_port = ? - and master1_binary_log_file = ? - and master1_binary_log_pos = ? + main1_hostname = ? + and main1_port = ? + and main1_binary_log_file = ? + and main1_binary_log_pos = ? ` args := sqlutils.Args( instanceCoordinates.Key.Hostname, @@ -104,7 +104,7 @@ func GetEquivalentMasterCoordinates(instanceCoordinates *InstanceBinlogCoordinat } func GetEquivalentBinlogCoordinatesFor(instanceCoordinates *InstanceBinlogCoordinates, belowKey *InstanceKey) (*BinlogCoordinates, error) { - possibleCoordinates, err := GetEquivalentMasterCoordinates(instanceCoordinates) + possibleCoordinates, err := GetEquivalentMainCoordinates(instanceCoordinates) if err != nil { return nil, err } @@ -116,11 +116,11 @@ func GetEquivalentBinlogCoordinatesFor(instanceCoordinates *InstanceBinlogCoordi return nil, nil } -// ExpireMasterPositionEquivalence expires old master_position_equivalence -func ExpireMasterPositionEquivalence() error { +// ExpireMainPositionEquivalence expires old main_position_equivalence +func ExpireMainPositionEquivalence() error { writeFunc := func() error { _, err := db.ExecOrchestrator(` - delete from master_position_equivalence + delete from main_position_equivalence where last_suggested < NOW() - INTERVAL ? HOUR `, config.Config.UnseenInstanceForgetHours, ) diff --git a/go/logic/orchestrator.go b/go/logic/orchestrator.go index 678f5ec8..9f4e7b21 100644 --- a/go/logic/orchestrator.go +++ b/go/logic/orchestrator.go @@ -113,7 +113,7 @@ func handleDiscoveryRequests() { } // discoverInstance will attempt discovering an instance (unless it is already up to date) and will -// list down its master and slaves (if any) for further discovery. +// list down its main and subordinates (if any) for further discovery. func discoverInstance(instanceKey inst.InstanceKey) { start := time.Now() defer func() { @@ -151,7 +151,7 @@ func discoverInstance(instanceKey inst.InstanceKey) { return } - log.Debugf("Discovered host: %+v, master: %+v, version: %+v in %.3fs", instance.Key, instance.MasterKey, instance.Version, time.Since(start).Seconds()) + log.Debugf("Discovered host: %+v, main: %+v, version: %+v in %.3fs", instance.Key, instance.MainKey, instance.Version, time.Since(start).Seconds()) if atomic.LoadInt64(&isElectedNode) == 0 { // Maybe this node was elected before, but isn't elected anymore. 
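The discovery logic here is a breadth-first crawl: every polled instance contributes its main and its subordinates back onto the discovery queue, as the push loop in the next hunk shows. A simplified, self-contained sketch of that pattern over an in-memory topology map (the map, keys, and queue below are illustrative stand-ins, not orchestrator's types):

package main

import "fmt"

type instanceKey string

// topology maps each instance to the peers it reveals when polled:
// its main (if any) and its subordinates.
var topology = map[instanceKey][]instanceKey{
	"main-1:3306": {"sub-1:3306", "sub-2:3306"},
	"sub-1:3306":  {"main-1:3306", "sub-3:3306"},
	"sub-2:3306":  {"main-1:3306"},
	"sub-3:3306":  {"sub-1:3306"},
}

// discover crawls the topology starting from seed, visiting each instance once.
func discover(seed instanceKey) []instanceKey {
	queue := []instanceKey{seed}
	seen := map[instanceKey]bool{seed: true}
	var visited []instanceKey

	for len(queue) > 0 {
		key := queue[0]
		queue = queue[1:]
		visited = append(visited, key)

		// "Polling" the instance reveals its main and subordinates;
		// push any peer we have not queued yet.
		for _, peer := range topology[key] {
			if !seen[peer] {
				seen[peer] = true
				queue = append(queue, peer)
			}
		}
	}
	return visited
}

func main() {
	fmt.Println(discover("sub-3:3306"))
}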
@@ -159,16 +159,16 @@ func discoverInstance(instanceKey inst.InstanceKey) { return } - // Investigate slaves: - for _, slaveKey := range instance.SlaveHosts.GetInstanceKeys() { - slaveKey := slaveKey - if slaveKey.IsValid() { - discoveryQueue.Push(slaveKey) + // Investigate subordinates: + for _, subordinateKey := range instance.SubordinateHosts.GetInstanceKeys() { + subordinateKey := subordinateKey + if subordinateKey.IsValid() { + discoveryQueue.Push(subordinateKey) } } - // Investigate master: - if instance.MasterKey.IsValid() { - discoveryQueue.Push(instance.MasterKey) + // Investigate main: + if instance.MainKey.IsValid() { + discoveryQueue.Push(instance.MainKey) } } @@ -263,8 +263,8 @@ func ContinuousDiscovery() { go inst.ForgetExpiredHostnameResolves() go inst.DeleteInvalidHostnameResolves() go inst.ReviewUnseenInstances() - go inst.InjectUnseenMasters() - go inst.ResolveUnknownMasterHostnameResolves() + go inst.InjectUnseenMains() + go inst.ResolveUnknownMainHostnameResolves() go inst.UpdateClusterAliases() go inst.ExpireMaintenance() go inst.ExpireDowntime() @@ -272,7 +272,7 @@ func ContinuousDiscovery() { go inst.ExpireHostnameUnresolve() go inst.ExpireClusterDomainName() go inst.ExpireAudit() - go inst.ExpireMasterPositionEquivalence() + go inst.ExpireMainPositionEquivalence() go inst.ExpirePoolInstances() go inst.FlushNontrivialResolveCacheToDatabase() go process.ExpireNodesHistory() diff --git a/go/logic/topology_recovery.go b/go/logic/topology_recovery.go index e210efe3..69be9e07 100644 --- a/go/logic/topology_recovery.go +++ b/go/logic/topology_recovery.go @@ -51,7 +51,7 @@ type TopologyRecovery struct { SuccessorAlias string IsActive bool IsSuccessful bool - LostSlaves inst.InstanceKeyMap + LostSubordinates inst.InstanceKeyMap ParticipatingInstanceKeys inst.InstanceKeyMap AllErrors []string RecoveryStartTimestamp string @@ -71,7 +71,7 @@ func NewTopologyRecovery(replicationAnalysis inst.ReplicationAnalysis) *Topology topologyRecovery := &TopologyRecovery{} topologyRecovery.AnalysisEntry = replicationAnalysis topologyRecovery.SuccessorKey = nil - topologyRecovery.LostSlaves = *inst.NewInstanceKeyMap() + topologyRecovery.LostSubordinates = *inst.NewInstanceKeyMap() topologyRecovery.ParticipatingInstanceKeys = *inst.NewInstanceKeyMap() topologyRecovery.AllErrors = []string{} topologyRecovery.PostponedFunctions = [](func() error){} @@ -91,57 +91,57 @@ func (this *TopologyRecovery) AddErrors(errs []error) { } } -type MasterRecoveryType string +type MainRecoveryType string const ( - MasterRecoveryGTID MasterRecoveryType = "MasterRecoveryGTID" - MasterRecoveryPseudoGTID = "MasterRecoveryPseudoGTID" - MasterRecoveryBinlogServer = "MasterRecoveryBinlogServer" + MainRecoveryGTID MainRecoveryType = "MainRecoveryGTID" + MainRecoveryPseudoGTID = "MainRecoveryPseudoGTID" + MainRecoveryBinlogServer = "MainRecoveryBinlogServer" ) -var emptySlavesList [](*inst.Instance) +var emptySubordinatesList [](*inst.Instance) var emergencyReadTopologyInstanceMap = cache.New(time.Duration(config.Config.InstancePollSeconds)*time.Second, time.Second) -// InstancesByCountSlaves sorts instances by umber of slaves, descending -type InstancesByCountSlaves [](*inst.Instance) +// InstancesByCountSubordinates sorts instances by umber of subordinates, descending +type InstancesByCountSubordinates [](*inst.Instance) -func (this InstancesByCountSlaves) Len() int { return len(this) } -func (this InstancesByCountSlaves) Swap(i, j int) { this[i], this[j] = this[j], this[i] } -func (this InstancesByCountSlaves) 
Less(i, j int) bool { - if len(this[i].SlaveHosts) == len(this[j].SlaveHosts) { - // Secondary sorting: prefer more advanced slaves +func (this InstancesByCountSubordinates) Len() int { return len(this) } +func (this InstancesByCountSubordinates) Swap(i, j int) { this[i], this[j] = this[j], this[i] } +func (this InstancesByCountSubordinates) Less(i, j int) bool { + if len(this[i].SubordinateHosts) == len(this[j].SubordinateHosts) { + // Secondary sorting: prefer more advanced subordinates return !this[i].ExecBinlogCoordinates.SmallerThan(&this[j].ExecBinlogCoordinates) } - return len(this[i].SlaveHosts) < len(this[j].SlaveHosts) + return len(this[i].SubordinateHosts) < len(this[j].SubordinateHosts) } -var recoverDeadMasterCounter = metrics.NewCounter() -var recoverDeadMasterSuccessCounter = metrics.NewCounter() -var recoverDeadMasterFailureCounter = metrics.NewCounter() -var recoverDeadIntermediateMasterCounter = metrics.NewCounter() -var recoverDeadIntermediateMasterSuccessCounter = metrics.NewCounter() -var recoverDeadIntermediateMasterFailureCounter = metrics.NewCounter() -var recoverDeadCoMasterCounter = metrics.NewCounter() -var recoverDeadCoMasterSuccessCounter = metrics.NewCounter() -var recoverDeadCoMasterFailureCounter = metrics.NewCounter() -var recoverUnreachableMasterWithStaleSlavesCounter = metrics.NewCounter() -var recoverUnreachableMasterWithStaleSlavesSuccessCounter = metrics.NewCounter() -var recoverUnreachableMasterWithStaleSlavesFailureCounter = metrics.NewCounter() +var recoverDeadMainCounter = metrics.NewCounter() +var recoverDeadMainSuccessCounter = metrics.NewCounter() +var recoverDeadMainFailureCounter = metrics.NewCounter() +var recoverDeadIntermediateMainCounter = metrics.NewCounter() +var recoverDeadIntermediateMainSuccessCounter = metrics.NewCounter() +var recoverDeadIntermediateMainFailureCounter = metrics.NewCounter() +var recoverDeadCoMainCounter = metrics.NewCounter() +var recoverDeadCoMainSuccessCounter = metrics.NewCounter() +var recoverDeadCoMainFailureCounter = metrics.NewCounter() +var recoverUnreachableMainWithStaleSubordinatesCounter = metrics.NewCounter() +var recoverUnreachableMainWithStaleSubordinatesSuccessCounter = metrics.NewCounter() +var recoverUnreachableMainWithStaleSubordinatesFailureCounter = metrics.NewCounter() func init() { - metrics.Register("recover.dead_master.start", recoverDeadMasterCounter) - metrics.Register("recover.dead_master.success", recoverDeadMasterSuccessCounter) - metrics.Register("recover.dead_master.fail", recoverDeadMasterFailureCounter) - metrics.Register("recover.dead_intermediate_master.start", recoverDeadIntermediateMasterCounter) - metrics.Register("recover.dead_intermediate_master.success", recoverDeadIntermediateMasterSuccessCounter) - metrics.Register("recover.dead_intermediate_master.fail", recoverDeadIntermediateMasterFailureCounter) - metrics.Register("recover.dead_co_master.start", recoverDeadCoMasterCounter) - metrics.Register("recover.dead_co_master.success", recoverDeadCoMasterSuccessCounter) - metrics.Register("recover.dead_co_master.fail", recoverDeadCoMasterFailureCounter) - metrics.Register("recover.unreach_master_stale_slaves.start", recoverUnreachableMasterWithStaleSlavesCounter) - metrics.Register("recover.unreach_master_stale_slaves.success", recoverUnreachableMasterWithStaleSlavesSuccessCounter) - metrics.Register("recover.unreach_master_stale_slaves.fail", recoverUnreachableMasterWithStaleSlavesFailureCounter) + metrics.Register("recover.dead_main.start", recoverDeadMainCounter) + 
metrics.Register("recover.dead_main.success", recoverDeadMainSuccessCounter) + metrics.Register("recover.dead_main.fail", recoverDeadMainFailureCounter) + metrics.Register("recover.dead_intermediate_main.start", recoverDeadIntermediateMainCounter) + metrics.Register("recover.dead_intermediate_main.success", recoverDeadIntermediateMainSuccessCounter) + metrics.Register("recover.dead_intermediate_main.fail", recoverDeadIntermediateMainFailureCounter) + metrics.Register("recover.dead_co_main.start", recoverDeadCoMainCounter) + metrics.Register("recover.dead_co_main.success", recoverDeadCoMainSuccessCounter) + metrics.Register("recover.dead_co_main.fail", recoverDeadCoMainFailureCounter) + metrics.Register("recover.unreach_main_stale_subordinates.start", recoverUnreachableMainWithStaleSubordinatesCounter) + metrics.Register("recover.unreach_main_stale_subordinates.success", recoverUnreachableMainWithStaleSubordinatesSuccessCounter) + metrics.Register("recover.unreach_main_stale_subordinates.fail", recoverUnreachableMainWithStaleSubordinatesFailureCounter) } // replaceCommandPlaceholders replaces agreed-upon placeholders with analysis data @@ -154,10 +154,10 @@ func replaceCommandPlaceholders(command string, topologyRecovery *TopologyRecove command = strings.Replace(command, "{failureCluster}", analysisEntry.ClusterDetails.ClusterName, -1) command = strings.Replace(command, "{failureClusterAlias}", analysisEntry.ClusterDetails.ClusterAlias, -1) command = strings.Replace(command, "{failureClusterDomain}", analysisEntry.ClusterDetails.ClusterDomain, -1) - command = strings.Replace(command, "{countSlaves}", fmt.Sprintf("%d", analysisEntry.CountSlaves), -1) + command = strings.Replace(command, "{countSubordinates}", fmt.Sprintf("%d", analysisEntry.CountSubordinates), -1) command = strings.Replace(command, "{isDowntimed}", fmt.Sprint(analysisEntry.IsDowntimed), -1) - command = strings.Replace(command, "{autoMasterRecovery}", fmt.Sprint(analysisEntry.ClusterDetails.HasAutomatedMasterRecovery), -1) - command = strings.Replace(command, "{autoIntermediateMasterRecovery}", fmt.Sprint(analysisEntry.ClusterDetails.HasAutomatedIntermediateMasterRecovery), -1) + command = strings.Replace(command, "{autoMainRecovery}", fmt.Sprint(analysisEntry.ClusterDetails.HasAutomatedMainRecovery), -1) + command = strings.Replace(command, "{autoIntermediateMainRecovery}", fmt.Sprint(analysisEntry.ClusterDetails.HasAutomatedIntermediateMainRecovery), -1) command = strings.Replace(command, "{orchestratorHost}", process.ThisHostname, -1) command = strings.Replace(command, "{isSuccessful}", fmt.Sprint(topologyRecovery.SuccessorKey != nil), -1) @@ -169,8 +169,8 @@ func replaceCommandPlaceholders(command string, topologyRecovery *TopologyRecove command = strings.Replace(command, "{successorAlias}", topologyRecovery.SuccessorAlias, -1) } - command = strings.Replace(command, "{lostSlaves}", topologyRecovery.LostSlaves.ToCommaDelimitedList(), -1) - command = strings.Replace(command, "{slaveHosts}", analysisEntry.SlaveHosts.ToCommaDelimitedList(), -1) + command = strings.Replace(command, "{lostSubordinates}", topologyRecovery.LostSubordinates.ToCommaDelimitedList(), -1) + command = strings.Replace(command, "{subordinateHosts}", analysisEntry.SubordinateHosts.ToCommaDelimitedList(), -1) return command } @@ -197,341 +197,341 @@ func executeProcesses(processes []string, description string, topologyRecovery * return err } -func recoverDeadMasterInBinlogServerTopology(topologyRecovery *TopologyRecovery) (promotedSlave *inst.Instance, err 
error) { - failedMasterKey := &topologyRecovery.AnalysisEntry.AnalyzedInstanceKey +func recoverDeadMainInBinlogServerTopology(topologyRecovery *TopologyRecovery) (promotedSubordinate *inst.Instance, err error) { + failedMainKey := &topologyRecovery.AnalysisEntry.AnalyzedInstanceKey var promotedBinlogServer *inst.Instance - _, promotedBinlogServer, err = inst.RegroupSlavesBinlogServers(failedMasterKey, true) + _, promotedBinlogServer, err = inst.RegroupSubordinatesBinlogServers(failedMainKey, true) if err != nil { return nil, log.Errore(err) } - promotedBinlogServer, err = inst.StopSlave(&promotedBinlogServer.Key) + promotedBinlogServer, err = inst.StopSubordinate(&promotedBinlogServer.Key) if err != nil { - return promotedSlave, log.Errore(err) + return promotedSubordinate, log.Errore(err) } - // Find candidate slave - promotedSlave, err = inst.GetCandidateSlaveOfBinlogServerTopology(&promotedBinlogServer.Key) + // Find candidate subordinate + promotedSubordinate, err = inst.GetCandidateSubordinateOfBinlogServerTopology(&promotedBinlogServer.Key) if err != nil { - return promotedSlave, log.Errore(err) + return promotedSubordinate, log.Errore(err) } // Align it with binlog server coordinates - promotedSlave, err = inst.StopSlave(&promotedSlave.Key) + promotedSubordinate, err = inst.StopSubordinate(&promotedSubordinate.Key) if err != nil { - return promotedSlave, log.Errore(err) + return promotedSubordinate, log.Errore(err) } - promotedSlave, err = inst.StartSlaveUntilMasterCoordinates(&promotedSlave.Key, &promotedBinlogServer.ExecBinlogCoordinates) + promotedSubordinate, err = inst.StartSubordinateUntilMainCoordinates(&promotedSubordinate.Key, &promotedBinlogServer.ExecBinlogCoordinates) if err != nil { - return promotedSlave, log.Errore(err) + return promotedSubordinate, log.Errore(err) } - promotedSlave, err = inst.StopSlave(&promotedSlave.Key) + promotedSubordinate, err = inst.StopSubordinate(&promotedSubordinate.Key) if err != nil { - return promotedSlave, log.Errore(err) + return promotedSubordinate, log.Errore(err) } // Detach, flush binary logs forward - promotedSlave, err = inst.ResetSlave(&promotedSlave.Key) + promotedSubordinate, err = inst.ResetSubordinate(&promotedSubordinate.Key) if err != nil { - return promotedSlave, log.Errore(err) + return promotedSubordinate, log.Errore(err) } - promotedSlave, err = inst.FlushBinaryLogsTo(&promotedSlave.Key, promotedBinlogServer.ExecBinlogCoordinates.LogFile) + promotedSubordinate, err = inst.FlushBinaryLogsTo(&promotedSubordinate.Key, promotedBinlogServer.ExecBinlogCoordinates.LogFile) if err != nil { - return promotedSlave, log.Errore(err) + return promotedSubordinate, log.Errore(err) } - promotedSlave, err = inst.FlushBinaryLogs(&promotedSlave.Key, 1) + promotedSubordinate, err = inst.FlushBinaryLogs(&promotedSubordinate.Key, 1) if err != nil { - return promotedSlave, log.Errore(err) + return promotedSubordinate, log.Errore(err) } - promotedSlave, err = inst.PurgeBinaryLogsToCurrent(&promotedSlave.Key) + promotedSubordinate, err = inst.PurgeBinaryLogsToCurrent(&promotedSubordinate.Key) if err != nil { - return promotedSlave, log.Errore(err) + return promotedSubordinate, log.Errore(err) } - // Reconnect binlog servers to promoted slave (now master): + // Reconnect binlog servers to promoted subordinate (now main): promotedBinlogServer, err = inst.SkipToNextBinaryLog(&promotedBinlogServer.Key) if err != nil { - return promotedSlave, log.Errore(err) + return promotedSubordinate, log.Errore(err) } - promotedBinlogServer, err = 
inst.Repoint(&promotedBinlogServer.Key, &promotedSlave.Key, inst.GTIDHintDeny) + promotedBinlogServer, err = inst.Repoint(&promotedBinlogServer.Key, &promotedSubordinate.Key, inst.GTIDHintDeny) if err != nil { return nil, log.Errore(err) } func() { - // Move binlog server slaves up to replicate from master. + // Move binlog server subordinates up to replicate from main. // This can only be done once a BLS has skipped to the next binlog - // We postpone this operation. The master is already promoted and we're happy. - binlogServerSlaves, err := inst.ReadBinlogServerSlaveInstances(&promotedBinlogServer.Key) + // We postpone this operation. The main is already promoted and we're happy. + binlogServerSubordinates, err := inst.ReadBinlogServerSubordinateInstances(&promotedBinlogServer.Key) if err != nil { return } maxBinlogServersToPromote := 3 - for i, binlogServerSlave := range binlogServerSlaves { - binlogServerSlave := binlogServerSlave + for i, binlogServerSubordinate := range binlogServerSubordinates { + binlogServerSubordinate := binlogServerSubordinate if i >= maxBinlogServersToPromote { return } postponedFunction := func() error { - binlogServerSlave, err := inst.StopSlave(&binlogServerSlave.Key) + binlogServerSubordinate, err := inst.StopSubordinate(&binlogServerSubordinate.Key) if err != nil { return err } - // Make sure the BLS has the "next binlog" -- the one the master flushed & purged to. Otherwise the BLS - // will request a binlog the master does not have - if binlogServerSlave.ExecBinlogCoordinates.SmallerThan(&promotedBinlogServer.ExecBinlogCoordinates) { - binlogServerSlave, err = inst.StartSlaveUntilMasterCoordinates(&binlogServerSlave.Key, &promotedBinlogServer.ExecBinlogCoordinates) + // Make sure the BLS has the "next binlog" -- the one the main flushed & purged to. 
Otherwise the BLS + // will request a binlog the main does not have + if binlogServerSubordinate.ExecBinlogCoordinates.SmallerThan(&promotedBinlogServer.ExecBinlogCoordinates) { + binlogServerSubordinate, err = inst.StartSubordinateUntilMainCoordinates(&binlogServerSubordinate.Key, &promotedBinlogServer.ExecBinlogCoordinates) if err != nil { return err } } - _, err = inst.Repoint(&binlogServerSlave.Key, &promotedSlave.Key, inst.GTIDHintDeny) + _, err = inst.Repoint(&binlogServerSubordinate.Key, &promotedSubordinate.Key, inst.GTIDHintDeny) return err } topologyRecovery.AddPostponedFunction(postponedFunction) } }() - return promotedSlave, err + return promotedSubordinate, err } -// RecoverDeadMaster recovers a dead master, complete logic inside -func RecoverDeadMaster(topologyRecovery *TopologyRecovery, skipProcesses bool) (promotedSlave *inst.Instance, lostSlaves [](*inst.Instance), err error) { +// RecoverDeadMain recovers a dead main, complete logic inside +func RecoverDeadMain(topologyRecovery *TopologyRecovery, skipProcesses bool) (promotedSubordinate *inst.Instance, lostSubordinates [](*inst.Instance), err error) { analysisEntry := &topologyRecovery.AnalysisEntry failedInstanceKey := &analysisEntry.AnalyzedInstanceKey - var cannotReplicateSlaves [](*inst.Instance) + var cannotReplicateSubordinates [](*inst.Instance) - inst.AuditOperation("recover-dead-master", failedInstanceKey, "problem found; will recover") + inst.AuditOperation("recover-dead-main", failedInstanceKey, "problem found; will recover") if !skipProcesses { if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", topologyRecovery, true); err != nil { - return nil, lostSlaves, topologyRecovery.AddError(err) + return nil, lostSubordinates, topologyRecovery.AddError(err) } } - log.Debugf("topology_recovery: RecoverDeadMaster: will recover %+v", *failedInstanceKey) + log.Debugf("topology_recovery: RecoverDeadMain: will recover %+v", *failedInstanceKey) - var masterRecoveryType MasterRecoveryType = MasterRecoveryPseudoGTID + var mainRecoveryType MainRecoveryType = MainRecoveryPseudoGTID if analysisEntry.OracleGTIDImmediateTopology || analysisEntry.MariaDBGTIDImmediateTopology { - masterRecoveryType = MasterRecoveryGTID + mainRecoveryType = MainRecoveryGTID } else if analysisEntry.BinlogServerImmediateTopology { - masterRecoveryType = MasterRecoveryBinlogServer + mainRecoveryType = MainRecoveryBinlogServer } - log.Debugf("topology_recovery: RecoverDeadMaster: masterRecoveryType=%+v", masterRecoveryType) + log.Debugf("topology_recovery: RecoverDeadMain: mainRecoveryType=%+v", mainRecoveryType) - switch masterRecoveryType { - case MasterRecoveryGTID: + switch mainRecoveryType { + case MainRecoveryGTID: { - lostSlaves, _, cannotReplicateSlaves, promotedSlave, err = inst.RegroupSlavesGTID(failedInstanceKey, true, nil) + lostSubordinates, _, cannotReplicateSubordinates, promotedSubordinate, err = inst.RegroupSubordinatesGTID(failedInstanceKey, true, nil) } - case MasterRecoveryPseudoGTID: + case MainRecoveryPseudoGTID: { - lostSlaves, _, _, cannotReplicateSlaves, promotedSlave, err = inst.RegroupSlavesPseudoGTIDIncludingSubSlavesOfBinlogServers(failedInstanceKey, true, nil, &topologyRecovery.PostponedFunctionsContainer) + lostSubordinates, _, _, cannotReplicateSubordinates, promotedSubordinate, err = inst.RegroupSubordinatesPseudoGTIDIncludingSubSubordinatesOfBinlogServers(failedInstanceKey, true, nil, &topologyRecovery.PostponedFunctionsContainer) } - case MasterRecoveryBinlogServer: + case 
MainRecoveryBinlogServer: { - promotedSlave, err = recoverDeadMasterInBinlogServerTopology(topologyRecovery) + promotedSubordinate, err = recoverDeadMainInBinlogServerTopology(topologyRecovery) } } topologyRecovery.AddError(err) - lostSlaves = append(lostSlaves, cannotReplicateSlaves...) + lostSubordinates = append(lostSubordinates, cannotReplicateSubordinates...) - if promotedSlave != nil && len(lostSlaves) > 0 && config.Config.DetachLostSlavesAfterMasterFailover { + if promotedSubordinate != nil && len(lostSubordinates) > 0 && config.Config.DetachLostSubordinatesAfterMainFailover { postponedFunction := func() error { - log.Debugf("topology_recovery: - RecoverDeadMaster: lost %+v slaves during recovery process; detaching them", len(lostSlaves)) - for _, slave := range lostSlaves { - slave := slave - inst.DetachSlaveOperation(&slave.Key) + log.Debugf("topology_recovery: - RecoverDeadMain: lost %+v subordinates during recovery process; detaching them", len(lostSubordinates)) + for _, subordinate := range lostSubordinates { + subordinate := subordinate + inst.DetachSubordinateOperation(&subordinate.Key) } return nil } topologyRecovery.AddPostponedFunction(postponedFunction) } - if config.Config.MasterFailoverLostInstancesDowntimeMinutes > 0 { + if config.Config.MainFailoverLostInstancesDowntimeMinutes > 0 { postponedFunction := func() error { - inst.BeginDowntime(failedInstanceKey, inst.GetMaintenanceOwner(), inst.DowntimeLostInRecoveryMessage, config.Config.MasterFailoverLostInstancesDowntimeMinutes*60) - for _, slave := range lostSlaves { - slave := slave - inst.BeginDowntime(&slave.Key, inst.GetMaintenanceOwner(), inst.DowntimeLostInRecoveryMessage, config.Config.MasterFailoverLostInstancesDowntimeMinutes*60) + inst.BeginDowntime(failedInstanceKey, inst.GetMaintenanceOwner(), inst.DowntimeLostInRecoveryMessage, config.Config.MainFailoverLostInstancesDowntimeMinutes*60) + for _, subordinate := range lostSubordinates { + subordinate := subordinate + inst.BeginDowntime(&subordinate.Key, inst.GetMaintenanceOwner(), inst.DowntimeLostInRecoveryMessage, config.Config.MainFailoverLostInstancesDowntimeMinutes*60) } return nil } topologyRecovery.AddPostponedFunction(postponedFunction) } - if promotedSlave == nil { - inst.AuditOperation("recover-dead-master", failedInstanceKey, "Failure: no slave promoted.") + if promotedSubordinate == nil { + inst.AuditOperation("recover-dead-main", failedInstanceKey, "Failure: no subordinate promoted.") } else { - inst.AuditOperation("recover-dead-master", failedInstanceKey, fmt.Sprintf("promoted slave: %+v", promotedSlave.Key)) + inst.AuditOperation("recover-dead-main", failedInstanceKey, fmt.Sprintf("promoted subordinate: %+v", promotedSubordinate.Key)) } - return promotedSlave, lostSlaves, err + return promotedSubordinate, lostSubordinates, err } -// replacePromotedSlaveWithCandidate is called after an intermediate master has died and been replaced by some promotedSlave. -// But, is there an even better slave to promote? -// if candidateInstanceKey is given, then it is forced to be promoted over the promotedSlave +// replacePromotedSubordinateWithCandidate is called after an intermediate main has died and been replaced by some promotedSubordinate. +// But, is there an even better subordinate to promote? +// if candidateInstanceKey is given, then it is forced to be promoted over the promotedSubordinate // Otherwise, search for the best to promote! 
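For reference, the strategy selection in RecoverDeadMain above reduces to a switch over the analysis flags. A compact sketch of that decision with stand-in types and names (in the real code each branch wires into a different Regroup* call):

package main

import "fmt"

type recoveryType string

const (
	recoveryGTID         recoveryType = "GTID"
	recoveryPseudoGTID   recoveryType = "PseudoGTID"
	recoveryBinlogServer recoveryType = "BinlogServer"
)

// analysis carries only the flags that drive strategy selection.
type analysis struct {
	OracleGTIDImmediateTopology   bool
	MariaDBGTIDImmediateTopology  bool
	BinlogServerImmediateTopology bool
}

// chooseRecoveryType mirrors the precedence used above: GTID when the
// immediate topology supports it, binlog-server recovery when the failed
// instance's immediate topology is binlog servers, Pseudo-GTID otherwise.
func chooseRecoveryType(a analysis) recoveryType {
	if a.OracleGTIDImmediateTopology || a.MariaDBGTIDImmediateTopology {
		return recoveryGTID
	}
	if a.BinlogServerImmediateTopology {
		return recoveryBinlogServer
	}
	return recoveryPseudoGTID
}

func main() {
	fmt.Println(chooseRecoveryType(analysis{OracleGTIDImmediateTopology: true}))
	fmt.Println(chooseRecoveryType(analysis{BinlogServerImmediateTopology: true}))
	fmt.Println(chooseRecoveryType(analysis{}))
}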
-func replacePromotedSlaveWithCandidate(deadInstanceKey *inst.InstanceKey, promotedSlave *inst.Instance, candidateInstanceKey *inst.InstanceKey) (*inst.Instance, error) { - candidateSlaves, _ := inst.ReadClusterCandidateInstances(promotedSlave.ClusterName) - // So we've already promoted a slave. - // However, can we improve on our choice? Are there any slaves marked with "is_candidate"? - // Maybe we actually promoted such a slave. Does that mean we should keep it? +func replacePromotedSubordinateWithCandidate(deadInstanceKey *inst.InstanceKey, promotedSubordinate *inst.Instance, candidateInstanceKey *inst.InstanceKey) (*inst.Instance, error) { + candidateSubordinates, _ := inst.ReadClusterCandidateInstances(promotedSubordinate.ClusterName) + // So we've already promoted a subordinate. + // However, can we improve on our choice? Are there any subordinates marked with "is_candidate"? + // Maybe we actually promoted such a subordinate. Does that mean we should keep it? // The current logic is: - // - 1. we prefer to promote a "is_candidate" which is in the same DC & env as the dead intermediate master (or do nothing if the promtoed slave is such one) - // - 2. we prefer to promote a "is_candidate" which is in the same DC & env as the promoted slave (or do nothing if the promtoed slave is such one) + // - 1. we prefer to promote a "is_candidate" which is in the same DC & env as the dead intermediate main (or do nothing if the promtoed subordinate is such one) + // - 2. we prefer to promote a "is_candidate" which is in the same DC & env as the promoted subordinate (or do nothing if the promtoed subordinate is such one) // - 3. keep to current choice - log.Infof("topology_recovery: checking if should replace promoted slave with a better candidate") + log.Infof("topology_recovery: checking if should replace promoted subordinate with a better candidate") if candidateInstanceKey == nil { if deadInstance, _, err := inst.ReadInstance(deadInstanceKey); err == nil && deadInstance != nil { - for _, candidateSlave := range candidateSlaves { - if promotedSlave.Key.Equals(&candidateSlave.Key) && - promotedSlave.DataCenter == deadInstance.DataCenter && - promotedSlave.PhysicalEnvironment == deadInstance.PhysicalEnvironment { + for _, candidateSubordinate := range candidateSubordinates { + if promotedSubordinate.Key.Equals(&candidateSubordinate.Key) && + promotedSubordinate.DataCenter == deadInstance.DataCenter && + promotedSubordinate.PhysicalEnvironment == deadInstance.PhysicalEnvironment { // Seems like we promoted a candidate in the same DC & ENV as dead IM! Ideal! We're happy! 
- log.Infof("topology_recovery: promoted slave %+v is the ideal candidate", promotedSlave.Key) - return promotedSlave, nil + log.Infof("topology_recovery: promoted subordinate %+v is the ideal candidate", promotedSubordinate.Key) + return promotedSubordinate, nil } } } } // We didn't pick the ideal candidate; let's see if we can replace with a candidate from same DC and ENV if candidateInstanceKey == nil { - // Try a candidate slave that is in same DC & env as the dead instance + // Try a candidate subordinate that is in same DC & env as the dead instance if deadInstance, _, err := inst.ReadInstance(deadInstanceKey); err == nil && deadInstance != nil { - for _, candidateSlave := range candidateSlaves { - if candidateSlave.DataCenter == deadInstance.DataCenter && - candidateSlave.PhysicalEnvironment == deadInstance.PhysicalEnvironment && - candidateSlave.MasterKey.Equals(&promotedSlave.Key) { + for _, candidateSubordinate := range candidateSubordinates { + if candidateSubordinate.DataCenter == deadInstance.DataCenter && + candidateSubordinate.PhysicalEnvironment == deadInstance.PhysicalEnvironment && + candidateSubordinate.MainKey.Equals(&promotedSubordinate.Key) { // This would make a great candidate - candidateInstanceKey = &candidateSlave.Key - log.Debugf("topology_recovery: no candidate was offered for %+v but orchestrator picks %+v as candidate replacement, based on being in same DC & env as failed instance", promotedSlave.Key, candidateSlave.Key) + candidateInstanceKey = &candidateSubordinate.Key + log.Debugf("topology_recovery: no candidate was offered for %+v but orchestrator picks %+v as candidate replacement, based on being in same DC & env as failed instance", promotedSubordinate.Key, candidateSubordinate.Key) } } } } if candidateInstanceKey == nil { - // We cannot find a candidate in same DC and ENV as dead master - for _, candidateSlave := range candidateSlaves { - if promotedSlave.Key.Equals(&candidateSlave.Key) { - // Seems like we promoted a candidate slave (though not in same DC and ENV as dead master). Good enough. + // We cannot find a candidate in same DC and ENV as dead main + for _, candidateSubordinate := range candidateSubordinates { + if promotedSubordinate.Key.Equals(&candidateSubordinate.Key) { + // Seems like we promoted a candidate subordinate (though not in same DC and ENV as dead main). Good enough. // No further action required. - log.Infof("topology_recovery: promoted slave %+v is a good candidate", promotedSlave.Key) - return promotedSlave, nil + log.Infof("topology_recovery: promoted subordinate %+v is a good candidate", promotedSubordinate.Key) + return promotedSubordinate, nil } } } // Still nothing? 
if candidateInstanceKey == nil { - // Try a candidate slave that is in same DC & env as the promoted slave (our promoted slave is not an "is_candidate") - for _, candidateSlave := range candidateSlaves { - if promotedSlave.DataCenter == candidateSlave.DataCenter && - promotedSlave.PhysicalEnvironment == candidateSlave.PhysicalEnvironment && - candidateSlave.MasterKey.Equals(&promotedSlave.Key) { + // Try a candidate subordinate that is in same DC & env as the promoted subordinate (our promoted subordinate is not an "is_candidate") + for _, candidateSubordinate := range candidateSubordinates { + if promotedSubordinate.DataCenter == candidateSubordinate.DataCenter && + promotedSubordinate.PhysicalEnvironment == candidateSubordinate.PhysicalEnvironment && + candidateSubordinate.MainKey.Equals(&promotedSubordinate.Key) { // OK, better than nothing - candidateInstanceKey = &candidateSlave.Key - log.Debugf("topology_recovery: no candidate was offered for %+v but orchestrator picks %+v as candidate replacement, based on being in same DC & env as promoted instance", promotedSlave.Key, candidateSlave.Key) + candidateInstanceKey = &candidateSubordinate.Key + log.Debugf("topology_recovery: no candidate was offered for %+v but orchestrator picks %+v as candidate replacement, based on being in same DC & env as promoted instance", promotedSubordinate.Key, candidateSubordinate.Key) } } } // So do we have a candidate? if candidateInstanceKey == nil { - // Found nothing. Stick with promoted slave - return promotedSlave, nil + // Found nothing. Stick with promoted subordinate + return promotedSubordinate, nil } - if promotedSlave.Key.Equals(candidateInstanceKey) { + if promotedSubordinate.Key.Equals(candidateInstanceKey) { // Sanity. It IS the candidate, nothing to promote... - return promotedSlave, nil + return promotedSubordinate, nil } // Try and promote suggested candidate, if applicable and possible - log.Debugf("topology_recovery: promoted instance %+v is not the suggested candidate %+v. Will see what can be done", promotedSlave.Key, *candidateInstanceKey) + log.Debugf("topology_recovery: promoted instance %+v is not the suggested candidate %+v. Will see what can be done", promotedSubordinate.Key, *candidateInstanceKey) candidateInstance, _, err := inst.ReadInstance(candidateInstanceKey) if err != nil { - return promotedSlave, log.Errore(err) + return promotedSubordinate, log.Errore(err) } - if candidateInstance.MasterKey.Equals(&promotedSlave.Key) { - log.Debugf("topology_recovery: suggested candidate %+v is slave of promoted instance %+v. Will try and enslave its master", *candidateInstanceKey, promotedSlave.Key) - candidateInstance, err = inst.EnslaveMaster(&candidateInstance.Key) + if candidateInstance.MainKey.Equals(&promotedSubordinate.Key) { + log.Debugf("topology_recovery: suggested candidate %+v is subordinate of promoted instance %+v. 
Will try and ensubordinate its main", *candidateInstanceKey, promotedSubordinate.Key) + candidateInstance, err = inst.EnsubordinateMain(&candidateInstance.Key) if err != nil { - return promotedSlave, log.Errore(err) + return promotedSubordinate, log.Errore(err) } - log.Debugf("topology_recovery: success promoting %+v over %+v", *candidateInstanceKey, promotedSlave.Key) + log.Debugf("topology_recovery: success promoting %+v over %+v", *candidateInstanceKey, promotedSubordinate.Key) return candidateInstance, nil } log.Debugf("topology_recovery: could not manage to promoted suggested candidate %+v", *candidateInstanceKey) - return promotedSlave, nil + return promotedSubordinate, nil } -// checkAndRecoverDeadMaster checks a given analysis, decides whether to take action, and possibly takes action +// checkAndRecoverDeadMain checks a given analysis, decides whether to take action, and possibly takes action // Returns true when action was taken. -func checkAndRecoverDeadMaster(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *inst.InstanceKey, forceInstanceRecovery bool, skipProcesses bool) (bool, *TopologyRecovery, error) { - if !(forceInstanceRecovery || analysisEntry.ClusterDetails.HasAutomatedMasterRecovery) { +func checkAndRecoverDeadMain(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *inst.InstanceKey, forceInstanceRecovery bool, skipProcesses bool) (bool, *TopologyRecovery, error) { + if !(forceInstanceRecovery || analysisEntry.ClusterDetails.HasAutomatedMainRecovery) { return false, nil, nil } topologyRecovery, err := AttemptRecoveryRegistration(&analysisEntry, !forceInstanceRecovery, !forceInstanceRecovery) if topologyRecovery == nil { - log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another RecoverDeadMaster.", analysisEntry.AnalyzedInstanceKey) + log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another RecoverDeadMain.", analysisEntry.AnalyzedInstanceKey) return false, nil, err } // That's it! We must do recovery! - log.Debugf("topology_recovery: will handle DeadMaster event on %+v", analysisEntry.ClusterDetails.ClusterName) - recoverDeadMasterCounter.Inc(1) - promotedSlave, lostSlaves, err := RecoverDeadMaster(topologyRecovery, skipProcesses) - topologyRecovery.LostSlaves.AddInstances(lostSlaves) + log.Debugf("topology_recovery: will handle DeadMain event on %+v", analysisEntry.ClusterDetails.ClusterName) + recoverDeadMainCounter.Inc(1) + promotedSubordinate, lostSubordinates, err := RecoverDeadMain(topologyRecovery, skipProcesses) + topologyRecovery.LostSubordinates.AddInstances(lostSubordinates) - if promotedSlave != nil { - promotedSlave, err = replacePromotedSlaveWithCandidate(&analysisEntry.AnalyzedInstanceKey, promotedSlave, candidateInstanceKey) + if promotedSubordinate != nil { + promotedSubordinate, err = replacePromotedSubordinateWithCandidate(&analysisEntry.AnalyzedInstanceKey, promotedSubordinate, candidateInstanceKey) topologyRecovery.AddError(err) } // And this is the end; whether successful or not, we're done. - ResolveRecovery(topologyRecovery, promotedSlave) - if promotedSlave != nil { + ResolveRecovery(topologyRecovery, promotedSubordinate) + if promotedSubordinate != nil { // Success! 
- recoverDeadMasterSuccessCounter.Inc(1) + recoverDeadMainSuccessCounter.Inc(1) - if config.Config.ApplyMySQLPromotionAfterMasterFailover { - log.Debugf("topology_recovery: - RecoverDeadMaster: will apply MySQL changes to promoted master") - inst.ResetSlaveOperation(&promotedSlave.Key) - inst.SetReadOnly(&promotedSlave.Key, false) + if config.Config.ApplyMySQLPromotionAfterMainFailover { + log.Debugf("topology_recovery: - RecoverDeadMain: will apply MySQL changes to promoted main") + inst.ResetSubordinateOperation(&promotedSubordinate.Key) + inst.SetReadOnly(&promotedSubordinate.Key, false) } if !skipProcesses { - // Execute post master-failover processes - executeProcesses(config.Config.PostMasterFailoverProcesses, "PostMasterFailoverProcesses", topologyRecovery, false) + // Execute post main-failover processes + executeProcesses(config.Config.PostMainFailoverProcesses, "PostMainFailoverProcesses", topologyRecovery, false) } - if config.Config.MasterFailoverDetachSlaveMasterHost { + if config.Config.MainFailoverDetachSubordinateMainHost { postponedFunction := func() error { - log.Debugf("topology_recovery: - RecoverDeadMaster: detaching master host on promoted master") - inst.DetachSlaveMasterHost(&promotedSlave.Key) + log.Debugf("topology_recovery: - RecoverDeadMain: detaching main host on promoted main") + inst.DetachSubordinateMainHost(&promotedSubordinate.Key) return nil } topologyRecovery.AddPostponedFunction(postponedFunction) } postponedFunction := func() error { - log.Debugf("topology_recovery: - RecoverDeadMaster: updating cluster_alias") - inst.ReplaceAliasClusterName(analysisEntry.AnalyzedInstanceKey.StringCode(), promotedSlave.Key.StringCode()) + log.Debugf("topology_recovery: - RecoverDeadMain: updating cluster_alias") + inst.ReplaceAliasClusterName(analysisEntry.AnalyzedInstanceKey.StringCode(), promotedSubordinate.Key.StringCode()) return nil } topologyRecovery.AddPostponedFunction(postponedFunction) - attributes.SetGeneralAttribute(analysisEntry.ClusterDetails.ClusterDomain, promotedSlave.Key.StringCode()) + attributes.SetGeneralAttribute(analysisEntry.ClusterDetails.ClusterDomain, promotedSubordinate.Key.StringCode()) } else { - recoverDeadMasterFailureCounter.Inc(1) + recoverDeadMainFailureCounter.Inc(1) } return true, topologyRecovery, err } -// isGeneralyValidAsCandidateSiblingOfIntermediateMaster sees that basic server configuration and state are valid -func isGeneralyValidAsCandidateSiblingOfIntermediateMaster(sibling *inst.Instance) bool { +// isGeneralyValidAsCandidateSiblingOfIntermediateMain sees that basic server configuration and state are valid +func isGeneralyValidAsCandidateSiblingOfIntermediateMain(sibling *inst.Instance) bool { if !sibling.LogBinEnabled { return false } - if !sibling.LogSlaveUpdatesEnabled { + if !sibling.LogSubordinateUpdatesEnabled { return false } - if !sibling.SlaveRunning() { + if !sibling.SubordinateRunning() { return false } if !sibling.IsLastCheckValid { @@ -540,55 +540,55 @@ func isGeneralyValidAsCandidateSiblingOfIntermediateMaster(sibling *inst.Instanc return true } -// isValidAsCandidateSiblingOfIntermediateMaster checks to see that the given sibling is capable to take over instance's slaves -func isValidAsCandidateSiblingOfIntermediateMaster(intermediateMasterInstance *inst.Instance, sibling *inst.Instance) bool { - if sibling.Key.Equals(&intermediateMasterInstance.Key) { +// isValidAsCandidateSiblingOfIntermediateMain checks to see that the given sibling is capable to take over instance's subordinates +func 
isValidAsCandidateSiblingOfIntermediateMain(intermediateMainInstance *inst.Instance, sibling *inst.Instance) bool { + if sibling.Key.Equals(&intermediateMainInstance.Key) { // same instance return false } - if !isGeneralyValidAsCandidateSiblingOfIntermediateMaster(sibling) { + if !isGeneralyValidAsCandidateSiblingOfIntermediateMain(sibling) { return false } - if sibling.HasReplicationFilters != intermediateMasterInstance.HasReplicationFilters { + if sibling.HasReplicationFilters != intermediateMainInstance.HasReplicationFilters { return false } - if sibling.IsBinlogServer() != intermediateMasterInstance.IsBinlogServer() { + if sibling.IsBinlogServer() != intermediateMainInstance.IsBinlogServer() { // When both are binlog servers, failover is trivial. - // When failed IM is binlog server, its sibling is still valid, but we catually prefer to just repoint the slave up -- simplest! + // When failed IM is binlog server, its sibling is still valid, but we catually prefer to just repoint the subordinate up -- simplest! return false } - if sibling.ExecBinlogCoordinates.SmallerThan(&intermediateMasterInstance.ExecBinlogCoordinates) { + if sibling.ExecBinlogCoordinates.SmallerThan(&intermediateMainInstance.ExecBinlogCoordinates) { return false } return true } -// GetCandidateSiblingOfIntermediateMaster chooses the best sibling of a dead intermediate master -// to whom the IM's slaves can be moved. -func GetCandidateSiblingOfIntermediateMaster(intermediateMasterInstance *inst.Instance) (*inst.Instance, error) { +// GetCandidateSiblingOfIntermediateMain chooses the best sibling of a dead intermediate main +// to whom the IM's subordinates can be moved. +func GetCandidateSiblingOfIntermediateMain(intermediateMainInstance *inst.Instance) (*inst.Instance, error) { - siblings, err := inst.ReadSlaveInstances(&intermediateMasterInstance.MasterKey) + siblings, err := inst.ReadSubordinateInstances(&intermediateMainInstance.MainKey) if err != nil { return nil, err } if len(siblings) <= 1 { - return nil, log.Errorf("topology_recovery: no siblings found for %+v", intermediateMasterInstance.Key) + return nil, log.Errorf("topology_recovery: no siblings found for %+v", intermediateMainInstance.Key) } - sort.Sort(sort.Reverse(InstancesByCountSlaves(siblings))) + sort.Sort(sort.Reverse(InstancesByCountSubordinates(siblings))) // In the next series of steps we attempt to return a good replacement. // None of the below attempts is sure to pick a winning server. Perhaps picked server is not enough up-todate -- but // this has small likelihood in the general case, and, well, it's an attempt. It's a Plan A, but we have Plan B & C if this fails. 
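The validity predicate above is essentially a conjunction of health and compatibility checks. A stand-alone sketch with a trimmed-down instance type and simplified fields (the real checks additionally compare replication filters, binlog-server roles, and executed binlog coordinates):

package main

import "fmt"

// candidate holds just the fields this sketch inspects.
type candidate struct {
	Host                         string
	LogBinEnabled                bool
	LogSubordinateUpdatesEnabled bool
	ReplicationRunning           bool
	LastCheckValid               bool
}

// isGenerallyValidAsCandidateSibling rejects any server that could not act as
// an intermediate main: it must write a binlog, log subordinate updates (so
// servers repointed under it keep receiving events), be replicating, and have
// been seen recently.
func isGenerallyValidAsCandidateSibling(c candidate) bool {
	if !c.LogBinEnabled {
		return false
	}
	if !c.LogSubordinateUpdatesEnabled {
		return false
	}
	if !c.ReplicationRunning {
		return false
	}
	return c.LastCheckValid
}

func main() {
	good := candidate{"sub-7:3306", true, true, true, true}
	bad := candidate{"sub-8:3306", true, false, true, true}
	fmt.Println(isGenerallyValidAsCandidateSibling(good)) // true
	fmt.Println(isGenerallyValidAsCandidateSibling(bad))  // false
}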
// At first, we try to return an "is_candidate" server in same dc & env - log.Infof("topology_recovery: searching for the best candidate sibling of dead intermediate master") + log.Infof("topology_recovery: searching for the best candidate sibling of dead intermediate main") for _, sibling := range siblings { sibling := sibling - if isValidAsCandidateSiblingOfIntermediateMaster(intermediateMasterInstance, sibling) && + if isValidAsCandidateSiblingOfIntermediateMain(intermediateMainInstance, sibling) && sibling.IsCandidate && - sibling.DataCenter == intermediateMasterInstance.DataCenter && - sibling.PhysicalEnvironment == intermediateMasterInstance.PhysicalEnvironment { + sibling.DataCenter == intermediateMainInstance.DataCenter && + sibling.PhysicalEnvironment == intermediateMainInstance.PhysicalEnvironment { log.Infof("topology_recovery: found %+v as the ideal candidate", sibling.Key) return sibling, nil } @@ -596,9 +596,9 @@ func GetCandidateSiblingOfIntermediateMaster(intermediateMasterInstance *inst.In // Go for something else in the same DC & ENV for _, sibling := range siblings { sibling := sibling - if isValidAsCandidateSiblingOfIntermediateMaster(intermediateMasterInstance, sibling) && - sibling.DataCenter == intermediateMasterInstance.DataCenter && - sibling.PhysicalEnvironment == intermediateMasterInstance.PhysicalEnvironment { + if isValidAsCandidateSiblingOfIntermediateMain(intermediateMainInstance, sibling) && + sibling.DataCenter == intermediateMainInstance.DataCenter && + sibling.PhysicalEnvironment == intermediateMainInstance.PhysicalEnvironment { log.Infof("topology_recovery: found %+v as a replacement in same dc & environment", sibling.Key) return sibling, nil } @@ -606,7 +606,7 @@ func GetCandidateSiblingOfIntermediateMaster(intermediateMasterInstance *inst.In // Nothing in same DC & env, let's just go for some is_candidate for _, sibling := range siblings { sibling := sibling - if isValidAsCandidateSiblingOfIntermediateMaster(intermediateMasterInstance, sibling) && sibling.IsCandidate { + if isValidAsCandidateSiblingOfIntermediateMain(intermediateMainInstance, sibling) && sibling.IsCandidate { log.Infof("topology_recovery: found %+v as a good candidate", sibling.Key) return sibling, nil } @@ -614,99 +614,99 @@ func GetCandidateSiblingOfIntermediateMaster(intermediateMasterInstance *inst.In // Havent found an "is_candidate". Just whatever is valid. 
for _, sibling := range siblings { sibling := sibling - if isValidAsCandidateSiblingOfIntermediateMaster(intermediateMasterInstance, sibling) { + if isValidAsCandidateSiblingOfIntermediateMain(intermediateMainInstance, sibling) { log.Infof("topology_recovery: found %+v as a replacement", sibling.Key) return sibling, nil } } - return nil, log.Errorf("topology_recovery: cannot find candidate sibling of %+v", intermediateMasterInstance.Key) + return nil, log.Errorf("topology_recovery: cannot find candidate sibling of %+v", intermediateMainInstance.Key) } -// RecoverDeadIntermediateMaster performs intermediate master recovery; complete logic inside -func RecoverDeadIntermediateMaster(topologyRecovery *TopologyRecovery, skipProcesses bool) (successorInstance *inst.Instance, err error) { +// RecoverDeadIntermediateMain performs intermediate main recovery; complete logic inside +func RecoverDeadIntermediateMain(topologyRecovery *TopologyRecovery, skipProcesses bool) (successorInstance *inst.Instance, err error) { analysisEntry := &topologyRecovery.AnalysisEntry failedInstanceKey := &analysisEntry.AnalyzedInstanceKey recoveryResolved := false - inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, "problem found; will recover") + inst.AuditOperation("recover-dead-intermediate-main", failedInstanceKey, "problem found; will recover") if !skipProcesses { if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", topologyRecovery, true); err != nil { return nil, topologyRecovery.AddError(err) } } - intermediateMasterInstance, _, err := inst.ReadInstance(failedInstanceKey) + intermediateMainInstance, _, err := inst.ReadInstance(failedInstanceKey) if err != nil { return nil, topologyRecovery.AddError(err) } // Find possible candidate - candidateSiblingOfIntermediateMaster, err := GetCandidateSiblingOfIntermediateMaster(intermediateMasterInstance) - relocateSlavesToCandidateSibling := func() { - if candidateSiblingOfIntermediateMaster == nil { + candidateSiblingOfIntermediateMain, err := GetCandidateSiblingOfIntermediateMain(intermediateMainInstance) + relocateSubordinatesToCandidateSibling := func() { + if candidateSiblingOfIntermediateMain == nil { return } // We have a candidate - log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will attempt a candidate intermediate master: %+v", candidateSiblingOfIntermediateMaster.Key) - relocatedSlaves, candidateSibling, err, errs := inst.RelocateSlaves(failedInstanceKey, &candidateSiblingOfIntermediateMaster.Key, "") + log.Debugf("topology_recovery: - RecoverDeadIntermediateMain: will attempt a candidate intermediate main: %+v", candidateSiblingOfIntermediateMain.Key) + relocatedSubordinates, candidateSibling, err, errs := inst.RelocateSubordinates(failedInstanceKey, &candidateSiblingOfIntermediateMain.Key, "") topologyRecovery.AddErrors(errs) - topologyRecovery.ParticipatingInstanceKeys.AddKey(candidateSiblingOfIntermediateMaster.Key) + topologyRecovery.ParticipatingInstanceKeys.AddKey(candidateSiblingOfIntermediateMain.Key) - if len(relocatedSlaves) == 0 { - log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: failed to move any slave to candidate intermediate master (%+v)", candidateSibling.Key) + if len(relocatedSubordinates) == 0 { + log.Debugf("topology_recovery: - RecoverDeadIntermediateMain: failed to move any subordinate to candidate intermediate main (%+v)", candidateSibling.Key) return } if err != nil || len(errs) > 0 { - log.Debugf("topology_recovery: - 
RecoverDeadIntermediateMaster: move to candidate intermediate master (%+v) did not complete: %+v", candidateSibling.Key, err) + log.Debugf("topology_recovery: - RecoverDeadIntermediateMain: move to candidate intermediate main (%+v) did not complete: %+v", candidateSibling.Key, err) return } if err == nil { recoveryResolved = true successorInstance = candidateSibling - inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Relocated %d slaves under candidate sibling: %+v; %d errors: %+v", len(relocatedSlaves), candidateSibling.Key, len(errs), errs)) + inst.AuditOperation("recover-dead-intermediate-main", failedInstanceKey, fmt.Sprintf("Relocated %d subordinates under candidate sibling: %+v; %d errors: %+v", len(relocatedSubordinates), candidateSibling.Key, len(errs), errs)) } } - // Plan A: find a replacement intermediate master in same Data Center - if candidateSiblingOfIntermediateMaster != nil && candidateSiblingOfIntermediateMaster.DataCenter == intermediateMasterInstance.DataCenter { - relocateSlavesToCandidateSibling() + // Plan A: find a replacement intermediate main in same Data Center + if candidateSiblingOfIntermediateMain != nil && candidateSiblingOfIntermediateMain.DataCenter == intermediateMainInstance.DataCenter { + relocateSubordinatesToCandidateSibling() } if !recoveryResolved { - log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will next attempt regrouping of slaves") + log.Debugf("topology_recovery: - RecoverDeadIntermediateMain: will next attempt regrouping of subordinates") // Plan B: regroup (we wish to reduce cross-DC replication streams) - _, _, _, _, regroupPromotedSlave, err := inst.RegroupSlaves(failedInstanceKey, true, nil, nil) + _, _, _, _, regroupPromotedSubordinate, err := inst.RegroupSubordinates(failedInstanceKey, true, nil, nil) if err != nil { topologyRecovery.AddError(err) - log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: regroup failed on: %+v", err) + log.Debugf("topology_recovery: - RecoverDeadIntermediateMain: regroup failed on: %+v", err) } - if regroupPromotedSlave != nil { - topologyRecovery.ParticipatingInstanceKeys.AddKey(regroupPromotedSlave.Key) + if regroupPromotedSubordinate != nil { + topologyRecovery.ParticipatingInstanceKeys.AddKey(regroupPromotedSubordinate.Key) } - // Plan C: try replacement intermediate master in other DC... - if candidateSiblingOfIntermediateMaster != nil && candidateSiblingOfIntermediateMaster.DataCenter != intermediateMasterInstance.DataCenter { - log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will next attempt relocating to another DC server") - relocateSlavesToCandidateSibling() + // Plan C: try replacement intermediate main in other DC... + if candidateSiblingOfIntermediateMain != nil && candidateSiblingOfIntermediateMain.DataCenter != intermediateMainInstance.DataCenter { + log.Debugf("topology_recovery: - RecoverDeadIntermediateMain: will next attempt relocating to another DC server") + relocateSubordinatesToCandidateSibling() } } if !recoveryResolved { - // Do we still have leftovers? Some slaves couldn't move? Couldn't regroup? Only left with regroup's resulting leader? + // Do we still have leftovers? Some subordinates couldn't move? Couldn't regroup? Only left with regroup's resulting leader? // nothing moved? // We don't care much if regroup made it or not. We prefer that it made it, in whcih case we only need to relocate up - // one slave, but the operation is still valid if regroup partially/completely failed. 
We just promote anything + // one subordinate, but the operation is still valid if regroup partially/completely failed. We just promote anything // not regrouped. // So, match up all that's left, plan D - log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will next attempt to relocate up from %+v", *failedInstanceKey) + log.Debugf("topology_recovery: - RecoverDeadIntermediateMain: will next attempt to relocate up from %+v", *failedInstanceKey) var errs []error - var relocatedSlaves [](*inst.Instance) - relocatedSlaves, successorInstance, err, errs = inst.RelocateSlaves(failedInstanceKey, &analysisEntry.AnalyzedInstanceMasterKey, "") + var relocatedSubordinates [](*inst.Instance) + relocatedSubordinates, successorInstance, err, errs = inst.RelocateSubordinates(failedInstanceKey, &analysisEntry.AnalyzedInstanceMainKey, "") topologyRecovery.AddErrors(errs) - topologyRecovery.ParticipatingInstanceKeys.AddKey(analysisEntry.AnalyzedInstanceMasterKey) + topologyRecovery.ParticipatingInstanceKeys.AddKey(analysisEntry.AnalyzedInstanceMainKey) - if len(relocatedSlaves) > 0 { + if len(relocatedSubordinates) > 0 { recoveryResolved = true - inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Relocated slaves under: %+v %d errors: %+v", successorInstance.Key, len(errs), errs)) + inst.AuditOperation("recover-dead-intermediate-main", failedInstanceKey, fmt.Sprintf("Relocated subordinates under: %+v %d errors: %+v", successorInstance.Key, len(errs), errs)) } else { - err = log.Errorf("topology_recovery: RecoverDeadIntermediateMaster failed to match up any slave from %+v", *failedInstanceKey) + err = log.Errorf("topology_recovery: RecoverDeadIntermediateMain failed to match up any subordinate from %+v", *failedInstanceKey) topologyRecovery.AddError(err) } } @@ -717,113 +717,113 @@ func RecoverDeadIntermediateMaster(topologyRecovery *TopologyRecovery, skipProce return successorInstance, err } -// checkAndRecoverDeadIntermediateMaster checks a given analysis, decides whether to take action, and possibly takes action +// checkAndRecoverDeadIntermediateMain checks a given analysis, decides whether to take action, and possibly takes action // Returns true when action was taken. -func checkAndRecoverDeadIntermediateMaster(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *inst.InstanceKey, forceInstanceRecovery bool, skipProcesses bool) (bool, *TopologyRecovery, error) { - if !(forceInstanceRecovery || analysisEntry.ClusterDetails.HasAutomatedIntermediateMasterRecovery) { +func checkAndRecoverDeadIntermediateMain(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *inst.InstanceKey, forceInstanceRecovery bool, skipProcesses bool) (bool, *TopologyRecovery, error) { + if !(forceInstanceRecovery || analysisEntry.ClusterDetails.HasAutomatedIntermediateMainRecovery) { return false, nil, nil } topologyRecovery, err := AttemptRecoveryRegistration(&analysisEntry, !forceInstanceRecovery, !forceInstanceRecovery) if topologyRecovery == nil { - log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another RecoverDeadIntermediateMaster.", analysisEntry.AnalyzedInstanceKey) + log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another RecoverDeadIntermediateMain.", analysisEntry.AnalyzedInstanceKey) return false, nil, err } // That's it! We must do recovery! 
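RecoverDeadIntermediateMain above works through a fixed sequence of fallbacks (candidate sibling in the same DC, regroup, candidate sibling elsewhere, relocate up). A generic sketch of that try-in-order shape, with hypothetical plan functions standing in for the real relocation calls:

package main

import (
	"errors"
	"fmt"
)

// plan is one recovery attempt; it returns the promoted/target host on success.
type plan struct {
	name string
	run  func() (string, error)
}

// recoverWithFallbacks runs plans in order and stops at the first that resolves.
func recoverWithFallbacks(plans []plan) (string, error) {
	for _, p := range plans {
		if successor, err := p.run(); err == nil {
			fmt.Println("resolved by", p.name)
			return successor, nil
		}
	}
	return "", errors.New("all recovery plans failed")
}

func main() {
	plans := []plan{
		{"candidate sibling, same DC", func() (string, error) { return "", errors.New("no candidate in DC") }},
		{"regroup subordinates", func() (string, error) { return "sub-2:3306", nil }},
		{"relocate up to grandparent main", func() (string, error) { return "main-0:3306", nil }},
	}
	successor, err := recoverWithFallbacks(plans)
	fmt.Println(successor, err)
}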
- recoverDeadIntermediateMasterCounter.Inc(1) - promotedSlave, err := RecoverDeadIntermediateMaster(topologyRecovery, skipProcesses) - if promotedSlave != nil { + recoverDeadIntermediateMainCounter.Inc(1) + promotedSubordinate, err := RecoverDeadIntermediateMain(topologyRecovery, skipProcesses) + if promotedSubordinate != nil { // success - recoverDeadIntermediateMasterSuccessCounter.Inc(1) + recoverDeadIntermediateMainSuccessCounter.Inc(1) if !skipProcesses { - // Execute post intermediate-master-failover processes - topologyRecovery.SuccessorKey = &promotedSlave.Key - topologyRecovery.SuccessorAlias = promotedSlave.InstanceAlias - executeProcesses(config.Config.PostIntermediateMasterFailoverProcesses, "PostIntermediateMasterFailoverProcesses", topologyRecovery, false) + // Execute post intermediate-main-failover processes + topologyRecovery.SuccessorKey = &promotedSubordinate.Key + topologyRecovery.SuccessorAlias = promotedSubordinate.InstanceAlias + executeProcesses(config.Config.PostIntermediateMainFailoverProcesses, "PostIntermediateMainFailoverProcesses", topologyRecovery, false) } } else { - recoverDeadIntermediateMasterFailureCounter.Inc(1) + recoverDeadIntermediateMainFailureCounter.Inc(1) } return true, topologyRecovery, err } -// RecoverDeadCoMaster recovers a dead co-master, complete logic inside -func RecoverDeadCoMaster(topologyRecovery *TopologyRecovery, skipProcesses bool) (promotedSlave *inst.Instance, lostSlaves [](*inst.Instance), err error) { +// RecoverDeadCoMain recovers a dead co-main, complete logic inside +func RecoverDeadCoMain(topologyRecovery *TopologyRecovery, skipProcesses bool) (promotedSubordinate *inst.Instance, lostSubordinates [](*inst.Instance), err error) { analysisEntry := &topologyRecovery.AnalysisEntry failedInstanceKey := &analysisEntry.AnalyzedInstanceKey - otherCoMasterKey := &analysisEntry.AnalyzedInstanceMasterKey - otherCoMaster, found, _ := inst.ReadInstance(otherCoMasterKey) - if otherCoMaster == nil || !found { - return nil, lostSlaves, topologyRecovery.AddError(log.Errorf("RecoverDeadCoMaster: could not read info for co-master %+v of %+v", *otherCoMasterKey, *failedInstanceKey)) + otherCoMainKey := &analysisEntry.AnalyzedInstanceMainKey + otherCoMain, found, _ := inst.ReadInstance(otherCoMainKey) + if otherCoMain == nil || !found { + return nil, lostSubordinates, topologyRecovery.AddError(log.Errorf("RecoverDeadCoMain: could not read info for co-main %+v of %+v", *otherCoMainKey, *failedInstanceKey)) } - inst.AuditOperation("recover-dead-co-master", failedInstanceKey, "problem found; will recover") + inst.AuditOperation("recover-dead-co-main", failedInstanceKey, "problem found; will recover") if !skipProcesses { if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", topologyRecovery, true); err != nil { - return nil, lostSlaves, topologyRecovery.AddError(err) + return nil, lostSubordinates, topologyRecovery.AddError(err) } } - log.Debugf("topology_recovery: RecoverDeadCoMaster: will recover %+v", *failedInstanceKey) + log.Debugf("topology_recovery: RecoverDeadCoMain: will recover %+v", *failedInstanceKey) - var coMasterRecoveryType MasterRecoveryType = MasterRecoveryPseudoGTID + var coMainRecoveryType MainRecoveryType = MainRecoveryPseudoGTID if analysisEntry.OracleGTIDImmediateTopology || analysisEntry.MariaDBGTIDImmediateTopology { - coMasterRecoveryType = MasterRecoveryGTID + coMainRecoveryType = MainRecoveryGTID } - log.Debugf("topology_recovery: RecoverDeadCoMaster: coMasterRecoveryType=%+v", 
coMasterRecoveryType) + log.Debugf("topology_recovery: RecoverDeadCoMain: coMainRecoveryType=%+v", coMainRecoveryType) - var cannotReplicateSlaves [](*inst.Instance) - switch coMasterRecoveryType { - case MasterRecoveryGTID: + var cannotReplicateSubordinates [](*inst.Instance) + switch coMainRecoveryType { + case MainRecoveryGTID: { - lostSlaves, _, cannotReplicateSlaves, promotedSlave, err = inst.RegroupSlavesGTID(failedInstanceKey, true, nil) + lostSubordinates, _, cannotReplicateSubordinates, promotedSubordinate, err = inst.RegroupSubordinatesGTID(failedInstanceKey, true, nil) } - case MasterRecoveryPseudoGTID: + case MainRecoveryPseudoGTID: { - lostSlaves, _, _, cannotReplicateSlaves, promotedSlave, err = inst.RegroupSlavesPseudoGTIDIncludingSubSlavesOfBinlogServers(failedInstanceKey, true, nil, &topologyRecovery.PostponedFunctionsContainer) + lostSubordinates, _, _, cannotReplicateSubordinates, promotedSubordinate, err = inst.RegroupSubordinatesPseudoGTIDIncludingSubSubordinatesOfBinlogServers(failedInstanceKey, true, nil, &topologyRecovery.PostponedFunctionsContainer) } } topologyRecovery.AddError(err) - lostSlaves = append(lostSlaves, cannotReplicateSlaves...) + lostSubordinates = append(lostSubordinates, cannotReplicateSubordinates...) - mustPromoteOtherCoMaster := config.Config.CoMasterRecoveryMustPromoteOtherCoMaster - if !otherCoMaster.ReadOnly { - log.Debugf("topology_recovery: RecoverDeadCoMaster: other co-master %+v is writeable hence has to be promoted", otherCoMaster.Key) - mustPromoteOtherCoMaster = true + mustPromoteOtherCoMain := config.Config.CoMainRecoveryMustPromoteOtherCoMain + if !otherCoMain.ReadOnly { + log.Debugf("topology_recovery: RecoverDeadCoMain: other co-main %+v is writeable hence has to be promoted", otherCoMain.Key) + mustPromoteOtherCoMain = true } - log.Debugf("topology_recovery: RecoverDeadCoMaster: mustPromoteOtherCoMaster? %+v", mustPromoteOtherCoMaster) + log.Debugf("topology_recovery: RecoverDeadCoMain: mustPromoteOtherCoMain? %+v", mustPromoteOtherCoMain) - if promotedSlave != nil { - topologyRecovery.ParticipatingInstanceKeys.AddKey(promotedSlave.Key) - if mustPromoteOtherCoMaster { - log.Debugf("topology_recovery: mustPromoteOtherCoMaster. Verifying that %+v is/can be promoted", *otherCoMasterKey) - promotedSlave, err = replacePromotedSlaveWithCandidate(failedInstanceKey, promotedSlave, otherCoMasterKey) + if promotedSubordinate != nil { + topologyRecovery.ParticipatingInstanceKeys.AddKey(promotedSubordinate.Key) + if mustPromoteOtherCoMain { + log.Debugf("topology_recovery: mustPromoteOtherCoMain. Verifying that %+v is/can be promoted", *otherCoMainKey) + promotedSubordinate, err = replacePromotedSubordinateWithCandidate(failedInstanceKey, promotedSubordinate, otherCoMainKey) } else { // We are allowed to promote any server - promotedSlave, err = replacePromotedSlaveWithCandidate(failedInstanceKey, promotedSlave, nil) + promotedSubordinate, err = replacePromotedSubordinateWithCandidate(failedInstanceKey, promotedSubordinate, nil) - if promotedSlave.DataCenter == otherCoMaster.DataCenter && - promotedSlave.PhysicalEnvironment == otherCoMaster.PhysicalEnvironment && false { - // and _still_ we prefer to promote the co-master! They're in same env & DC so no worries about geo issues! 
- promotedSlave, err = replacePromotedSlaveWithCandidate(failedInstanceKey, promotedSlave, otherCoMasterKey) + if promotedSubordinate.DataCenter == otherCoMain.DataCenter && + promotedSubordinate.PhysicalEnvironment == otherCoMain.PhysicalEnvironment && false { + // and _still_ we prefer to promote the co-main! They're in same env & DC so no worries about geo issues! + promotedSubordinate, err = replacePromotedSubordinateWithCandidate(failedInstanceKey, promotedSubordinate, otherCoMainKey) } } topologyRecovery.AddError(err) } - if promotedSlave != nil { - if mustPromoteOtherCoMaster && !promotedSlave.Key.Equals(otherCoMasterKey) { - topologyRecovery.AddError(log.Errorf("RecoverDeadCoMaster: could not manage to promote other-co-master %+v; was only able to promote %+v; CoMasterRecoveryMustPromoteOtherCoMaster is true, therefore failing", *otherCoMasterKey, promotedSlave.Key)) - promotedSlave = nil + if promotedSubordinate != nil { + if mustPromoteOtherCoMain && !promotedSubordinate.Key.Equals(otherCoMainKey) { + topologyRecovery.AddError(log.Errorf("RecoverDeadCoMain: could not manage to promote other-co-main %+v; was only able to promote %+v; CoMainRecoveryMustPromoteOtherCoMain is true, therefore failing", *otherCoMainKey, promotedSubordinate.Key)) + promotedSubordinate = nil } } - if promotedSlave != nil { - topologyRecovery.ParticipatingInstanceKeys.AddKey(promotedSlave.Key) + if promotedSubordinate != nil { + topologyRecovery.ParticipatingInstanceKeys.AddKey(promotedSubordinate.Key) } - // OK, we may have someone promoted. Either this was the other co-master or another slave. - // Noting down that we DO NOT attempt to set a new co-master topology. We are good with remaining with a single master. - // I tried solving the "let's promote a slave and create a new co-master setup" but this turns so complex due to various factors. + // OK, we may have someone promoted. Either this was the other co-main or another subordinate. + // Noting down that we DO NOT attempt to set a new co-main topology. We are good with remaining with a single main. + // I tried solving the "let's promote a subordinate and create a new co-main setup" but this turns so complex due to various factors. // I see this as risky and not worth the questionable benefit. // Maybe future me is a smarter person and finds a simple solution. Unlikely. I'm getting dumber. // @@ -832,97 +832,97 @@ func RecoverDeadCoMaster(topologyRecovery *TopologyRecovery, skipProcesses bool) // Say we started with M1<->M2<-S1, with M2 failing, and we promoted S1. // We now have M1->S1 (because S1 is promoted), S1->M2 (because that's what it remembers), M2->M1 (because that's what it remembers) // !! This is an evil 3-node circle that must be broken. - // config.Config.ApplyMySQLPromotionAfterMasterFailover, if true, will cause it to break, because we would RESET SLAVE on S1 + // config.Config.ApplyMySQLPromotionAfterMainFailover, if true, will cause it to break, because we would RESET SLAVE on S1 // but we want to make sure the circle is broken no matter what. 
- // So in the case we promoted not-the-other-co-master, we issue a detach-slave-master-host, which is a reversible operation - if promotedSlave != nil && !promotedSlave.Key.Equals(otherCoMasterKey) { - _, err = inst.DetachSlaveMasterHost(&promotedSlave.Key) + // So in the case we promoted not-the-other-co-main, we issue a detach-subordinate-main-host, which is a reversible operation + if promotedSubordinate != nil && !promotedSubordinate.Key.Equals(otherCoMainKey) { + _, err = inst.DetachSubordinateMainHost(&promotedSubordinate.Key) topologyRecovery.AddError(log.Errore(err)) } - if promotedSlave != nil && len(lostSlaves) > 0 && config.Config.DetachLostSlavesAfterMasterFailover { + if promotedSubordinate != nil && len(lostSubordinates) > 0 && config.Config.DetachLostSubordinatesAfterMainFailover { postponedFunction := func() error { - log.Debugf("topology_recovery: - RecoverDeadCoMaster: lost %+v slaves during recovery process; detaching them", len(lostSlaves)) - for _, slave := range lostSlaves { - slave := slave - inst.DetachSlaveOperation(&slave.Key) + log.Debugf("topology_recovery: - RecoverDeadCoMain: lost %+v subordinates during recovery process; detaching them", len(lostSubordinates)) + for _, subordinate := range lostSubordinates { + subordinate := subordinate + inst.DetachSubordinateOperation(&subordinate.Key) } return nil } topologyRecovery.AddPostponedFunction(postponedFunction) } - if config.Config.MasterFailoverLostInstancesDowntimeMinutes > 0 { + if config.Config.MainFailoverLostInstancesDowntimeMinutes > 0 { postponedFunction := func() error { - inst.BeginDowntime(failedInstanceKey, inst.GetMaintenanceOwner(), inst.DowntimeLostInRecoveryMessage, config.Config.MasterFailoverLostInstancesDowntimeMinutes*60) - for _, slave := range lostSlaves { - slave := slave - inst.BeginDowntime(&slave.Key, inst.GetMaintenanceOwner(), inst.DowntimeLostInRecoveryMessage, config.Config.MasterFailoverLostInstancesDowntimeMinutes*60) + inst.BeginDowntime(failedInstanceKey, inst.GetMaintenanceOwner(), inst.DowntimeLostInRecoveryMessage, config.Config.MainFailoverLostInstancesDowntimeMinutes*60) + for _, subordinate := range lostSubordinates { + subordinate := subordinate + inst.BeginDowntime(&subordinate.Key, inst.GetMaintenanceOwner(), inst.DowntimeLostInRecoveryMessage, config.Config.MainFailoverLostInstancesDowntimeMinutes*60) } return nil } topologyRecovery.AddPostponedFunction(postponedFunction) } - return promotedSlave, lostSlaves, err + return promotedSubordinate, lostSubordinates, err } -// checkAndRecoverDeadCoMaster checks a given analysis, decides whether to take action, and possibly takes action +// checkAndRecoverDeadCoMain checks a given analysis, decides whether to take action, and possibly takes action // Returns true when action was taken. 
-func checkAndRecoverDeadCoMaster(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *inst.InstanceKey, forceInstanceRecovery bool, skipProcesses bool) (bool, *TopologyRecovery, error) { +func checkAndRecoverDeadCoMain(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *inst.InstanceKey, forceInstanceRecovery bool, skipProcesses bool) (bool, *TopologyRecovery, error) { failedInstanceKey := &analysisEntry.AnalyzedInstanceKey - if !(forceInstanceRecovery || analysisEntry.ClusterDetails.HasAutomatedMasterRecovery) { + if !(forceInstanceRecovery || analysisEntry.ClusterDetails.HasAutomatedMainRecovery) { return false, nil, nil } topologyRecovery, err := AttemptRecoveryRegistration(&analysisEntry, !forceInstanceRecovery, !forceInstanceRecovery) if topologyRecovery == nil { - log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another RecoverDeadCoMaster.", analysisEntry.AnalyzedInstanceKey) + log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another RecoverDeadCoMain.", analysisEntry.AnalyzedInstanceKey) return false, nil, err } // That's it! We must do recovery! - recoverDeadCoMasterCounter.Inc(1) - promotedSlave, lostSlaves, err := RecoverDeadCoMaster(topologyRecovery, skipProcesses) - ResolveRecovery(topologyRecovery, promotedSlave) - if promotedSlave == nil { - inst.AuditOperation("recover-dead-co-master", failedInstanceKey, "Failure: no slave promoted.") + recoverDeadCoMainCounter.Inc(1) + promotedSubordinate, lostSubordinates, err := RecoverDeadCoMain(topologyRecovery, skipProcesses) + ResolveRecovery(topologyRecovery, promotedSubordinate) + if promotedSubordinate == nil { + inst.AuditOperation("recover-dead-co-main", failedInstanceKey, "Failure: no subordinate promoted.") } else { - inst.AuditOperation("recover-dead-co-master", failedInstanceKey, fmt.Sprintf("promoted: %+v", promotedSlave.Key)) + inst.AuditOperation("recover-dead-co-main", failedInstanceKey, fmt.Sprintf("promoted: %+v", promotedSubordinate.Key)) } - topologyRecovery.LostSlaves.AddInstances(lostSlaves) - if promotedSlave != nil { + topologyRecovery.LostSubordinates.AddInstances(lostSubordinates) + if promotedSubordinate != nil { // success - recoverDeadCoMasterSuccessCounter.Inc(1) + recoverDeadCoMainSuccessCounter.Inc(1) - if config.Config.ApplyMySQLPromotionAfterMasterFailover { - log.Debugf("topology_recovery: - RecoverDeadMaster: will apply MySQL changes to promoted master") - inst.SetReadOnly(&promotedSlave.Key, false) + if config.Config.ApplyMySQLPromotionAfterMainFailover { + log.Debugf("topology_recovery: - RecoverDeadMain: will apply MySQL changes to promoted main") + inst.SetReadOnly(&promotedSubordinate.Key, false) } if !skipProcesses { - // Execute post intermediate-master-failover processes - topologyRecovery.SuccessorKey = &promotedSlave.Key - topologyRecovery.SuccessorAlias = promotedSlave.InstanceAlias - executeProcesses(config.Config.PostMasterFailoverProcesses, "PostMasterFailoverProcesses", topologyRecovery, false) + // Execute post intermediate-main-failover processes + topologyRecovery.SuccessorKey = &promotedSubordinate.Key + topologyRecovery.SuccessorAlias = promotedSubordinate.InstanceAlias + executeProcesses(config.Config.PostMainFailoverProcesses, "PostMainFailoverProcesses", topologyRecovery, false) } } else { - recoverDeadCoMasterFailureCounter.Inc(1) + recoverDeadCoMainFailureCounter.Inc(1) } return true, topologyRecovery, err } -// checkAndRecoverUnreachableMasterWithStaleSlaves executes an 
external process. No other action is taken. +// checkAndRecoverUnreachableMainWithStaleSubordinates executes an external process. No other action is taken. // Returns false. -func checkAndRecoverUnreachableMasterWithStaleSlaves(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *inst.InstanceKey, forceInstanceRecovery bool, skipProcesses bool) (bool, *TopologyRecovery, error) { +func checkAndRecoverUnreachableMainWithStaleSubordinates(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *inst.InstanceKey, forceInstanceRecovery bool, skipProcesses bool) (bool, *TopologyRecovery, error) { topologyRecovery, err := AttemptRecoveryRegistration(&analysisEntry, !forceInstanceRecovery, !forceInstanceRecovery) if topologyRecovery == nil { - log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another UnreachableMasterWithStaleSlaves.", analysisEntry.AnalyzedInstanceKey) + log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another UnreachableMainWithStaleSubordinates.", analysisEntry.AnalyzedInstanceKey) } else { - recoverUnreachableMasterWithStaleSlavesCounter.Inc(1) + recoverUnreachableMainWithStaleSubordinatesCounter.Inc(1) if !skipProcesses { - err := executeProcesses(config.Config.UnreachableMasterWithStaleSlavesProcesses, "UnreachableMasterWithStaleSlavesProcesses", topologyRecovery, false) + err := executeProcesses(config.Config.UnreachableMainWithStaleSubordinatesProcesses, "UnreachableMainWithStaleSubordinatesProcesses", topologyRecovery, false) if err != nil { - recoverUnreachableMasterWithStaleSlavesFailureCounter.Inc(1) + recoverUnreachableMainWithStaleSubordinatesFailureCounter.Inc(1) } else { - recoverUnreachableMasterWithStaleSlavesSuccessCounter.Inc(1) + recoverUnreachableMainWithStaleSubordinatesSuccessCounter.Inc(1) } } } @@ -947,15 +947,15 @@ func emergentlyReadTopologyInstance(instanceKey *inst.InstanceKey, analysisCode }) } -// Force reading of slaves of given instance. This is because we suspect the instance is dead, and want to speed up -// detection of replication failure from its slaves. -func emergentlyReadTopologyInstanceSlaves(instanceKey *inst.InstanceKey, analysisCode inst.AnalysisCode) { - slaves, err := inst.ReadSlaveInstancesIncludingBinlogServerSubSlaves(instanceKey) +// Force reading of subordinates of given instance. This is because we suspect the instance is dead, and want to speed up +// detection of replication failure from its subordinates. 
+func emergentlyReadTopologyInstanceSubordinates(instanceKey *inst.InstanceKey, analysisCode inst.AnalysisCode) { + subordinates, err := inst.ReadSubordinateInstancesIncludingBinlogServerSubSubordinates(instanceKey) if err != nil { return } - for _, slave := range slaves { - go emergentlyReadTopologyInstance(&slave.Key, analysisCode) + for _, subordinate := range subordinates { + go emergentlyReadTopologyInstance(&subordinate.Key, analysisCode) } } @@ -980,36 +980,36 @@ func executeCheckAndRecoverFunction(analysisEntry inst.ReplicationAnalysis, cand var checkAndRecoverFunction func(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *inst.InstanceKey, forceInstanceRecovery bool, skipProcesses bool) (recoveryAttempted bool, topologyRecovery *TopologyRecovery, err error) = nil switch analysisEntry.Analysis { - case inst.DeadMaster: - checkAndRecoverFunction = checkAndRecoverDeadMaster - case inst.DeadMasterAndSomeSlaves: - checkAndRecoverFunction = checkAndRecoverDeadMaster - case inst.DeadIntermediateMaster: - checkAndRecoverFunction = checkAndRecoverDeadIntermediateMaster - case inst.DeadIntermediateMasterAndSomeSlaves: - checkAndRecoverFunction = checkAndRecoverDeadIntermediateMaster - case inst.DeadIntermediateMasterWithSingleSlaveFailingToConnect: - checkAndRecoverFunction = checkAndRecoverDeadIntermediateMaster - case inst.AllIntermediateMasterSlavesFailingToConnectOrDead: - checkAndRecoverFunction = checkAndRecoverDeadIntermediateMaster - case inst.DeadCoMaster: - checkAndRecoverFunction = checkAndRecoverDeadCoMaster - case inst.DeadCoMasterAndSomeSlaves: - checkAndRecoverFunction = checkAndRecoverDeadCoMaster - case inst.DeadMasterAndSlaves: - go emergentlyReadTopologyInstance(&analysisEntry.AnalyzedInstanceMasterKey, analysisEntry.Analysis) - case inst.UnreachableMaster: - go emergentlyReadTopologyInstanceSlaves(&analysisEntry.AnalyzedInstanceKey, analysisEntry.Analysis) - case inst.AllMasterSlavesNotReplicating: + case inst.DeadMain: + checkAndRecoverFunction = checkAndRecoverDeadMain + case inst.DeadMainAndSomeSubordinates: + checkAndRecoverFunction = checkAndRecoverDeadMain + case inst.DeadIntermediateMain: + checkAndRecoverFunction = checkAndRecoverDeadIntermediateMain + case inst.DeadIntermediateMainAndSomeSubordinates: + checkAndRecoverFunction = checkAndRecoverDeadIntermediateMain + case inst.DeadIntermediateMainWithSingleSubordinateFailingToConnect: + checkAndRecoverFunction = checkAndRecoverDeadIntermediateMain + case inst.AllIntermediateMainSubordinatesFailingToConnectOrDead: + checkAndRecoverFunction = checkAndRecoverDeadIntermediateMain + case inst.DeadCoMain: + checkAndRecoverFunction = checkAndRecoverDeadCoMain + case inst.DeadCoMainAndSomeSubordinates: + checkAndRecoverFunction = checkAndRecoverDeadCoMain + case inst.DeadMainAndSubordinates: + go emergentlyReadTopologyInstance(&analysisEntry.AnalyzedInstanceMainKey, analysisEntry.Analysis) + case inst.UnreachableMain: + go emergentlyReadTopologyInstanceSubordinates(&analysisEntry.AnalyzedInstanceKey, analysisEntry.Analysis) + case inst.AllMainSubordinatesNotReplicating: go emergentlyReadTopologyInstance(&analysisEntry.AnalyzedInstanceKey, analysisEntry.Analysis) - case inst.FirstTierSlaveFailingToConnectToMaster: - go emergentlyReadTopologyInstance(&analysisEntry.AnalyzedInstanceMasterKey, analysisEntry.Analysis) - case inst.UnreachableMasterWithStaleSlaves: - checkAndRecoverFunction = checkAndRecoverUnreachableMasterWithStaleSlaves + case inst.FirstTierSubordinateFailingToConnectToMain: + go 
emergentlyReadTopologyInstance(&analysisEntry.AnalyzedInstanceMainKey, analysisEntry.Analysis)
+	case inst.UnreachableMainWithStaleSubordinates:
+		checkAndRecoverFunction = checkAndRecoverUnreachableMainWithStaleSubordinates
	}
	// Right now this is mostly causing noise with no clear action.
	// Will revisit this in the future.
-	// case inst.AllMasterSlavesStale:
+	// case inst.AllMainSubordinatesStale:
	// 	checkAndRecoverFunction = checkAndRecoverGenericProblem

	if checkAndRecoverFunction == nil {
@@ -1045,7 +1045,7 @@ func executeCheckAndRecoverFunction(analysisEntry inst.ReplicationAnalysis, cand
	}

// CheckAndRecover is the main entry point for the recovery mechanism
-func CheckAndRecover(specificInstance *inst.InstanceKey, candidateInstanceKey *inst.InstanceKey, skipProcesses bool) (recoveryAttempted bool, promotedSlaveKey *inst.InstanceKey, err error) {
+func CheckAndRecover(specificInstance *inst.InstanceKey, candidateInstanceKey *inst.InstanceKey, skipProcesses bool) (recoveryAttempted bool, promotedSubordinateKey *inst.InstanceKey, err error) {
	// Allow the analysis to run even if we don't want to recover
	replicationAnalysis, err := inst.GetReplicationAnalysis("", true, true)
	if err != nil {
@@ -1077,7 +1077,7 @@ func CheckAndRecover(specificInstance *inst.InstanceKey, candidateInstanceKey *i
			var topologyRecovery *TopologyRecovery
			recoveryAttempted, topologyRecovery, err = executeCheckAndRecoverFunction(analysisEntry, candidateInstanceKey, true, skipProcesses)
			if topologyRecovery != nil {
-				promotedSlaveKey = topologyRecovery.SuccessorKey
+				promotedSubordinateKey = topologyRecovery.SuccessorKey
			}
		} else if recoveryDisabledGlobally {
			log.Infof("CheckAndRecover: InstanceKey: %+v, candidateInstanceKey: %+v, "+
@@ -1087,7 +1087,7 @@ func CheckAndRecover(specificInstance *inst.InstanceKey, candidateInstanceKey *i
			go executeCheckAndRecoverFunction(analysisEntry, candidateInstanceKey, false, skipProcesses)
		}
	}
-	return recoveryAttempted, promotedSlaveKey, err
+	return recoveryAttempted, promotedSubordinateKey, err
}

// ForceExecuteRecovery can be called to issue a recovery process even if analysis says there is no recovery case.
@@ -1107,24 +1107,24 @@ func ForceExecuteRecovery(clusterName string, analysisCode inst.AnalysisCode, fa
	return executeCheckAndRecoverFunction(analysisEntry, candidateInstanceKey, true, skipProcesses)
}

-// ForceMasterTakeover *trusts* master of given cluster is dead and fails over to designated instance,
+// ForceMainTakeover *trusts* main of given cluster is dead and fails over to designated instance,
// which has to be its direct child.
-func ForceMasterTakeover(clusterName string, destination *inst.Instance) (topologyRecovery *TopologyRecovery, err error) { - clusterMasters, err := inst.ReadClusterWriteableMaster(clusterName) +func ForceMainTakeover(clusterName string, destination *inst.Instance) (topologyRecovery *TopologyRecovery, err error) { + clusterMains, err := inst.ReadClusterWriteableMain(clusterName) if err != nil { - return nil, fmt.Errorf("Cannot deduce cluster master for %+v", clusterName) + return nil, fmt.Errorf("Cannot deduce cluster main for %+v", clusterName) } - if len(clusterMasters) != 1 { - return nil, fmt.Errorf("Cannot deduce cluster master for %+v", clusterName) + if len(clusterMains) != 1 { + return nil, fmt.Errorf("Cannot deduce cluster main for %+v", clusterName) } - clusterMaster := clusterMasters[0] + clusterMain := clusterMains[0] - if !destination.MasterKey.Equals(&clusterMaster.Key) { - return nil, fmt.Errorf("You may only promote a direct child of the master %+v. The master of %+v is %+v.", clusterMaster.Key, destination.Key, destination.MasterKey) + if !destination.MainKey.Equals(&clusterMain.Key) { + return nil, fmt.Errorf("You may only promote a direct child of the main %+v. The main of %+v is %+v.", clusterMain.Key, destination.Key, destination.MainKey) } - log.Debugf("Will demote %+v and promote %+v instead", clusterMaster.Key, destination.Key) + log.Debugf("Will demote %+v and promote %+v instead", clusterMain.Key, destination.Key) - recoveryAttempted, topologyRecovery, err := ForceExecuteRecovery(clusterName, inst.DeadMaster, &clusterMaster.Key, &destination.Key, false) + recoveryAttempted, topologyRecovery, err := ForceExecuteRecovery(clusterName, inst.DeadMain, &clusterMain.Key, &destination.Key, false) if err != nil { return nil, err } @@ -1135,64 +1135,64 @@ func ForceMasterTakeover(clusterName string, destination *inst.Instance) (topolo return nil, fmt.Errorf("Recovery attempted but with no results. This should not happen") } if topologyRecovery.SuccessorKey == nil { - return nil, fmt.Errorf("Recovery attempted yet no slave promoted") + return nil, fmt.Errorf("Recovery attempted yet no subordinate promoted") } return topologyRecovery, nil } -// GracefulMasterTakeover will demote master of existing topology and promote its +// GracefulMainTakeover will demote main of existing topology and promote its // direct replica instead. // It expects that replica to have no siblings. -// This function is graceful in that it will first lock down the master, then wait +// This function is graceful in that it will first lock down the main, then wait // for the designated replica to catch up with last position. -func GracefulMasterTakeover(clusterName string) (topologyRecovery *TopologyRecovery, promotedMasterCoordinates *inst.BinlogCoordinates, err error) { - clusterMasters, err := inst.ReadClusterWriteableMaster(clusterName) +func GracefulMainTakeover(clusterName string) (topologyRecovery *TopologyRecovery, promotedMainCoordinates *inst.BinlogCoordinates, err error) { + clusterMains, err := inst.ReadClusterWriteableMain(clusterName) if err != nil { - return nil, nil, fmt.Errorf("Cannot deduce cluster master for %+v", clusterName) + return nil, nil, fmt.Errorf("Cannot deduce cluster main for %+v", clusterName) } - if len(clusterMasters) != 1 { - return nil, nil, fmt.Errorf("Cannot deduce cluster master for %+v. Found %+v potential masters", clusterName, len(clusterMasters)) + if len(clusterMains) != 1 { + return nil, nil, fmt.Errorf("Cannot deduce cluster main for %+v. 
Found %+v potential mains", clusterName, len(clusterMains)) } - clusterMaster := clusterMasters[0] - if len(clusterMaster.SlaveHosts) == 0 { - return nil, nil, fmt.Errorf("Master %+v doesn't seem to have replicas", clusterMaster.Key) + clusterMain := clusterMains[0] + if len(clusterMain.SubordinateHosts) == 0 { + return nil, nil, fmt.Errorf("Main %+v doesn't seem to have replicas", clusterMain.Key) } - if len(clusterMaster.SlaveHosts) > 1 { - return nil, nil, fmt.Errorf("GracefulMasterTakeover: master %+v should only have one replica (making the takeover safe and simple), but has %+v. Aborting", clusterMaster.Key, len(clusterMaster.SlaveHosts)) + if len(clusterMain.SubordinateHosts) > 1 { + return nil, nil, fmt.Errorf("GracefulMainTakeover: main %+v should only have one replica (making the takeover safe and simple), but has %+v. Aborting", clusterMain.Key, len(clusterMain.SubordinateHosts)) } - designatedInstanceKey := &(clusterMaster.SlaveHosts.GetInstanceKeys()[0]) + designatedInstanceKey := &(clusterMain.SubordinateHosts.GetInstanceKeys()[0]) designatedInstance, err := inst.ReadTopologyInstanceUnbuffered(designatedInstanceKey) if err != nil { return nil, nil, err } - masterOfDesigntaedInstance, err := inst.GetInstanceMaster(designatedInstance) + mainOfDesigntaedInstance, err := inst.GetInstanceMain(designatedInstance) if err != nil { return nil, nil, err } - if !masterOfDesigntaedInstance.Key.Equals(&clusterMaster.Key) { - return nil, nil, fmt.Errorf("Sanity check failure. It seems like the designated instance %+v does not replicate from the master %+v (designated instance's master key is %+v). This error is strange. Panicking", designatedInstance.Key, clusterMaster.Key, designatedInstance.MasterKey) + if !mainOfDesigntaedInstance.Key.Equals(&clusterMain.Key) { + return nil, nil, fmt.Errorf("Sanity check failure. It seems like the designated instance %+v does not replicate from the main %+v (designated instance's main key is %+v). This error is strange. Panicking", designatedInstance.Key, clusterMain.Key, designatedInstance.MainKey) } if !designatedInstance.HasReasonableMaintenanceReplicationLag() { return nil, nil, fmt.Errorf("Desginated instance %+v seems to be lagging to much for thie operation. 
Aborting.", designatedInstance.Key) } - log.Debugf("Will demote %+v and promote %+v instead", clusterMaster.Key, designatedInstance.Key) + log.Debugf("Will demote %+v and promote %+v instead", clusterMain.Key, designatedInstance.Key) - if designatedInstance, err = inst.StopSlave(&designatedInstance.Key); err != nil { + if designatedInstance, err = inst.StopSubordinate(&designatedInstance.Key); err != nil { return nil, nil, err } - log.Debugf("Will set %+v as read_only", clusterMaster.Key) - if clusterMaster, err = inst.SetReadOnly(&clusterMaster.Key, true); err != nil { + log.Debugf("Will set %+v as read_only", clusterMain.Key) + if clusterMain, err = inst.SetReadOnly(&clusterMain.Key, true); err != nil { return nil, nil, err } - log.Debugf("Will advance %+v to master coordinates %+v", designatedInstance.Key, clusterMaster.SelfBinlogCoordinates) - if designatedInstance, err = inst.StartSlaveUntilMasterCoordinates(&designatedInstance.Key, &clusterMaster.SelfBinlogCoordinates); err != nil { + log.Debugf("Will advance %+v to main coordinates %+v", designatedInstance.Key, clusterMain.SelfBinlogCoordinates) + if designatedInstance, err = inst.StartSubordinateUntilMainCoordinates(&designatedInstance.Key, &clusterMain.SelfBinlogCoordinates); err != nil { return nil, nil, err } - promotedMasterCoordinates = &designatedInstance.SelfBinlogCoordinates + promotedMainCoordinates = &designatedInstance.SelfBinlogCoordinates - recoveryAttempted, topologyRecovery, err := ForceExecuteRecovery(clusterName, inst.DeadMaster, &clusterMaster.Key, &designatedInstance.Key, false) + recoveryAttempted, topologyRecovery, err := ForceExecuteRecovery(clusterName, inst.DeadMain, &clusterMain.Key, &designatedInstance.Key, false) if err != nil { return nil, nil, err } @@ -1203,7 +1203,7 @@ func GracefulMasterTakeover(clusterName string) (topologyRecovery *TopologyRecov return nil, nil, fmt.Errorf("Recovery attempted but with no results. This should not happen") } if topologyRecovery.SuccessorKey == nil { - return nil, nil, fmt.Errorf("Recovery attempted yet no slave promoted") + return nil, nil, fmt.Errorf("Recovery attempted yet no subordinate promoted") } - return topologyRecovery, promotedMasterCoordinates, nil + return topologyRecovery, promotedMainCoordinates, nil } diff --git a/go/logic/topology_recovery_dao.go b/go/logic/topology_recovery_dao.go index 8b7f6bea..97b4e3f9 100644 --- a/go/logic/topology_recovery_dao.go +++ b/go/logic/topology_recovery_dao.go @@ -43,8 +43,8 @@ func AttemptFailureDetectionRegistration(analysisEntry *inst.ReplicationAnalysis analysis, cluster_name, cluster_alias, - count_affected_slaves, - slave_hosts + count_affected_subordinates, + subordinate_hosts ) values ( ?, ?, @@ -60,7 +60,7 @@ func AttemptFailureDetectionRegistration(analysisEntry *inst.ReplicationAnalysis ? 
) `, analysisEntry.AnalyzedInstanceKey.Hostname, analysisEntry.AnalyzedInstanceKey.Port, process.ThisHostname, process.ProcessToken.Hash, - string(analysisEntry.Analysis), analysisEntry.ClusterDetails.ClusterName, analysisEntry.ClusterDetails.ClusterAlias, analysisEntry.CountSlaves, analysisEntry.SlaveHosts.ToCommaDelimitedList(), + string(analysisEntry.Analysis), analysisEntry.ClusterDetails.ClusterName, analysisEntry.ClusterDetails.ClusterAlias, analysisEntry.CountSubordinates, analysisEntry.SubordinateHosts.ToCommaDelimitedList(), ) if err != nil { return false, log.Errore(err) @@ -146,8 +146,8 @@ func AttemptRecoveryRegistration(analysisEntry *inst.ReplicationAnalysis, failIf analysis, cluster_name, cluster_alias, - count_affected_slaves, - slave_hosts, + count_affected_subordinates, + subordinate_hosts, last_detection_id ) values ( ?, @@ -165,7 +165,7 @@ func AttemptRecoveryRegistration(analysisEntry *inst.ReplicationAnalysis, failIf (select ifnull(max(detection_id), 0) from topology_failure_detection where hostname=? and port=?) ) `, analysisEntry.AnalyzedInstanceKey.Hostname, analysisEntry.AnalyzedInstanceKey.Port, process.ThisHostname, process.ProcessToken.Hash, - string(analysisEntry.Analysis), analysisEntry.ClusterDetails.ClusterName, analysisEntry.ClusterDetails.ClusterAlias, analysisEntry.CountSlaves, analysisEntry.SlaveHosts.ToCommaDelimitedList(), + string(analysisEntry.Analysis), analysisEntry.ClusterDetails.ClusterName, analysisEntry.ClusterDetails.ClusterAlias, analysisEntry.CountSubordinates, analysisEntry.SubordinateHosts.ToCommaDelimitedList(), analysisEntry.AnalyzedInstanceKey.Hostname, analysisEntry.AnalyzedInstanceKey.Port, ) if err != nil { @@ -374,7 +374,7 @@ func ResolveRecovery(topologyRecovery *TopologyRecovery, successorInstance *inst successor_hostname = ?, successor_port = ?, successor_alias = ?, - lost_slaves = ?, + lost_subordinates = ?, participating_instances = ?, all_errors = ?, end_recovery = NOW() @@ -384,7 +384,7 @@ func ResolveRecovery(topologyRecovery *TopologyRecovery, successorInstance *inst AND processing_node_hostname = ? AND processcing_node_token = ? 
`, isSuccessful, successorKeyToWrite.Hostname, successorKeyToWrite.Port, - successorAliasToWrite, topologyRecovery.LostSlaves.ToCommaDelimitedList(), + successorAliasToWrite, topologyRecovery.LostSubordinates.ToCommaDelimitedList(), topologyRecovery.ParticipatingInstanceKeys.ToCommaDelimitedList(), strings.Join(topologyRecovery.AllErrors, "\n"), topologyRecovery.Id, process.ThisHostname, process.ProcessToken.Hash, @@ -413,10 +413,10 @@ func readRecoveries(whereCondition string, limit string, args []interface{}) ([] analysis, cluster_name, cluster_alias, - count_affected_slaves, - slave_hosts, + count_affected_subordinates, + subordinate_hosts, participating_instances, - lost_slaves, + lost_subordinates, all_errors, acknowledged, acknowledged_at, @@ -446,8 +446,8 @@ func readRecoveries(whereCondition string, limit string, args []interface{}) ([] topologyRecovery.AnalysisEntry.Analysis = inst.AnalysisCode(m.GetString("analysis")) topologyRecovery.AnalysisEntry.ClusterDetails.ClusterName = m.GetString("cluster_name") topologyRecovery.AnalysisEntry.ClusterDetails.ClusterAlias = m.GetString("cluster_alias") - topologyRecovery.AnalysisEntry.CountSlaves = m.GetUint("count_affected_slaves") - topologyRecovery.AnalysisEntry.ReadSlaveHostsFromString(m.GetString("slave_hosts")) + topologyRecovery.AnalysisEntry.CountSubordinates = m.GetUint("count_affected_subordinates") + topologyRecovery.AnalysisEntry.ReadSubordinateHostsFromString(m.GetString("subordinate_hosts")) topologyRecovery.SuccessorKey = &inst.InstanceKey{} topologyRecovery.SuccessorKey.Hostname = m.GetString("successor_hostname") @@ -457,7 +457,7 @@ func readRecoveries(whereCondition string, limit string, args []interface{}) ([] topologyRecovery.AnalysisEntry.ClusterDetails.ReadRecoveryInfo() topologyRecovery.AllErrors = strings.Split(m.GetString("all_errors"), "\n") - topologyRecovery.LostSlaves.ReadCommaDelimitedList(m.GetString("lost_slaves")) + topologyRecovery.LostSubordinates.ReadCommaDelimitedList(m.GetString("lost_subordinates")) topologyRecovery.ParticipatingInstanceKeys.ReadCommaDelimitedList(m.GetString("participating_instances")) topologyRecovery.Acknowledged = m.GetBool("acknowledged") @@ -588,8 +588,8 @@ func readFailureDetections(whereCondition string, limit string, args []interface analysis, cluster_name, cluster_alias, - count_affected_slaves, - slave_hosts, + count_affected_subordinates, + subordinate_hosts, (select max(recovery_id) from topology_recovery where topology_recovery.last_detection_id = detection_id) as related_recovery_id from topology_failure_detection @@ -612,8 +612,8 @@ func readFailureDetections(whereCondition string, limit string, args []interface failureDetection.AnalysisEntry.Analysis = inst.AnalysisCode(m.GetString("analysis")) failureDetection.AnalysisEntry.ClusterDetails.ClusterName = m.GetString("cluster_name") failureDetection.AnalysisEntry.ClusterDetails.ClusterAlias = m.GetString("cluster_alias") - failureDetection.AnalysisEntry.CountSlaves = m.GetUint("count_affected_slaves") - failureDetection.AnalysisEntry.ReadSlaveHostsFromString(m.GetString("slave_hosts")) + failureDetection.AnalysisEntry.CountSubordinates = m.GetUint("count_affected_subordinates") + failureDetection.AnalysisEntry.ReadSubordinateHostsFromString(m.GetString("subordinate_hosts")) failureDetection.RelatedRecoveryId = m.GetInt64("related_recovery_id") diff --git a/tests/integration/instance_dao_test.go b/tests/integration/instance_dao_test.go index 0e2cff06..d8fa71ec 100644 --- a/tests/integration/instance_dao_test.go +++ 
b/tests/integration/instance_dao_test.go @@ -31,23 +31,23 @@ type TestSuite struct{} var _ = Suite(&TestSuite{}) -// This test suite assumes one master and three direct slaves, as follows; +// This test suite assumes one main and three direct subordinates, as follows; // This was setup with mysqlsandbox (using MySQL 5.5.32, not that it matters) via: // $ make_replication_sandbox --how_many_nodes=3 --replication_directory=55orchestrator /path/to/sandboxes/5.5.32 // modify below to fit your own environment -var masterKey = InstanceKey{ +var mainKey = InstanceKey{ Hostname: "127.0.0.1", Port: 22987, } -var slave1Key = InstanceKey{ +var subordinate1Key = InstanceKey{ Hostname: "127.0.0.1", Port: 22988, } -var slave2Key = InstanceKey{ +var subordinate2Key = InstanceKey{ Hostname: "127.0.0.1", Port: 22989, } -var slave3Key = InstanceKey{ +var subordinate3Key = InstanceKey{ Hostname: "127.0.0.1", Port: 22990, } @@ -65,95 +65,95 @@ func (s *TestSuite) SetUpSuite(c *C) { config.Config.MySQLOrchestratorDatabase = "orchestrator" config.Config.MySQLOrchestratorUser = "msandbox" config.Config.MySQLOrchestratorPassword = "msandbox" - config.Config.DiscoverByShowSlaveHosts = true + config.Config.DiscoverByShowSubordinateHosts = true - _, _ = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", masterKey.Hostname, masterKey.Port) - _, _ = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave1Key.Hostname, slave1Key.Port) - _, _ = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave2Key.Hostname, slave2Key.Port) - _, _ = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave3Key.Hostname, slave3Key.Port) + _, _ = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", mainKey.Hostname, mainKey.Port) + _, _ = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", subordinate1Key.Hostname, subordinate1Key.Port) + _, _ = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", subordinate2Key.Hostname, subordinate2Key.Port) + _, _ = db.ExecOrchestrator("delete from database_instance where hostname = ? 
and port = ?", subordinate3Key.Hostname, subordinate3Key.Port) - ExecInstance(&masterKey, "drop database if exists orchestrator_test") - ExecInstance(&masterKey, "create database orchestrator_test") - ExecInstance(&masterKey, `create table orchestrator_test.test_table( + ExecInstance(&mainKey, "drop database if exists orchestrator_test") + ExecInstance(&mainKey, "create database orchestrator_test") + ExecInstance(&mainKey, `create table orchestrator_test.test_table( name varchar(128) charset ascii not null primary key, value varchar(128) charset ascii not null )`) rand.Seed(time.Now().UTC().UnixNano()) } -func (s *TestSuite) TestReadTopologyMaster(c *C) { - key := masterKey +func (s *TestSuite) TestReadTopologyMain(c *C) { + key := mainKey i, _ := ReadTopologyInstanceUnbuffered(&key) c.Assert(i.Key.Hostname, Equals, key.Hostname) - c.Assert(i.IsSlave(), Equals, false) - c.Assert(len(i.SlaveHosts), Equals, 3) - c.Assert(len(i.SlaveHosts.GetInstanceKeys()), Equals, len(i.SlaveHosts)) + c.Assert(i.IsSubordinate(), Equals, false) + c.Assert(len(i.SubordinateHosts), Equals, 3) + c.Assert(len(i.SubordinateHosts.GetInstanceKeys()), Equals, len(i.SubordinateHosts)) } -func (s *TestSuite) TestReadTopologySlave(c *C) { - key := slave3Key +func (s *TestSuite) TestReadTopologySubordinate(c *C) { + key := subordinate3Key i, _ := ReadTopologyInstanceUnbuffered(&key) c.Assert(i.Key.Hostname, Equals, key.Hostname) - c.Assert(i.IsSlave(), Equals, true) - c.Assert(len(i.SlaveHosts), Equals, 0) + c.Assert(i.IsSubordinate(), Equals, true) + c.Assert(len(i.SubordinateHosts), Equals, 0) } -func (s *TestSuite) TestReadTopologyAndInstanceMaster(c *C) { - i, _ := ReadTopologyInstanceUnbuffered(&masterKey) - iRead, found, _ := ReadInstance(&masterKey) +func (s *TestSuite) TestReadTopologyAndInstanceMain(c *C) { + i, _ := ReadTopologyInstanceUnbuffered(&mainKey) + iRead, found, _ := ReadInstance(&mainKey) c.Assert(found, Equals, true) c.Assert(iRead.Key.Hostname, Equals, i.Key.Hostname) c.Assert(iRead.Version, Equals, i.Version) - c.Assert(len(iRead.SlaveHosts), Equals, len(i.SlaveHosts)) + c.Assert(len(iRead.SubordinateHosts), Equals, len(i.SubordinateHosts)) } -func (s *TestSuite) TestReadTopologyAndInstanceSlave(c *C) { - i, _ := ReadTopologyInstanceUnbuffered(&slave1Key) - iRead, found, _ := ReadInstance(&slave1Key) +func (s *TestSuite) TestReadTopologyAndInstanceSubordinate(c *C) { + i, _ := ReadTopologyInstanceUnbuffered(&subordinate1Key) + iRead, found, _ := ReadInstance(&subordinate1Key) c.Assert(found, Equals, true) c.Assert(iRead.Key.Hostname, Equals, i.Key.Hostname) c.Assert(iRead.Version, Equals, i.Version) } -func (s *TestSuite) TestGetMasterOfASlave(c *C) { - i, err := ReadTopologyInstanceUnbuffered(&slave1Key) +func (s *TestSuite) TestGetMainOfASubordinate(c *C) { + i, err := ReadTopologyInstanceUnbuffered(&subordinate1Key) c.Assert(err, IsNil) - master, err := GetInstanceMaster(i) + main, err := GetInstanceMain(i) c.Assert(err, IsNil) - c.Assert(master.IsSlave(), Equals, false) - c.Assert(master.Key.Port, Equals, 22987) + c.Assert(main.IsSubordinate(), Equals, false) + c.Assert(main.Key.Port, Equals, 22987) } -func (s *TestSuite) TestSlavesAreSiblings(c *C) { - i0, _ := ReadTopologyInstanceUnbuffered(&slave1Key) - i1, _ := ReadTopologyInstanceUnbuffered(&slave2Key) +func (s *TestSuite) TestSubordinatesAreSiblings(c *C) { + i0, _ := ReadTopologyInstanceUnbuffered(&subordinate1Key) + i1, _ := ReadTopologyInstanceUnbuffered(&subordinate2Key) c.Assert(InstancesAreSiblings(i0, i1), Equals, true) } func (s 
*TestSuite) TestNonSiblings(c *C) { - i0, _ := ReadTopologyInstanceUnbuffered(&masterKey) - i1, _ := ReadTopologyInstanceUnbuffered(&slave1Key) + i0, _ := ReadTopologyInstanceUnbuffered(&mainKey) + i1, _ := ReadTopologyInstanceUnbuffered(&subordinate1Key) c.Assert(InstancesAreSiblings(i0, i1), Not(Equals), true) } -func (s *TestSuite) TestInstanceIsMasterOf(c *C) { - i0, _ := ReadTopologyInstanceUnbuffered(&masterKey) - i1, _ := ReadTopologyInstanceUnbuffered(&slave1Key) - c.Assert(InstanceIsMasterOf(i0, i1), Equals, true) +func (s *TestSuite) TestInstanceIsMainOf(c *C) { + i0, _ := ReadTopologyInstanceUnbuffered(&mainKey) + i1, _ := ReadTopologyInstanceUnbuffered(&subordinate1Key) + c.Assert(InstanceIsMainOf(i0, i1), Equals, true) } -func (s *TestSuite) TestStopStartSlave(c *C) { +func (s *TestSuite) TestStopStartSubordinate(c *C) { - i, _ := ReadTopologyInstanceUnbuffered(&slave1Key) - c.Assert(i.SlaveRunning(), Equals, true) - i, _ = StopSlaveNicely(&i.Key, 0) + i, _ := ReadTopologyInstanceUnbuffered(&subordinate1Key) + c.Assert(i.SubordinateRunning(), Equals, true) + i, _ = StopSubordinateNicely(&i.Key, 0) - c.Assert(i.SlaveRunning(), Equals, false) + c.Assert(i.SubordinateRunning(), Equals, false) c.Assert(i.SQLThreadUpToDate(), Equals, true) - i, _ = StartSlave(&i.Key) - c.Assert(i.SlaveRunning(), Equals, true) + i, _ = StartSubordinate(&i.Key) + c.Assert(i.SubordinateRunning(), Equals, true) } func (s *TestSuite) TestReadTopologyUnexisting(c *C) { @@ -169,18 +169,18 @@ func (s *TestSuite) TestReadTopologyUnexisting(c *C) { func (s *TestSuite) TestMoveBelowAndBack(c *C) { clearTestMaintenance() // become child - slave1, err := MoveBelow(&slave1Key, &slave2Key) + subordinate1, err := MoveBelow(&subordinate1Key, &subordinate2Key) c.Assert(err, IsNil) - c.Assert(slave1.MasterKey.Equals(&slave2Key), Equals, true) - c.Assert(slave1.SlaveRunning(), Equals, true) + c.Assert(subordinate1.MainKey.Equals(&subordinate2Key), Equals, true) + c.Assert(subordinate1.SubordinateRunning(), Equals, true) // And back; keep topology intact - slave1, _ = MoveUp(&slave1Key) - slave2, _ := ReadTopologyInstanceUnbuffered(&slave2Key) + subordinate1, _ = MoveUp(&subordinate1Key) + subordinate2, _ := ReadTopologyInstanceUnbuffered(&subordinate2Key) - c.Assert(InstancesAreSiblings(slave1, slave2), Equals, true) - c.Assert(slave1.SlaveRunning(), Equals, true) + c.Assert(InstancesAreSiblings(subordinate1, subordinate2), Equals, true) + c.Assert(subordinate1.SubordinateRunning(), Equals, true) } @@ -188,134 +188,134 @@ func (s *TestSuite) TestMoveBelowAndBackComplex(c *C) { clearTestMaintenance() // become child - slave1, _ := MoveBelow(&slave1Key, &slave2Key) + subordinate1, _ := MoveBelow(&subordinate1Key, &subordinate2Key) - c.Assert(slave1.MasterKey.Equals(&slave2Key), Equals, true) - c.Assert(slave1.SlaveRunning(), Equals, true) + c.Assert(subordinate1.MainKey.Equals(&subordinate2Key), Equals, true) + c.Assert(subordinate1.SubordinateRunning(), Equals, true) - // Now let's have fun. Stop slave2 (which is now parent of slave1), execute queries on master, - // move s1 back under master, start all, verify queries. + // Now let's have fun. Stop subordinate2 (which is now parent of subordinate1), execute queries on main, + // move s1 back under main, start all, verify queries. 
- _, err := StopSlave(&slave2Key) + _, err := StopSubordinate(&subordinate2Key) c.Assert(err, IsNil) randValue := rand.Int() - _, err = ExecInstance(&masterKey, `replace into orchestrator_test.test_table (name, value) values ('TestMoveBelowAndBackComplex', ?)`, randValue) + _, err = ExecInstance(&mainKey, `replace into orchestrator_test.test_table (name, value) values ('TestMoveBelowAndBackComplex', ?)`, randValue) c.Assert(err, IsNil) - master, err := ReadTopologyInstanceUnbuffered(&masterKey) + main, err := ReadTopologyInstanceUnbuffered(&mainKey) c.Assert(err, IsNil) // And back; keep topology intact - slave1, err = MoveUp(&slave1Key) + subordinate1, err = MoveUp(&subordinate1Key) c.Assert(err, IsNil) - _, err = MasterPosWait(&slave1Key, &master.SelfBinlogCoordinates) + _, err = MainPosWait(&subordinate1Key, &main.SelfBinlogCoordinates) c.Assert(err, IsNil) - slave2, err := ReadTopologyInstanceUnbuffered(&slave2Key) + subordinate2, err := ReadTopologyInstanceUnbuffered(&subordinate2Key) c.Assert(err, IsNil) - _, err = MasterPosWait(&slave2Key, &master.SelfBinlogCoordinates) + _, err = MainPosWait(&subordinate2Key, &main.SelfBinlogCoordinates) c.Assert(err, IsNil) // Now check for value! var value1, value2 int - ScanInstanceRow(&slave1Key, `select value from orchestrator_test.test_table where name='TestMoveBelowAndBackComplex'`, &value1) - ScanInstanceRow(&slave2Key, `select value from orchestrator_test.test_table where name='TestMoveBelowAndBackComplex'`, &value2) + ScanInstanceRow(&subordinate1Key, `select value from orchestrator_test.test_table where name='TestMoveBelowAndBackComplex'`, &value1) + ScanInstanceRow(&subordinate2Key, `select value from orchestrator_test.test_table where name='TestMoveBelowAndBackComplex'`, &value2) - c.Assert(InstancesAreSiblings(slave1, slave2), Equals, true) + c.Assert(InstancesAreSiblings(subordinate1, subordinate2), Equals, true) c.Assert(value1, Equals, randValue) c.Assert(value2, Equals, randValue) } func (s *TestSuite) TestFailMoveBelow(c *C) { clearTestMaintenance() - _, _ = ExecInstance(&slave2Key, `set global binlog_format:='ROW'`) - _, err := MoveBelow(&slave1Key, &slave2Key) - _, _ = ExecInstance(&slave2Key, `set global binlog_format:='STATEMENT'`) + _, _ = ExecInstance(&subordinate2Key, `set global binlog_format:='ROW'`) + _, err := MoveBelow(&subordinate1Key, &subordinate2Key) + _, _ = ExecInstance(&subordinate2Key, `set global binlog_format:='STATEMENT'`) c.Assert(err, Not(IsNil)) } -func (s *TestSuite) TestMakeCoMasterAndBack(c *C) { +func (s *TestSuite) TestMakeCoMainAndBack(c *C) { clearTestMaintenance() - slave1, err := MakeCoMaster(&slave1Key) + subordinate1, err := MakeCoMain(&subordinate1Key) c.Assert(err, IsNil) - // Now master & slave1 expected to be co-masters. Check! - master, _ := ReadTopologyInstanceUnbuffered(&masterKey) - c.Assert(master.IsSlaveOf(slave1), Equals, true) - c.Assert(slave1.IsSlaveOf(master), Equals, true) + // Now main & subordinate1 expected to be co-mains. Check! 
+ main, _ := ReadTopologyInstanceUnbuffered(&mainKey) + c.Assert(main.IsSubordinateOf(subordinate1), Equals, true) + c.Assert(subordinate1.IsSubordinateOf(main), Equals, true) // reset - restore to original state - master, err = ResetSlaveOperation(&masterKey) - slave1, _ = ReadTopologyInstanceUnbuffered(&slave1Key) + main, err = ResetSubordinateOperation(&mainKey) + subordinate1, _ = ReadTopologyInstanceUnbuffered(&subordinate1Key) c.Assert(err, IsNil) - c.Assert(master.MasterKey.Hostname, Equals, "_") + c.Assert(main.MainKey.Hostname, Equals, "_") } -func (s *TestSuite) TestFailMakeCoMaster(c *C) { +func (s *TestSuite) TestFailMakeCoMain(c *C) { clearTestMaintenance() - _, err := MakeCoMaster(&masterKey) + _, err := MakeCoMain(&mainKey) c.Assert(err, Not(IsNil)) } -func (s *TestSuite) TestMakeCoMasterAndBackAndFailOthersToBecomeCoMasters(c *C) { +func (s *TestSuite) TestMakeCoMainAndBackAndFailOthersToBecomeCoMains(c *C) { clearTestMaintenance() - slave1, err := MakeCoMaster(&slave1Key) + subordinate1, err := MakeCoMain(&subordinate1Key) c.Assert(err, IsNil) - // Now master & slave1 expected to be co-masters. Check! - master, _, _ := ReadInstance(&masterKey) - c.Assert(master.IsSlaveOf(slave1), Equals, true) - c.Assert(slave1.IsSlaveOf(master), Equals, true) + // Now main & subordinate1 expected to be co-mains. Check! + main, _, _ := ReadInstance(&mainKey) + c.Assert(main.IsSubordinateOf(subordinate1), Equals, true) + c.Assert(subordinate1.IsSubordinateOf(main), Equals, true) - // Verify can't have additional co-masters - _, err = MakeCoMaster(&masterKey) + // Verify can't have additional co-mains + _, err = MakeCoMain(&mainKey) c.Assert(err, Not(IsNil)) - _, err = MakeCoMaster(&slave1Key) + _, err = MakeCoMain(&subordinate1Key) c.Assert(err, Not(IsNil)) - _, err = MakeCoMaster(&slave2Key) + _, err = MakeCoMain(&subordinate2Key) c.Assert(err, Not(IsNil)) - // reset slave - restore to original state - master, err = ResetSlaveOperation(&masterKey) + // reset subordinate - restore to original state + main, err = ResetSubordinateOperation(&mainKey) c.Assert(err, IsNil) - c.Assert(master.MasterKey.Hostname, Equals, "_") + c.Assert(main.MainKey.Hostname, Equals, "_") } func (s *TestSuite) TestDiscover(c *C) { var err error - _, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", masterKey.Hostname, masterKey.Port) - _, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave1Key.Hostname, slave1Key.Port) - _, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave2Key.Hostname, slave2Key.Port) - _, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave3Key.Hostname, slave3Key.Port) - _, found, _ := ReadInstance(&masterKey) + _, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", mainKey.Hostname, mainKey.Port) + _, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", subordinate1Key.Hostname, subordinate1Key.Port) + _, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", subordinate2Key.Hostname, subordinate2Key.Port) + _, err = db.ExecOrchestrator("delete from database_instance where hostname = ? 
and port = ?", subordinate3Key.Hostname, subordinate3Key.Port) + _, found, _ := ReadInstance(&mainKey) c.Assert(found, Equals, false) - _, _ = ReadTopologyInstanceUnbuffered(&slave1Key) - _, found, err = ReadInstance(&slave1Key) + _, _ = ReadTopologyInstanceUnbuffered(&subordinate1Key) + _, found, err = ReadInstance(&subordinate1Key) c.Assert(found, Equals, true) c.Assert(err, IsNil) } -func (s *TestSuite) TestForgetMaster(c *C) { - _, _ = ReadTopologyInstanceUnbuffered(&masterKey) - _, found, _ := ReadInstance(&masterKey) +func (s *TestSuite) TestForgetMain(c *C) { + _, _ = ReadTopologyInstanceUnbuffered(&mainKey) + _, found, _ := ReadInstance(&mainKey) c.Assert(found, Equals, true) - ForgetInstance(&masterKey) - _, found, _ = ReadInstance(&masterKey) + ForgetInstance(&mainKey) + _, found, _ = ReadInstance(&mainKey) c.Assert(found, Equals, false) } func (s *TestSuite) TestBeginMaintenance(c *C) { clearTestMaintenance() - _, _ = ReadTopologyInstanceUnbuffered(&masterKey) - _, err := BeginMaintenance(&masterKey, "unittest", "TestBeginMaintenance") + _, _ = ReadTopologyInstanceUnbuffered(&mainKey) + _, err := BeginMaintenance(&mainKey, "unittest", "TestBeginMaintenance") c.Assert(err, IsNil) } func (s *TestSuite) TestBeginEndMaintenance(c *C) { clearTestMaintenance() - _, _ = ReadTopologyInstanceUnbuffered(&masterKey) - k, err := BeginMaintenance(&masterKey, "unittest", "TestBeginEndMaintenance") + _, _ = ReadTopologyInstanceUnbuffered(&mainKey) + k, err := BeginMaintenance(&mainKey, "unittest", "TestBeginEndMaintenance") c.Assert(err, IsNil) err = EndMaintenance(k) c.Assert(err, IsNil) @@ -323,17 +323,17 @@ func (s *TestSuite) TestBeginEndMaintenance(c *C) { func (s *TestSuite) TestFailBeginMaintenanceTwice(c *C) { clearTestMaintenance() - _, _ = ReadTopologyInstanceUnbuffered(&masterKey) - _, err := BeginMaintenance(&masterKey, "unittest", "TestFailBeginMaintenanceTwice") + _, _ = ReadTopologyInstanceUnbuffered(&mainKey) + _, err := BeginMaintenance(&mainKey, "unittest", "TestFailBeginMaintenanceTwice") c.Assert(err, IsNil) - _, err = BeginMaintenance(&masterKey, "unittest", "TestFailBeginMaintenanceTwice") + _, err = BeginMaintenance(&mainKey, "unittest", "TestFailBeginMaintenanceTwice") c.Assert(err, Not(IsNil)) } func (s *TestSuite) TestFailEndMaintenanceTwice(c *C) { clearTestMaintenance() - _, _ = ReadTopologyInstanceUnbuffered(&masterKey) - k, err := BeginMaintenance(&masterKey, "unittest", "TestFailEndMaintenanceTwice") + _, _ = ReadTopologyInstanceUnbuffered(&mainKey) + k, err := BeginMaintenance(&mainKey, "unittest", "TestFailEndMaintenanceTwice") c.Assert(err, IsNil) err = EndMaintenance(k) c.Assert(err, IsNil) @@ -343,41 +343,41 @@ func (s *TestSuite) TestFailEndMaintenanceTwice(c *C) { func (s *TestSuite) TestFailMoveBelowUponMaintenance(c *C) { clearTestMaintenance() - _, _ = ReadTopologyInstanceUnbuffered(&slave1Key) - k, err := BeginMaintenance(&slave1Key, "unittest", "TestBeginEndMaintenance") + _, _ = ReadTopologyInstanceUnbuffered(&subordinate1Key) + k, err := BeginMaintenance(&subordinate1Key, "unittest", "TestBeginEndMaintenance") c.Assert(err, IsNil) - _, err = MoveBelow(&slave1Key, &slave2Key) + _, err = MoveBelow(&subordinate1Key, &subordinate2Key) c.Assert(err, Not(IsNil)) err = EndMaintenance(k) c.Assert(err, IsNil) } -func (s *TestSuite) TestFailMoveBelowUponSlaveStopped(c *C) { +func (s *TestSuite) TestFailMoveBelowUponSubordinateStopped(c *C) { clearTestMaintenance() - slave1, _ := ReadTopologyInstanceUnbuffered(&slave1Key) - c.Assert(slave1.SlaveRunning(), 
Equals, true) - slave1, _ = StopSlaveNicely(&slave1.Key, 0) - c.Assert(slave1.SlaveRunning(), Equals, false) + subordinate1, _ := ReadTopologyInstanceUnbuffered(&subordinate1Key) + c.Assert(subordinate1.SubordinateRunning(), Equals, true) + subordinate1, _ = StopSubordinateNicely(&subordinate1.Key, 0) + c.Assert(subordinate1.SubordinateRunning(), Equals, false) - _, err := MoveBelow(&slave1Key, &slave2Key) + _, err := MoveBelow(&subordinate1Key, &subordinate2Key) c.Assert(err, Not(IsNil)) - _, _ = StartSlave(&slave1.Key) + _, _ = StartSubordinate(&subordinate1.Key) } -func (s *TestSuite) TestFailMoveBelowUponOtherSlaveStopped(c *C) { +func (s *TestSuite) TestFailMoveBelowUponOtherSubordinateStopped(c *C) { clearTestMaintenance() - slave1, _ := ReadTopologyInstanceUnbuffered(&slave1Key) - c.Assert(slave1.SlaveRunning(), Equals, true) - slave1, _ = StopSlaveNicely(&slave1.Key, 0) - c.Assert(slave1.SlaveRunning(), Equals, false) + subordinate1, _ := ReadTopologyInstanceUnbuffered(&subordinate1Key) + c.Assert(subordinate1.SubordinateRunning(), Equals, true) + subordinate1, _ = StopSubordinateNicely(&subordinate1.Key, 0) + c.Assert(subordinate1.SubordinateRunning(), Equals, false) - _, err := MoveBelow(&slave2Key, &slave1Key) + _, err := MoveBelow(&subordinate2Key, &subordinate1Key) c.Assert(err, Not(IsNil)) - _, _ = StartSlave(&slave1.Key) + _, _ = StartSubordinate(&subordinate1.Key) }
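
For context on the co-main recovery path above (the M1<->M2<-S1 scenario described in the RecoverDeadCoMain comments), the following standalone Go sketch models why the promoted replica must stop pointing at the main it last replicated from. It is purely illustrative: it uses a hypothetical toy map of "who replicates from whom" rather than orchestrator's inst package; only the DetachSubordinateMainHost name mentioned in the comment is taken from the patch.

package main

import "fmt"

// Toy model of the co-main failover described in the RecoverDeadCoMain comments:
// M1 <-> M2 are co-mains, S1 replicates from M2. M2 dies and S1 gets promoted.
// Each entry records which main a server last replicated from.
func main() {
	mainOf := map[string]string{
		"M1": "M2", // co-main of M2
		"M2": "M1", // co-main of M1 (now dead)
		"S1": "M2", // replica of the dead co-main
	}

	// Promotion: M1 is repointed under S1, but S1 still remembers M2.
	mainOf["M1"] = "S1"
	fmt.Println("after promoting S1:", mainOf)
	// M1 -> S1 -> M2 -> M1: the 3-node circle the comments warn about.

	// Breaking the circle: the promoted server is detached from its stale main
	// (the patch does this reversibly via inst.DetachSubordinateMainHost).
	delete(mainOf, "S1")
	fmt.Println("after detaching S1:", mainOf)
}

As the comments note, ApplyMySQLPromotionAfterMainFailover would also break the circle (RESET SLAVE on the promoted server), but the explicit detach guarantees it regardless of that setting.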