Skip to content

Commit

Permalink
feat: bootstrap-on-rollback (#1540)
Browse files Browse the repository at this point in the history
  • Loading branch information
indougnito authored Oct 20, 2023
1 parent 4db3bfb commit 4bfbb5a
Show file tree
Hide file tree
Showing 10 changed files with 426 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ public class DeploymentDirectoryManager {
static final String ROLLBACK_SNAPSHOT_FILE = "rollback_snapshot.tlog";
static final String TARGET_CONFIG_FILE = "target_config.tlog";
static final String BOOTSTRAP_TASK_FILE = "bootstrap_task.json";
static final String ROLLBACK_BOOTSTRAP_TASK_FILE = "rollback_bootstrap_task.json";
static final String DEPLOYMENT_METADATA_FILE = "deployment_metadata.json";
static final String CONFIG_SNAPSHOT_ERROR = "config_snapshot_error";

Expand Down Expand Up @@ -215,6 +216,16 @@ public Path getBootstrapTaskFilePath() throws IOException {
return getDeploymentDirectoryPath().resolve(BOOTSTRAP_TASK_FILE);
}

/**
 * Locate the file holding the persisted bootstrap task list of a rollback deployment.
 *
 * <p>The file name {@code ROLLBACK_BOOTSTRAP_TASK_FILE} is resolved against the path returned by
 * {@link #getDeploymentDirectoryPath()}.
 *
 * @return Path to the rollback bootstrap task file
 * @throws IOException on I/O errors
 */
public Path getRollbackBootstrapTaskFilePath() throws IOException {
    final Path deploymentDirectory = getDeploymentDirectoryPath();
    return deploymentDirectory.resolve(ROLLBACK_BOOTSTRAP_TASK_FILE);
}

/**
* Resolve file path to persisted deployment metadata.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_RESTART;
import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.KERNEL_ACTIVATION;
import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.KERNEL_ROLLBACK;
import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.ROLLBACK_BOOTSTRAP;

public class KernelUpdateDeploymentTask implements DeploymentTask {
private final Kernel kernel;
Expand Down Expand Up @@ -72,6 +73,9 @@ public DeploymentResult call() {
} else if (KERNEL_ROLLBACK.equals(stage)) {
result = new DeploymentResult(DeploymentResult.DeploymentStatus.FAILED_ROLLBACK_COMPLETE,
getDeploymentStatusDetails());
} else if (ROLLBACK_BOOTSTRAP.equals(stage)) {
result = new DeploymentResult(DeploymentResult.DeploymentStatus.FAILED_UNABLE_TO_ROLLBACK,
getDeploymentStatusDetails());
}

componentManager.cleanupStaleVersions();
Expand Down Expand Up @@ -100,7 +104,7 @@ public DeploymentResult call() {
return new DeploymentResult(DeploymentResult.DeploymentStatus.FAILED_UNABLE_TO_ROLLBACK, e);
}
return null;
} else if (KERNEL_ROLLBACK.equals(stage)) {
} else if (KERNEL_ROLLBACK.equals(stage) || ROLLBACK_BOOTSTRAP.equals(stage)) {
logger.atError().log("Nucleus update workflow failed on rollback", e);
return new DeploymentResult(DeploymentResult.DeploymentStatus.FAILED_UNABLE_TO_ROLLBACK,
getDeploymentStatusDetails());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_REBOOT;
import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_RESTART;
import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.KERNEL_ROLLBACK;
import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.ROLLBACK_BOOTSTRAP;

/**
* Activation and rollback of Kernel update deployments.
Expand Down Expand Up @@ -119,7 +120,10 @@ void rollback(Deployment deployment, Throwable failureCause) {
deployment.setErrorTypes(errorReport.getRight());
deployment.setStageDetails(Utils.generateFailureMessage(failureCause));

deployment.setDeploymentStage(KERNEL_ROLLBACK);
final boolean bootstrapOnRollbackRequired = kernelAlternatives.prepareBootstrapOnRollbackIfNeeded(
kernel.getContext(), deploymentDirectoryManager, bootstrapManager);

deployment.setDeploymentStage(bootstrapOnRollbackRequired ? ROLLBACK_BOOTSTRAP : KERNEL_ROLLBACK);

try {
deploymentDirectoryManager.writeDeploymentMetadata(deployment);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ public class BootstrapManager implements Iterator<BootstrapTaskStatus> {
@Setter(AccessLevel.PACKAGE)
@Getter(AccessLevel.PACKAGE)
private List<BootstrapTaskStatus> bootstrapTaskStatusList = new ArrayList<>();
private BootstrapTaskStatus activeTask;
private final Kernel kernel;
private final Platform platform;
private int cursor;
Expand All @@ -109,6 +110,21 @@ public BootstrapManager(Kernel kernel) {
this.platform = platform;
}

/**
 * Collect the component names of the bootstrap tasks that are still pending.
 *
 * <p>A task is reported when it is not the current active task and it is either not done yet or has an exit code
 * that is classified as an error.
 *
 * @return component names of the pending bootstrap tasks, excluding the active task
 */
public Set<String> getUnstartedTasks() {
    final Set<String> unstartedComponents = new HashSet<>();
    for (final BootstrapTaskStatus candidate : this.bootstrapTaskStatusList) {
        // Reference comparison: only the exact active task instance is skipped.
        final boolean isActive = candidate == this.activeTask;
        if (!isActive && isIncompleteOrErrored(candidate)) {
            unstartedComponents.add(candidate.getComponentName());
        }
    }
    return unstartedComponents;
}

/**
* Check if any bootstrap tasks are pending based on new configuration. Meanwhile resolve a list of bootstrap
* tasks.
Expand All @@ -121,6 +137,22 @@ public BootstrapManager(Kernel kernel) {
@SuppressWarnings("PMD.PrematureDeclaration")
public boolean isBootstrapRequired(Map<String, Object> newConfig)
        throws ServiceUpdateException, ComponentConfigurationValidationException {
    // No component is excluded: delegate to the overload with an empty exclusion set.
    final Set<String> noExclusions = Collections.emptySet();
    return isBootstrapRequired(newConfig, noExclusions);
}

/**
* Check if any bootstrap tasks are pending based on new configuration. Meanwhile resolve a list of bootstrap
* tasks.
*
* @param newConfig new configuration from deployment
* @param componentsToExclude set of components to exclude from consideration for bootstrapping
* @return true if there are bootstrap tasks, false otherwise
* @throws ServiceUpdateException if parsing bootstrap tasks from new configuration fails
* @throws ComponentConfigurationValidationException If changed nucleus component configuration is invalid
*/
@SuppressWarnings("PMD.PrematureDeclaration")
public boolean isBootstrapRequired(Map<String, Object> newConfig, Set<String> componentsToExclude)
throws ServiceUpdateException, ComponentConfigurationValidationException {
bootstrapTaskStatusList.clear();
cursor = 0;

Expand All @@ -137,7 +169,9 @@ public boolean isBootstrapRequired(Map<String, Object> newConfig)
Set<String> componentsRequiresBootstrapTask = new HashSet<>();
Map<String, Object> serviceConfig = (Map<String, Object>) newConfig.get(SERVICES_NAMESPACE_TOPIC);
serviceConfig.forEach((name, config) -> {
if (serviceBootstrapRequired(name, (Map<String, Object>) config)) {
if (componentsToExclude.contains(name)) {
logger.atDebug().kv(COMPONENT_NAME_LOG_KEY_NAME, name).log("Excluding bootstrap task");
} else if (serviceBootstrapRequired(name, (Map<String, Object>) config)) {
logger.atDebug().kv(COMPONENT_NAME_LOG_KEY_NAME, name).log("Found pending bootstrap task");
componentsRequiresBootstrapTask.add(name);
}
Expand All @@ -163,6 +197,10 @@ public boolean isBootstrapRequired(Map<String, Object> newConfig)
return nucleusConfigValidAndNeedsRestart || !bootstrapTaskStatusList.isEmpty();
}

/**
 * Tell whether a bootstrap task still needs to be run.
 *
 * @param task bootstrap task status to inspect
 * @return true if the task status is not DONE, or if its exit code is classified as an error code
 */
private boolean isIncompleteOrErrored(BootstrapTaskStatus task) {
    final boolean notDone = !DONE.equals(task.getStatus());
    // The exit code is only inspected for tasks that are already DONE.
    return notDone || BootstrapSuccessCode.isErrorCode(task.getExitCode());
}

private boolean willRemovePlugins(Map<String, Object> serviceConfig) {
Set<String> pluginsToRemove = kernel.orderedDependencies().stream()
.filter(s -> s instanceof PluginService)
Expand Down Expand Up @@ -429,6 +467,21 @@ public void persistBootstrapTaskList(Path persistedTaskFilePath) throws IOExcept
logger.atInfo().kv("filePath", persistedTaskFilePath).log("Bootstrap task list is saved to file");
}

/**
 * Remove the persisted bootstrap task list file when one is present.
 *
 * <p>A null path is logged as an error and otherwise ignored. A path that does not exist is not treated as a
 * failure.
 *
 * @param persistedTaskFilePath Path to the persisted file of bootstrap tasks
 * @throws IOException on I/O error
 */
public void deleteBootstrapTaskList(Path persistedTaskFilePath) throws IOException {
    if (persistedTaskFilePath != null) {
        logger.atInfo().kv("filePath", persistedTaskFilePath).log("Deleting bootstrap task list");
        Files.deleteIfExists(persistedTaskFilePath);
        return;
    }
    // Nothing can be deleted without a path; report it and carry on.
    logger.atError().log("No bootstrap task list to delete: the provided file path was null");
}

/**
* Persist the bootstrap task list from file.
*
Expand Down Expand Up @@ -512,7 +565,7 @@ public int executeAllBootstrapTasksSequentially(Path persistedTaskFilePath)
public boolean hasNext() {
while (cursor < bootstrapTaskStatusList.size()) {
BootstrapTaskStatus next = bootstrapTaskStatusList.get(cursor);
if (!DONE.equals(next.getStatus()) || BootstrapSuccessCode.isErrorCode(next.getExitCode())) {
if (isIncompleteOrErrored(next)) {
return true;
}
cursor++;
Expand All @@ -526,6 +579,7 @@ public BootstrapTaskStatus next() {
throw new NoSuchElementException();
}
cursor++;
return bootstrapTaskStatusList.get(cursor - 1);
this.activeTask = bootstrapTaskStatusList.get(cursor - 1);
return this.activeTask;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,12 @@ public enum DeploymentStage {
* Deployment tries to roll back to the Kernel with the previous configuration, after BOOTSTRAP or
* KERNEL_ACTIVATION fails.
*/
KERNEL_ROLLBACK
KERNEL_ROLLBACK,

/**
* Deployment executes component bootstrap steps for the rollback, after BOOTSTRAP or KERNEL_ACTIVATION fails.
* Only used when a specific config flag has been set for one or more components in the rollback set.
*/
ROLLBACK_BOOTSTRAP,
}
}
60 changes: 47 additions & 13 deletions src/main/java/com/aws/greengrass/lifecyclemanager/Kernel.java
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@
import static com.aws.greengrass.dependency.EZPlugins.JAR_FILE_EXTENSION;
import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_REBOOT;
import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_RESTART;
import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.BOOTSTRAP;
import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.KERNEL_ROLLBACK;
import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.ROLLBACK_BOOTSTRAP;
import static com.aws.greengrass.lifecyclemanager.GreengrassService.SERVICES_NAMESPACE_TOPIC;
import static com.aws.greengrass.lifecyclemanager.GreengrassService.SERVICE_DEPENDENCIES_NAMESPACE_TOPIC;
import static com.aws.greengrass.lifecyclemanager.GreengrassService.SERVICE_LIFECYCLE_NAMESPACE_TOPIC;
Expand Down Expand Up @@ -196,24 +199,19 @@ public Kernel launch() {
switch (deploymentStageAtLaunch) {
case BOOTSTRAP:
logger.atInfo().kv("deploymentStage", deploymentStageAtLaunch).log("Resume deployment");
int exitCode;
try {
exitCode = bootstrapManager.executeAllBootstrapTasksSequentially(
deploymentDirectoryManager.getBootstrapTaskFilePath());
if (!bootstrapManager.hasNext()) {
logger.atInfo().log("Completed all bootstrap tasks. Continue to activate deployment changes");
}
// If exitCode is 0, which happens when all bootstrap tasks are completed, restart in new launch
// directories and verify handover is complete. As a result, exit code 0 is treated as 100 here.
logger.atInfo().log((exitCode == REQUEST_REBOOT ? "device reboot" : "Nucleus restart")
+ " requested to complete bootstrap task");

shutdown(30, exitCode == REQUEST_REBOOT ? REQUEST_REBOOT : REQUEST_RESTART);
Path bootstrapTaskFilePath = deploymentDirectoryManager.getBootstrapTaskFilePath();
executeBootstrapTasksAndShutdown(bootstrapManager, bootstrapTaskFilePath);
} catch (ServiceUpdateException | IOException e) {
logger.atError().log("Deployment bootstrap failed", e);
try {
// Bootstrapping for target deployment failed, so check if bootstrap-on-rollback is needed
boolean bootstrapOnRollbackRequired = kernelAlts.prepareBootstrapOnRollbackIfNeeded(
this.context, deploymentDirectoryManager, bootstrapManager);
// Save deployment error information
Deployment deployment = deploymentDirectoryManager.readDeploymentMetadata();
deployment.setDeploymentStage(DeploymentStage.KERNEL_ROLLBACK);
deployment.setDeploymentStage(
bootstrapOnRollbackRequired ? ROLLBACK_BOOTSTRAP : KERNEL_ROLLBACK);
Pair<List<String>, List<String>> errorReport =
DeploymentErrorCodeUtils.generateErrorReportFromExceptionStack(e);
deployment.setErrorStack(errorReport.getLeft());
Expand All @@ -233,6 +231,28 @@ public Kernel launch() {
}
}
break;
case ROLLBACK_BOOTSTRAP:
logger.atInfo().kv("deploymentStage", deploymentStageAtLaunch).log("Resume deployment");
Path bootstrapTaskFilePath;
try {
bootstrapTaskFilePath = deploymentDirectoryManager.getRollbackBootstrapTaskFilePath();
executeBootstrapTasksAndShutdown(bootstrapManager, bootstrapTaskFilePath);
} catch (ServiceUpdateException | IOException e) {
logger.atError().log("Rollback bootstrapping failed", e);
DeploymentQueue deploymentQueue = new DeploymentQueue();
context.put(DeploymentQueue.class, deploymentQueue);
try {
// Deployment error info should already have been saved during the target deployment failure.
Deployment deployment = deploymentDirectoryManager.readDeploymentMetadata();
deployment.setDeploymentStage(deploymentStageAtLaunch);
deploymentQueue.offer(deployment);
} catch (IOException ioException) {
logger.atError().setCause(ioException)
.log("Failed to load information for the ongoing deployment. Proceed as default");
}
kernelLifecycle.launch();
}
break;
case KERNEL_ACTIVATION:
case KERNEL_ROLLBACK:
logger.atInfo().kv("deploymentStage", deploymentStageAtLaunch).log("Resume deployment");
Expand Down Expand Up @@ -401,7 +421,20 @@ public GreengrassService locateIgnoreError(String name) {
config.lookupTopics(DEFAULT_VALUE_TIMESTAMP, SERVICES_NAMESPACE_TOPIC, name), e);
}
});
}

/**
 * Run the bootstrap tasks tracked in the given task file, then shut down requesting a restart or a reboot.
 *
 * @param bootstrapManager      manager used to execute the bootstrap tasks
 * @param bootstrapTaskFilePath path to the persisted bootstrap task list
 * @throws ServiceUpdateException propagated from the bootstrap task execution
 * @throws IOException            propagated from the bootstrap task execution
 */
private void executeBootstrapTasksAndShutdown(BootstrapManager bootstrapManager, Path bootstrapTaskFilePath)
        throws ServiceUpdateException, IOException {
    final int bootstrapExitCode = bootstrapManager.executeAllBootstrapTasksSequentially(bootstrapTaskFilePath);
    final boolean allTasksCompleted = !bootstrapManager.hasNext();
    if (allTasksCompleted) {
        logger.atInfo().log("Completed all bootstrap tasks. Continue to activate deployment changes");
    }

    // If the exit code is 0, which happens when all bootstrap tasks are completed, restart in new launch
    // directories and verify handover is complete. As a result, exit code 0 is treated as a restart request:
    // anything other than REQUEST_REBOOT maps to REQUEST_RESTART.
    final String requestedAction;
    final int shutdownExitCode;
    if (bootstrapExitCode == REQUEST_REBOOT) {
        requestedAction = "device reboot";
        shutdownExitCode = REQUEST_REBOOT;
    } else {
        requestedAction = "Nucleus restart";
        shutdownExitCode = REQUEST_RESTART;
    }
    logger.atInfo().log(requestedAction + " requested to complete bootstrap task");

    shutdown(30, shutdownExitCode);
}

@SuppressWarnings(
Expand Down Expand Up @@ -637,6 +670,7 @@ public Kernel parseArgs(String... args) {
.log("Detected ongoing deployment, but failed to load target configuration file", e);
}
break;
case ROLLBACK_BOOTSTRAP:
case KERNEL_ROLLBACK:
try {
Path configPath = deploymentDirectoryManager.getSnapshotFilePath();
Expand Down
Loading

0 comments on commit 4bfbb5a

Please sign in to comment.