Skip to content

Commit da667cc

Browse files
authored
fix: reject before putting deployment in in_progress (#1354)
An IoT job execution can't transition to REJECTED from IN_PROGRESS state. Valid transition would be QUEUED to REJECTED. This change either moves the QUEUED job to IN_PROGRESS or REJECTED state.
1 parent 2f98760 commit da667cc

File tree

4 files changed

+131
-130
lines changed

4 files changed

+131
-130
lines changed

src/main/java/com/aws/greengrass/deployment/DefaultDeploymentTask.java

Lines changed: 0 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@
1414
import com.aws.greengrass.componentmanager.models.ComponentRequirementIdentifier;
1515
import com.aws.greengrass.config.Node;
1616
import com.aws.greengrass.config.Topics;
17-
import com.aws.greengrass.deployment.errorcode.DeploymentErrorCode;
18-
import com.aws.greengrass.deployment.exceptions.DeploymentRejectedException;
1917
import com.aws.greengrass.deployment.exceptions.DeploymentTaskFailureException;
2018
import com.aws.greengrass.deployment.model.Deployment;
2119
import com.aws.greengrass.deployment.model.DeploymentDocument;
@@ -43,8 +41,6 @@
4341
import java.util.stream.Collectors;
4442

4543
import static com.aws.greengrass.deployment.DeploymentConfigMerger.DEPLOYMENT_ID_LOG_KEY;
46-
import static com.aws.greengrass.deployment.DeploymentService.GROUP_TO_LAST_DEPLOYMENT_CONFIG_ARN_KEY;
47-
import static com.aws.greengrass.deployment.DeploymentService.GROUP_TO_LAST_DEPLOYMENT_TIMESTAMP_KEY;
4844
import static com.aws.greengrass.deployment.DeploymentService.GROUP_TO_ROOT_COMPONENTS_VERSION_KEY;
4945
import static com.aws.greengrass.deployment.converter.DeploymentDocumentConverter.LOCAL_DEPLOYMENT_GROUP_NAME;
5046

@@ -115,14 +111,6 @@ public DeploymentResult call() throws InterruptedException {
115111
.kv("Deployment service config", deploymentServiceConfig.toPOJO().toString())
116112
.log("Starting deployment task");
117113

118-
/*
119-
* Enforce deployments are received for a given deployment target (thing or thingGroup) in sequence such
120-
* that old deployments for that target does not override a new deployment.
121-
*/
122-
if (checkIfDeploymentReceivedIsStale(deploymentDocument)) {
123-
return prepareRejectionResult(deploymentDocument);
124-
}
125-
126114
Map<String, Set<ComponentRequirementIdentifier>> nonTargetGroupsToRootPackagesMap =
127115
getNonTargetGroupToRootPackagesMap(deploymentDocument);
128116

@@ -193,80 +181,6 @@ public DeploymentResult call() throws InterruptedException {
193181
}
194182
}
195183

196-
/*
197-
* Enforce deployments are received for a given deployment target (thing or thingGroup) in sequence such
198-
* that old deployments for that target does not override a new deployment.
199-
*
200-
* For thing deployments, we don't consider them here as they are always in sequence and always for only
201-
* one target.
202-
*
203-
* For thingGroup deployments sent to different targets (thingGroup A & B), nucleus allows components from
204-
* both groups to be deployment as long as they don't have a conflicting component versions. This
205-
* behavior is not changed.
206-
*
207-
* For thingGroup deployments sent to the same target (thingGroup A) are always in sequence, however if
208-
* receive a bad/stale deployment due to cloud error we don't want that stale deployment to override a
209-
* new deployment already performed on device.
210-
*
211-
* For a subgroup deployments targeted for a parent fleet group (subgroup A1, A2 & A3 targeted for
212-
* thingGroup A), as there could be multiple subgroup deployments each of these sent as different jobs to
213-
* the device could be received in any order yielding an unpredictable behavior. To resolve this, nucleus
214-
* enforces processing these subgroup deployment in-order of their creation irrespective of when these
215-
* signals are received. For example:
216-
*
217-
* Order of deployment creation is: A1, A2, A3
218-
* So these, have to be processed in this order.
219-
*
220-
* Order of deployments received: A2, A1, A3
221-
* then A2 and A3 deployment will succeed, but A1 would be rejected as nucleus has already processed
222-
* newer deployment A2.
223-
*
224-
* @return true if deployment is considered stale, false otherwise
225-
*/
226-
private boolean checkIfDeploymentReceivedIsStale(DeploymentDocument deploymentDocument) {
227-
// Check if group deployment
228-
boolean isGroupDeployment = Deployment.DeploymentType.IOT_JOBS.equals(deployment.getDeploymentType())
229-
&& deploymentDocument.getGroupName() != null;
230-
231-
// if not a group deployment, then not stale
232-
if (!isGroupDeployment) {
233-
return false;
234-
}
235-
236-
// Get timestamp for the root target group
237-
Topics lastDeployment = deploymentServiceConfig
238-
.lookupTopics(DeploymentService.GROUP_TO_LAST_DEPLOYMENT_TOPICS, deploymentDocument.getGroupName());
239-
240-
long timestamp = Coerce.toLong(lastDeployment.find(GROUP_TO_LAST_DEPLOYMENT_TIMESTAMP_KEY));
241-
242-
// if don't have last deployment detail, then its a new deployment
243-
if (timestamp == 0 || deploymentDocument.getTimestamp() == null) {
244-
return false;
245-
}
246-
247-
// if the new deployment creation timestamp is smaller than last deployment creation timestamp then its stale
248-
return deploymentDocument.getTimestamp() < timestamp;
249-
}
250-
251-
private DeploymentResult prepareRejectionResult(DeploymentDocument deploymentDocument) {
252-
logger.atInfo().setEventType(DEPLOYMENT_TASK_EVENT_TYPE)
253-
.log("Nucleus has a newer deployment for '{}' target. Rejecting the deployment",
254-
deploymentDocument.getGroupName());
255-
256-
Topics lastDeployment =
257-
deploymentServiceConfig.lookupTopics(DeploymentService.GROUP_TO_LAST_DEPLOYMENT_TOPICS,
258-
deploymentDocument.getGroupName());
259-
260-
String lastDeploymentConfigArn =
261-
Coerce.toString(lastDeployment.find(GROUP_TO_LAST_DEPLOYMENT_CONFIG_ARN_KEY));
262-
return new DeploymentResult(DeploymentResult.DeploymentStatus.REJECTED, new DeploymentRejectedException(
263-
String.format("Nucleus has a newer deployment for '%s' target deployed by '%s'. Rejecting the "
264-
+ "deployment from '%s'", deploymentDocument.getGroupName(), lastDeploymentConfigArn,
265-
deploymentDocument.getConfigurationArn()),
266-
DeploymentErrorCode.REJECTED_STALE_DEPLOYMENT));
267-
268-
}
269-
270184
@SuppressWarnings("PMD.AvoidCatchingGenericException")
271185
private Map<String, Set<ComponentRequirementIdentifier>> getNonTargetGroupToRootPackagesMap(
272186
DeploymentDocument deploymentDocument)

src/main/java/com/aws/greengrass/deployment/DeploymentService.java

Lines changed: 94 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import com.aws.greengrass.deployment.errorcode.DeploymentErrorCode;
2626
import com.aws.greengrass.deployment.errorcode.DeploymentErrorCodeUtils;
2727
import com.aws.greengrass.deployment.exceptions.DeploymentException;
28+
import com.aws.greengrass.deployment.exceptions.DeploymentRejectedException;
2829
import com.aws.greengrass.deployment.exceptions.InvalidRequestException;
2930
import com.aws.greengrass.deployment.exceptions.MissingRequiredCapabilitiesException;
3031
import com.aws.greengrass.deployment.model.Deployment;
@@ -555,13 +556,34 @@ private void createNewDeployment(Deployment deployment) {
555556
if (deploymentTask == null) {
556557
return;
557558
}
558-
deploymentStatusKeeper.persistAndPublishDeploymentStatus(deployment.getId(),
559-
deployment.getGreengrassDeploymentId(), deployment.getConfigurationArn(),
560-
deployment.getDeploymentType(), JobStatus.IN_PROGRESS.toString(), new HashMap<>(),
561-
deployment.getDeploymentDocumentObj().getRootPackages());
562559

563-
if (DEFAULT.equals(deployment.getDeploymentStage())) {
560+
/*
561+
* Enforce deployments are received for a given deployment target (thing or thingGroup) in sequence such
562+
* that old deployments for that target does not override a new deployment.
563+
*/
564+
if (checkIfDeploymentReceivedIsStale(deployment.getDeploymentDocumentObj(), deployment.getDeploymentType())) {
565+
logger.atInfo().log("Nucleus has a newer deployment for '{}' target. Rejecting the deployment",
566+
deployment.getDeploymentDocumentObj().getGroupName());
567+
Topics lastDeployment = config.lookupTopics(DeploymentService.GROUP_TO_LAST_DEPLOYMENT_TOPICS,
568+
deployment.getDeploymentDocumentObj().getGroupName());
569+
570+
String lastDeploymentConfigArn =
571+
Coerce.toString(lastDeployment.find(GROUP_TO_LAST_DEPLOYMENT_CONFIG_ARN_KEY));
572+
573+
updateDeploymentResultAsRejected(deployment, deploymentTask, new DeploymentRejectedException(String.format(
574+
"Nucleus has a newer deployment for '%s' target deployed by '%s'. Rejecting the "
575+
+ "deployment from '%s'", deployment.getDeploymentDocumentObj().getGroupName(),
576+
lastDeploymentConfigArn, deployment.getDeploymentDocumentObj().getConfigurationArn()),
577+
DeploymentErrorCode.REJECTED_STALE_DEPLOYMENT));
578+
return;
579+
} else {
580+
deploymentStatusKeeper.persistAndPublishDeploymentStatus(deployment.getId(),
581+
deployment.getGreengrassDeploymentId(), deployment.getConfigurationArn(),
582+
deployment.getDeploymentType(), JobStatus.IN_PROGRESS.toString(), new HashMap<>(),
583+
deployment.getDeploymentDocumentObj().getRootPackages());
584+
}
564585

586+
if (DEFAULT.equals(deployment.getDeploymentStage())) {
565587
try {
566588
context.get(KernelAlternatives.class).cleanupLaunchDirectoryLinks();
567589
deploymentDirectoryManager.createNewDeploymentDirectory(deployment.getGreengrassDeploymentId());
@@ -615,6 +637,73 @@ private void createNewDeployment(Deployment deployment) {
615637
cancellable);
616638
}
617639

640+
/*
641+
* Enforce deployments are received for a given deployment target (thing or thingGroup) in sequence such
642+
* that old deployments for that target does not override a new deployment.
643+
*
644+
* For thing deployments, we don't consider them here as they are always in sequence and always for only
645+
* one target.
646+
*
647+
* For thingGroup deployments sent to different targets (thingGroup A & B), nucleus allows components from
648+
* both groups to be deployment as long as they don't have a conflicting component versions. This
649+
* behavior is not changed.
650+
*
651+
* For thingGroup deployments sent to the same target (thingGroup A) are always in sequence, however if
652+
* receive a bad/stale deployment due to cloud error we don't want that stale deployment to override a
653+
* new deployment already performed on device.
654+
*
655+
* For a subgroup deployments targeted for a parent fleet group (subgroup A1, A2 & A3 targeted for
656+
* thingGroup A), as there could be multiple subgroup deployments each of these sent as different jobs to
657+
* the device could be received in any order yielding an unpredictable behavior. To resolve this, nucleus
658+
* enforces processing these subgroup deployment in-order of their creation irrespective of when these
659+
* signals are received. For example:
660+
*
661+
* Order of deployment creation is: A1, A2, A3
662+
* So these, have to be processed in this order.
663+
*
664+
* Order of deployments received: A2, A1, A3
665+
* then A2 and A3 deployment will succeed, but A1 would be rejected as nucleus has already processed
666+
* newer deployment A2.
667+
*
668+
* @return true if deployment is considered stale, false otherwise
669+
*/
670+
private boolean checkIfDeploymentReceivedIsStale(DeploymentDocument deploymentDocument,
671+
DeploymentType deploymentType) {
672+
// Check if group deployment
673+
boolean isGroupDeployment = Deployment.DeploymentType.IOT_JOBS.equals(deploymentType)
674+
&& deploymentDocument.getGroupName() != null;
675+
676+
// if not a group deployment, then not stale
677+
if (!isGroupDeployment) {
678+
return false;
679+
}
680+
681+
// Get timestamp for the root target group
682+
Topics lastDeployment = config
683+
.lookupTopics(DeploymentService.GROUP_TO_LAST_DEPLOYMENT_TOPICS, deploymentDocument.getGroupName());
684+
685+
long timestamp = Coerce.toLong(lastDeployment.find(GROUP_TO_LAST_DEPLOYMENT_TIMESTAMP_KEY));
686+
687+
// if don't have last deployment detail, then its a new deployment
688+
if (timestamp == 0 || deploymentDocument.getTimestamp() == null) {
689+
return false;
690+
}
691+
692+
// if the new deployment creation timestamp is smaller than last deployment creation timestamp then its stale
693+
return deploymentDocument.getTimestamp() < timestamp;
694+
}
695+
696+
private void updateDeploymentResultAsRejected(Deployment deployment, DeploymentTask deploymentTask,
697+
Throwable rejectionCause) {
698+
699+
DeploymentResult result = new DeploymentResult(DeploymentResult.DeploymentStatus.REJECTED, rejectionCause);
700+
701+
CompletableFuture<DeploymentResult> process = CompletableFuture.completedFuture(result);
702+
703+
currentDeploymentTaskMetadata =
704+
new DeploymentTaskMetadata(deployment, deploymentTask, process, new AtomicInteger(1), false);
705+
}
706+
618707
private void updateDeploymentResultAsFailed(Deployment deployment, DeploymentTask deploymentTask,
619708
boolean completeExceptionally, Throwable e) {
620709
DeploymentResult result = new DeploymentResult(DeploymentStatus.FAILED_NO_STATE_CHANGE, e);

0 commit comments

Comments
 (0)