Skip to content

Commit 5f73712

Browse files
committed
TEZ-4569: SCATTER_GATHER + BROADCAST hangs on DAG Recovery
1 parent deac035 commit 5f73712

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2817,6 +2817,18 @@ private VertexState setupVertex() {
28172817
return VertexState.INITED;
28182818
}
28192819

2820+
private boolean canSkipInitializingParents() {
2821+
// Both cases use RootInputVertexManager. RootInputVertexManager can start tasks even though
2822+
// any parents are not fully initialized.
2823+
if (vertexPlan.hasVertexManagerPlugin()) {
2824+
final VertexManagerPluginDescriptor pluginDesc = DagTypeConverters
2825+
.convertVertexManagerPluginDescriptorFromDAGPlan(vertexPlan.getVertexManagerPlugin());
2826+
return pluginDesc.getClassName().equals(RootInputVertexManager.class.getName());
2827+
} else {
2828+
return inputsWithInitializers != null;
2829+
}
2830+
}
2831+
28202832
private boolean isVertexInitSkippedInParentVertices() {
28212833
for (Map.Entry<Vertex, Edge> entry : sourceVertices.entrySet()) {
28222834
if(!(((VertexImpl) entry.getKey()).isVertexInitSkipped())) {
@@ -2843,7 +2855,7 @@ private void assignVertexManager() throws TezException {
28432855
if (recoveryData != null && recoveryData.shouldSkipInit()
28442856
&& (recoveryData.isVertexTasksStarted() ||
28452857
recoveryData.getVertexConfigurationDoneEvent().isSetParallelismCalled())
2846-
&& isVertexInitSkippedInParentVertices()) {
2858+
&& (canSkipInitializingParents() || isVertexInitSkippedInParentVertices())) {
28472859
// Replace the original VertexManager with NoOpVertexManager if the reconfiguration is done in the last AM attempt
28482860
VertexConfigurationDoneEvent reconfigureDoneEvent = recoveryData.getVertexConfigurationDoneEvent();
28492861
if (LOG.isInfoEnabled()) {

0 commit comments

Comments
 (0)