Skip to content

Commit

Permalink
[GOBBLIN-2144] Prevent GenerateWorkUnitsImpl from inadvertently cle…
Browse files Browse the repository at this point in the history
…aning up temp/staging dirs (#4039)
  • Loading branch information
phet committed Aug 27, 2024
1 parent b28b468 commit 444f266
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ public DagManagementTaskStreamImpl(Config config, Optional<DagActionStore> dagAc
this.dagProcEngineMetrics = dagProcEngineMetrics;
}

@Override
public synchronized void addDagAction(DagActionStore.LeaseParams leaseParams) {
log.info("Adding {} to queue...", leaseParams);
if (!this.leaseParamsQueue.offer(leaseParams)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import io.temporal.failure.ApplicationFailure;
import lombok.extern.slf4j.Slf4j;

import org.apache.gobblin.commit.DeliverySemantics;
import org.apache.gobblin.converter.initializer.ConverterInitializerFactory;
import org.apache.gobblin.destination.DestinationDatasetHandlerService;
import org.apache.gobblin.metrics.event.EventSubmitter;
Expand Down Expand Up @@ -117,9 +116,9 @@ protected static List<WorkUnit> generateWorkUnitsForJobState(JobState jobState,

// TODO: count total bytes for progress tracking!

boolean canCleanUp = canCleanStagingData(jobState);
boolean canCleanUpTempDirs = false; // unlike `AbstractJobLauncher` running the job end-to-end, this is Work Discovery only, so WAY TOO SOON for cleanup
DestinationDatasetHandlerService datasetHandlerService = closer.register(
new DestinationDatasetHandlerService(jobState, canCleanUp, eventSubmitterContext.create()));
new DestinationDatasetHandlerService(jobState, canCleanUpTempDirs, eventSubmitterContext.create()));
WorkUnitStream handledWorkUnitStream = datasetHandlerService.executeHandlers(workUnitStream);

// initialize writer and converter(s)
Expand All @@ -143,13 +142,4 @@ protected static List<WorkUnit> generateWorkUnitsForJobState(JobState jobState,

return AbstractJobLauncher.materializeWorkUnitList(trackedWorkUnitStream);
}

protected static boolean canCleanStagingData(JobState jobState) {
if (DeliverySemantics.EXACTLY_ONCE.equals(DeliverySemantics.parse(jobState))) {
String errMsg = "DeliverySemantics.EXACTLY_ONCE NOT currently supported; job " + jobState.getJobId();
log.error(errMsg);
throw ApplicationFailure.newNonRetryableFailure(errMsg, "Unsupported: DeliverySemantics.EXACTLY_ONCE");
}
return true;
}
}

0 comments on commit 444f266

Please sign in to comment.