Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add exception handling to task running #34

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 32 additions & 28 deletions odc/stats/proc.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,40 +202,44 @@ def _run(self, tasks: Iterable[Task], apply_eodatasets3) -> Iterator[TaskResult]
yield TaskResult(task, path, skipped=True)
continue

_log.debug("Building Dask Graph")
ds = proc.reduce(proc.input_data(task.datasets, task.geobox))
try:
_log.debug("Building Dask Graph")
ds = proc.reduce(proc.input_data(task.datasets, task.geobox))

_log.debug(f"Submitting to Dask ({task.location})")
ds = client.persist(ds, fifo_timeout="1ms")
_log.debug(f"Submitting to Dask ({task.location})")
ds = client.persist(ds, fifo_timeout="1ms")

aux: Optional[xr.Dataset] = None
aux: Optional[xr.Dataset] = None

# if no rgba setting in cog_ops:overrides, no rgba tif as ouput
if 'overrides' in cfg.cog_opts and 'rgba' in cfg.cog_opts['overrides']:
rgba = proc.rgba(ds)
if rgba is not None:
aux = xr.Dataset(dict(rgba=rgba))
# if no rgba setting in cog_ops:overrides, no rgba tif as ouput
if 'overrides' in cfg.cog_opts and 'rgba' in cfg.cog_opts['overrides']:
rgba = proc.rgba(ds)
if rgba is not None:
aux = xr.Dataset(dict(rgba=rgba))

cog = sink.dump(task, ds, aux, proc, apply_eodatasets3)
cog = client.compute(cog, fifo_timeout="1ms")
cog = sink.dump(task, ds, aux, proc, apply_eodatasets3)
cog = client.compute(cog, fifo_timeout="1ms")

_log.debug("Waiting for completion")
cancelled = False
_log.debug("Waiting for completion")
cancelled = False

for (dt, t_now) in wait_for_future(cog, cfg.future_poll_interval, t0=t0):
if cfg.heartbeat_filepath is not None:
self._register_heartbeat(cfg.heartbeat_filepath)
if tk:
tk.extend_if_needed(
cfg.job_queue_max_lease, cfg.renew_safety_margin
)
if cfg.max_processing_time > 0 and dt > cfg.max_processing_time:
_log.error(
f"Task {task.location} failed to finish on time: {dt}>{cfg.max_processing_time}"
)
cancelled = True
cog.cancel()
break
for (dt, t_now) in wait_for_future(cog, cfg.future_poll_interval, t0=t0):
if cfg.heartbeat_filepath is not None:
self._register_heartbeat(cfg.heartbeat_filepath)
if tk:
tk.extend_if_needed(
cfg.job_queue_max_lease, cfg.renew_safety_margin
)
if cfg.max_processing_time > 0 and dt > cfg.max_processing_time:
_log.error(
f"Task {task.location} failed to finish on time: {dt}>{cfg.max_processing_time}"
)
cancelled = True
cog.cancel()
break
except Exception as e:
_log.exception(f"Error during processing of {task.location} {e}")
result = TaskResult(task, error=str(e))

if cancelled:
result = TaskResult(task, error="Cancelled due to timeout")
Expand Down