misc tweaks:
- better error handling for not found resources, ensure 404
- typo in k8smanager
- add pylintrc
- ensure manual jobs are deleted when complete
- fix typos, reformat
ikreymer committed Aug 26, 2021
1 parent f1a816b commit 223658c
Showing 5 changed files with 61 additions and 42 deletions.
18 changes: 12 additions & 6 deletions backend/crawlconfigs.py
@@ -141,10 +141,11 @@ async def add_crawl_config(

     async def update_crawl_schedule(self, cid: str, update: UpdateSchedule):
         """ Update schedule for existing crawl config"""

         if not await self.crawl_configs.find_one_and_update(
             {"_id": cid}, {"$set": {"schedule": update.schedule}}
         ):
-            return None
+            return False

         await self.crawl_manager.update_crawl_schedule(cid, update.schedule)
         return True
@@ -222,18 +223,21 @@ async def update_crawl_schedule(
         cid: str,
     ):

+        success = False
         try:
-            if not await ops.update_crawl_schedule(cid, update):
-                raise HTTPException(
-                    status_code=404, detail=f"Crawl Config '{cid}' not found"
-                )
+            success = await ops.update_crawl_schedule(cid, update)

         except Exception as e:
             # pylint: disable=raise-missing-from
             raise HTTPException(
                 status_code=403, detail=f"Error updating crawl config: {e}"
             )

+        if not success:
+            raise HTTPException(
+                status_code=404, detail=f"Crawl Config '{cid}' not found"
+            )
+
         return {"updated": cid}

     @router.post("/{cid}/run")
@@ -265,7 +269,9 @@ async def delete_crawl_config(
     ):
         result = await ops.delete_crawl_config(cid, archive)
         if not result or not result.deleted_count:
-            raise HTTPException(status_code=404, detail="Crawl Config Not Found")
+            raise HTTPException(
+                status_code=404, detail=f"Crawl Config '{cid}' Not Found"
+            )

         return {"deleted": 1}

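Worth noting why the 404 moved: update_crawl_schedule now returns False for a missing config instead of raising inside the try block, because a raise there would be caught by the broad `except Exception` and re-wrapped as a 403. A minimal sketch of the pattern, with hypothetical names (fake_update stands in for the ops call):

from fastapi import FastAPI, HTTPException

app = FastAPI()

KNOWN_CONFIGS = {"existing-config"}  # hypothetical in-memory stand-in for the config store

async def fake_update(cid: str) -> bool:
    # stands in for ops.update_crawl_schedule(); False means "config not found"
    return cid in KNOWN_CONFIGS

@app.post("/configs/{cid}/schedule")
async def update_schedule(cid: str):
    success = False
    try:
        success = await fake_update(cid)
    except Exception as exc:
        # pylint: disable=raise-missing-from
        raise HTTPException(status_code=403, detail=f"Error updating: {exc}")

    # raised outside the try block, so the broad except above can never
    # catch the 404 and re-wrap it as a 403
    if not success:
        raise HTTPException(status_code=404, detail=f"Config '{cid}' not found")

    return {"updated": cid}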
39 changes: 21 additions & 18 deletions backend/crawls.py
@@ -125,7 +125,7 @@ def init_crawls_api(app, mdb, crawl_manager, crawl_config_ops, archives):

     archive_crawl_dep = archives.archive_crawl_dep

-    @app.post("/crawls/done", tags=["crawls"])
+    @app.post("/_crawls/done", tags=["_internal"])
     async def crawl_done(msg: CrawlCompleteIn):
         loop = asyncio.get_running_loop()
         loop.create_task(ops.on_handle_crawl_complete(msg))
@@ -152,25 +152,22 @@ async def list_crawls(archive: Archive = Depends(archive_crawl_dep)):
"/archives/{aid}/crawls/{crawl_id}/cancel",
tags=["crawls"],
)
async def crawl_cancel_stop(
async def crawl_cancel_immediately(
crawl_id, archive: Archive = Depends(archive_crawl_dep)
):
crawl = None
try:
crawl = await crawl_manager.stop_crawl(crawl_id, archive.id, graceful=False)
if not crawl:
raise HTTPException(
status_code=404, detail=f"Crawl not found: {crawl_id}"
)

await ops.store_crawl(crawl)

except HTTPException as httpe:
raise httpe

except Exception as exc:
# pylint: disable=raise-missing-from
raise HTTPException(status_code=400, detail=f"Error Canceling Crawl: {exc}")

if not crawl:
raise HTTPException(status_code=404, detail=f"Crawl not found: {crawl_id}")

await ops.store_crawl(crawl)

return {"canceled": True}

@app.post(
@@ -180,27 +177,33 @@ async def crawl_cancel_stop(
     async def crawl_graceful_stop(
         crawl_id, archive: Archive = Depends(archive_crawl_dep)
     ):
+        canceled = False
         try:
             canceled = await crawl_manager.stop_crawl(
                 crawl_id, archive.id, graceful=True
             )
-            if not canceled:
-                raise HTTPException(
-                    status_code=404, detail=f"Crawl not found: {crawl_id}"
-                )
-
-        except HTTPException as httpe:
-            raise httpe
-
         except Exception as exc:
             # pylint: disable=raise-missing-from
             raise HTTPException(status_code=400, detail=f"Error Stopping Crawl: {exc}")

+        if not canceled:
+            raise HTTPException(status_code=404, detail=f"Crawl not found: {crawl_id}")
+
         return {"stopped_gracefully": True}

     @app.post("/archives/{aid}/crawls/delete", tags=["crawls"])
     async def delete_crawls(
         delete_list: DeleteCrawlList, archive: Archive = Depends(archive_crawl_dep)
     ):
+        try:
+            for crawl_id in delete_list:
+                await crawl_manager.stop_crawl(crawl_id, archive.id, graceful=False)
+
+        except Exception as exc:
+            # pylint: disable=raise-missing-from
+            raise HTTPException(status_code=400, detail=f"Error Stopping Crawl: {exc}")
+
         res = await ops.delete_crawls(archive.id, delete_list)

         return {"deleted": res}
30 changes: 20 additions & 10 deletions backend/dockerman.py
@@ -109,6 +109,7 @@ async def cleanup_loop(self):
                 timeout = int(container["Labels"]["btrix.timeout"])
                 actual = int(time.time()) - int(container["Created"])
                 if actual >= timeout:
+                    # pylint: disable=line-too-long
                     print(
                         f"Crawl {container['Id']} running for {actual} seconds, exceeded timeout {timeout}, stopping..."
                     )
@@ -181,18 +182,27 @@ async def list_running_crawls(self, aid):

     async def stop_crawl(self, crawl_id, aid, graceful=True):
         """ Stop crawl, if not graceful, issue SIGUSR1 to indicate cancelation """
-        container = await self.client.containers.get(crawl_id)
-
-        if container["Config"]["Labels"]["btrix.archive"] != aid:
-            return None
-
-        if not graceful:
-            await container.kill(signal="SIGUSR1")
-            result = self._make_crawl_for_container(container, "canceled", True)
-        else:
-            result = True
-
-        await container.kill(signal="SIGTERM")
+        result = None
+
+        try:
+            container = await self.client.containers.get(crawl_id)
+
+            if container["Config"]["Labels"]["btrix.archive"] != aid:
+                return None
+
+            if not graceful:
+                await container.kill(signal="SIGUSR1")
+                result = self._make_crawl_for_container(container, "canceled", True)
+            else:
+                result = True
+
+            await container.kill(signal="SIGTERM")
+        except aiodocker.exceptions.DockerError as exc:
+            if exc.status == 404:
+                return None
+
+            raise exc

         return result
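The new try/except turns a DockerError with status 404 into a plain "not found" result, so callers can map it to an HTTP 404 instead of a 400. A self-contained sketch of that logic, assuming a reachable Docker daemon; the container id is a placeholder:

import asyncio
import aiodocker

async def stop_container(container_id: str, graceful: bool = True):
    docker = aiodocker.Docker()
    try:
        container = await docker.containers.get(container_id)
        if not graceful:
            # SIGUSR1 tells the crawler to record the stop as a cancelation
            await container.kill(signal="SIGUSR1")
        await container.kill(signal="SIGTERM")
    except aiodocker.exceptions.DockerError as exc:
        if exc.status == 404:
            return None  # container already gone: report "not found"
        raise
    finally:
        await docker.close()
    return True

print(asyncio.run(stop_container("some-container-id", graceful=False)))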

@@ -351,7 +361,7 @@ async def _run_crawl_now(self, storage, labels, volume, schedule="", manual=True
f"STORE_ENDPOINT_URL={endpoint_with_coll_url}",
f"STORE_ACCESS_KEY={storage.access_key}",
f"STORE_SECRET_KEY={storage.secret_key}",
"WEBHOOK_URL=http://backend:8000/crawls/done",
"WEBHOOK_URL=http://backend:8000/_crawls/done",
]

labels["btrix.run.schedule"] = schedule
14 changes: 6 additions & 8 deletions backend/k8sman.py
@@ -123,7 +123,7 @@ async def add_crawl_config(
"STORE_ENDPOINT_URL": endpoint_with_coll_url,
"STORE_ACCESS_KEY": storage.access_key,
"STORE_SECRET_KEY": storage.secret_key,
"WEBHOOK_URL": "http://browsertrix-cloud.default:8000/crawls/done",
"WEBHOOK_URL": "http://browsertrix-cloud.default:8000/_crawls/done",
},
)

@@ -169,7 +169,7 @@ async def add_crawl_config(

         return cron_job

-    async def update_crawl_config(self, cid, schedule):
+    async def update_crawl_schedule(self, cid, schedule):
         """ Update the schedule for existing crawl config """

         cron_jobs = await self.batch_beta_api.list_namespaced_cron_job(
@@ -195,15 +195,12 @@ async def update_crawl_config(self, cid, schedule):
             name=cron_job.metadata.name, namespace=self.namespace, body=cron_job
         )

-    async def run_crawl_config(self, cid, manual=True, schedule=""):
+    async def run_crawl_config(self, cid):
         """ Run crawl job for cron job based on specified crawlconfig id (cid) """
         cron_jobs = await self.batch_beta_api.list_namespaced_cron_job(
             namespace=self.namespace, label_selector=f"btrix.crawlconfig={cid}"
         )

-        if not manual or schedule:
-            raise Exception("Manual trigger not supported")
-
         if len(cron_jobs.items) != 1:
             raise Exception("Crawl Config Not Found")
@@ -245,8 +242,8 @@ async def validate_crawl_complete(self, crawlcomplete):
             return None

         manual = job.metadata.annotations.get("btrix.run.manual") == "1"
-        if not manual:
-            await self._delete_job(job.metadata.name)
+        if manual:
+            self.loop.create_task(self._delete_job(job.metadata.name))

         return self._make_crawl_for_job(
             job,
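Two changes land here: manual jobs, not scheduled ones, are now the ones cleaned up on completion (matching the commit message), and the deletion is scheduled with create_task rather than awaited, so completion handling never blocks on the Kubernetes API. A minimal fire-and-forget sketch, with illustrative names:

import asyncio

async def delete_job(name: str):
    # stand-in for the k8s delete call behind self._delete_job()
    await asyncio.sleep(0.1)
    print(f"deleted job {name}")

async def handle_complete(job_name: str, manual: bool):
    if manual:
        # schedule cleanup without awaiting it
        asyncio.get_running_loop().create_task(delete_job(job_name))
    return {"crawl": job_name}

async def main():
    print(await handle_complete("manual-job-1", manual=True))
    await asyncio.sleep(0.2)  # demo only: give the background task time to run

asyncio.run(main())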
@@ -426,6 +423,7 @@ async def _create_run_now_job(self, cron_job):
"""Create new job from cron job to run instantly"""
annotations = cron_job.spec.job_template.metadata.annotations
annotations["btrix.run.manual"] = "1"
annotations["btrix.run.schedule"] = ""

# owner_ref = client.V1OwnerReference(
# kind="CronJob",
2 changes: 2 additions & 0 deletions pylintrc
@@ -0,0 +1,2 @@
+[MESSAGE CONTROL]
+disable=duplicate-code
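For context: duplicate-code is pylint's R0801 similarity check, which flags near-identical blocks across modules. Disabling it repo-wide is a judgment call, presumably made here because dockerman.py and k8sman.py deliberately implement the same crawl-manager interface in parallel.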
