Skip to content

Commit d207937 — "support stopping paused crawls"
Parent: 5d09fb9

File tree

2 files changed: 18 additions (+18), 11 deletions (-11)

backend/btrixcloud/models.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -218,11 +218,13 @@ class UserOrgInfoOut(BaseModel):
218218

219219
# ============================================================================
220220
TYPE_RUNNING_STATES = Literal[
221-
"running", "pending-wait", "generate-wacz", "uploading-wacz", "paused"
221+
"running", "pending-wait", "generate-wacz", "uploading-wacz"
222222
]
223223
RUNNING_STATES = get_args(TYPE_RUNNING_STATES)
224224

225-
TYPE_WAITING_STATES = Literal["starting", "waiting_capacity", "waiting_org_limit"]
225+
TYPE_WAITING_STATES = Literal[
226+
"starting", "waiting_capacity", "waiting_org_limit", "paused"
227+
]
226228
WAITING_STATES = get_args(TYPE_WAITING_STATES)
227229

228230
TYPE_FAILED_STATES = Literal[

backend/btrixcloud/operator/crawls.py

+14-9
Original file line numberDiff line numberDiff line change
@@ -262,13 +262,15 @@ async def sync_crawls(self, data: MCSyncData):
262262
)
263263

264264
else:
265-
if crawl.paused:
266-
await self.set_state(
267-
"paused", status, crawl, allowed_from=RUNNING_AND_WAITING_STATES
268-
)
269-
270265
status.scale = 1
271266

267+
# stopping paused crawls
268+
if crawl.paused and crawl.stopping:
269+
status.stopReason = "stopped_by_user"
270+
status.stopping = True
271+
print(f"Paused crawl stopped by user, id: {crawl.id}")
272+
await self.mark_finished(crawl, status, "stopped_by_user")
273+
272274
children = self._load_redis(params, status, data.children)
273275

274276
storage_path = crawl.storage.get_storage_extra_path(oid)
@@ -881,7 +883,7 @@ async def sync_crawl_state(
881883
status.initRedis = False
882884

883885
# crawler pods already shut down, remove redis pause key to allow unpausing later
884-
if crawl.paused or not status.initRedis:
886+
if crawl.paused:
885887
await redis.delete(f"{crawl.id}:paused")
886888

887889
elif crawler_running and not redis:
@@ -892,6 +894,10 @@ async def sync_crawl_state(
892894
status.resync_after = self.fast_retry_secs
893895
return status
894896

897+
# only get here if at least one crawler pod is running
898+
if crawl.paused:
899+
await redis.set(f"{crawl.id}:paused", "1")
900+
895901
# update lastActiveTime if crawler is running
896902
if crawler_running:
897903
status.lastActiveTime = date_to_str(dt_now())
@@ -1299,6 +1305,8 @@ async def add_file_to_crawl(self, cc_data, crawl: CrawlSpec, redis):
12991305
crawl.db_crawl_id, crawl.is_qa, crawl_file, filecomplete.size
13001306
)
13011307

1308+
print("FILE ADDED", filecomplete.size)
1309+
13021310
# no replicas for QA for now
13031311
if crawl.is_qa:
13041312
return True
@@ -1454,9 +1462,6 @@ async def update_crawl_state(
14541462
# backwards compatibility with older crawler
14551463
await redis.set("crawl-stop", "1")
14561464

1457-
if crawl.paused:
1458-
await redis.set(f"{crawl.id}:paused", "1")
1459-
14601465
# resolve scale
14611466
if crawl.scale != status.scale:
14621467
status.scale = await self._resolve_scale(

Comments (0)