Skip to content

Commit 7ff517d

Browse files
committed
[wip][tosquash] review comments and minor changes
to be squashed into the existing commits when we are happy with the PR
1 parent 0bd4202 commit 7ff517d

File tree

2 files changed

+21
-18
lines changed

2 files changed

+21
-18
lines changed

networking_nsxv3/common/synchronization.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -216,9 +216,9 @@ def _get(self):
216216

217217

218218
class JobRerunner():
219-
""" Thread save data structure to reschedule jobs when it is already running
219+
""" Thread save data structure to reschedule jobs when they are already running
220220
221-
When a job is retrieved from the active queue and running in a worker thread,
221+
When a job is retrieved from the active queue and already running in a worker thread,
222222
there is a chance that another job for the same object is added to the active
223223
queue and also started in a worker. While there then is some locking happening
224224
that should prevent race conditions, this still blocks the worker thread(s),
@@ -248,18 +248,21 @@ class JobRerunner():
248248
to pick up changes quickly.
249249
"""
250250

251+
_lname_running = 'JobRerunner-running'
252+
_lname_torerun = 'JobRerunner-torerun'
253+
251254
def __init__(self):
252-
self._ready = collections.deque()
253255
self._running = dict()
256+
self._to_rerun = collections.deque()
254257

255-
def pop(self) -> Runnable:
258+
def get_rerunnable(self) -> Runnable:
256259
# Let's also use the LockManager that is used for locking
257260
# in the code already. A simpler lock might do fine, but
258261
# for the LockManager we know it is working.
259262

260-
with LockManager.get_lock("JobRerunner-deque"):
263+
with LockManager.get_lock(self._lname_torerun):
261264
try:
262-
job = self._ready.popleft()
265+
job = self._to_rerun.popleft()
263266
LOG.debug("JobRerunner had rerunnable job: %s", job)
264267
LOG.info("JobRerunner (rerun) %s", job.get_statline())
265268
except IndexError:
@@ -270,7 +273,7 @@ def pop(self) -> Runnable:
270273

271274
def job_done(self, job: Runnable):
272275
LOG.debug("JobRerunner job_done called for %s", job)
273-
with LockManager.get_lock("JobRerunner-running"):
276+
with LockManager.get_lock(self._lname_running):
274277
count = self._running.get(job, 0)
275278

276279
if count == 1:
@@ -286,8 +289,8 @@ def job_done(self, job: Runnable):
286289
LOG.info("JobRerunner (requeue) %s", job.get_statline())
287290
LOG.debug("JobRerunner job %s is done, %d reruns requested, marking it for re-execution", job, count)
288291
del self._running[job]
289-
with LockManager.get_lock("JobRerunner-deque"):
290-
self._ready.append(job)
292+
with LockManager.get_lock(self._lname_torerun):
293+
self._to_rerun.append(job)
291294
else:
292295
# prevent the error from spreading
293296
del self._running[job]
@@ -302,7 +305,7 @@ def add_job(self, job: Runnable) -> bool:
302305
returns False if the job is already running and was marked for re-execution
303306
304307
"""
305-
with LockManager.get_lock("JobRerunner-running"):
308+
with LockManager.get_lock(self._lname_running):
306309
count = self._running.get(job, 0)
307310
if count <=0:
308311
# no job running, we can run the job
@@ -323,7 +326,7 @@ def add_job(self, job: Runnable) -> bool:
323326
# let's log these as info for debugging, they should be sufficient in prod
324327
# to find issues with the JobRerunner:
325328
LOG.info("JobRerunner stat: %d jobs waiting, total submission count: %d", len(self._running), sum)
326-
LOG.info("JobRerunner stat: %d jobs ready for re-execution", len(self._ready))
329+
LOG.info("JobRerunner stat: %d jobs ready for re-execution", len(self._to_rerun))
327330
return False
328331

329332

@@ -386,11 +389,11 @@ def _start(self):
386389
if self.active() < self._idle and self.passive() > 0:
387390
self._active.put_nowait(self._passive.get_nowait())
388391
self._passive.task_done()
389-
job = self._rerunner.pop()
390-
from_queue = False
392+
pulled_from_queue = False
393+
job = self._rerunner.get_rerunnable()
391394
if not job:
392395
job = self._active.get(block=True, timeout=TIMEOUT)
393-
from_queue = True
396+
pulled_from_queue = True
394397

395398
# check if we are allowed to run it,
396399
# if yes mark it as running and spawn it
@@ -408,7 +411,7 @@ def wrap(rerun, ajob):
408411

409412
self._workers.spawn(wrap, self._rerunner, job)
410413

411-
if from_queue:
414+
if pulled_from_queue:
412415
self._active.task_done()
413416
except eventlet.queue.Empty:
414417
LOG.info("No activity for the last {} seconds.".format(TIMEOUT))

networking_nsxv3/tests/unit/realization/test_coordination.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def nop(item):
4040
rerunner.job_done(job)
4141

4242
# no job was added twice, so no job should be re-executed
43-
self.assertEqual(rerunner.pop(), None)
43+
self.assertEqual(rerunner.get_rerunnable(), None)
4444

4545
def test_rerunner_rerun(self):
4646
from networking_nsxv3.common.synchronization import JobRerunner, Runnable
@@ -66,14 +66,14 @@ def nop(item):
6666
self.assertFalse(ret)
6767

6868
# mark them all done, then expect them
69-
# all to be returned once in pop,
69+
# all to be returned once in get_rerunnable,
7070
# although added twice again above
7171
for some_id in range(100):
7272
job = Runnable(str(some_id), nop)
7373
rerunner.job_done(job)
7474

7575
alljobs = []
76-
while job := rerunner.pop():
76+
while job := rerunner.get_rerunnable():
7777
alljobs.append(job)
7878

7979
# each job should be returned once

0 commit comments

Comments
 (0)