Skip to content

Commit 9444e90

Browse files
committed
[wip][tosquash] review comments and minor changes
to be squashed into the existing commits when we are happy with the PR
1 parent 2e7de74 commit 9444e90

File tree

2 files changed

+25
-21
lines changed

2 files changed

+25
-21
lines changed

networking_nsxv3/common/synchronization.py

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,9 @@ def __repr__(self):
213213

214214

215215
class JobRerunner():
216-
""" Thread save data structure to reschedule jobs when it is already running
216+
""" Thread save data structure to reschedule jobs when they are already running
217217
218-
When a job is retrieved from the active queue and running in a worker thread,
218+
When a job is retrieved from the active queue and already running in a worker thread,
219219
there is a chance that another job for the same object is added to the active
220220
queue and also started in a worker. While there then is some locking happening
221221
that should prevent race conditions, this still blocks the worker thread(s),
@@ -245,28 +245,32 @@ class JobRerunner():
245245
to pick up changes quickly.
246246
"""
247247

248+
_lname_running = 'JobRerunner-running'
249+
_lname_torerun = 'JobRerunner-torerun'
250+
_lname_statdct = 'JobRerunner-statdct'
251+
248252
def __init__(self):
249-
self._ready = collections.deque()
250253
self._running = dict()
254+
self._to_rerun = collections.deque()
251255
self._stats = dict()
252256

253-
def pop(self) -> Runnable:
257+
def get_rerunnable(self) -> Runnable:
254258
# Let's also use the LockManager that is used for locking
255259
# in the code already. A simpler lock might do fine, but
256260
# for the LockManager we know it is working.
257261

258262

259-
with LockManager.get_lock("JobRerunner-deque"):
263+
with LockManager.get_lock(self._lname_torerun):
260264
try:
261-
job = self._ready.popleft()
265+
job = self._to_rerun.popleft()
262266
LOG.debug("JobRerunner had rerunnable job: %s", job)
263267
except IndexError:
264268
job = None
265269
LOG.debug("JobRerunner had no rerunnable job")
266270

267271
if job:
268272
# no nested locks if not necessary
269-
with LockManager.get_lock("JobRerunner-stats"):
273+
with LockManager.get_lock(self._lname_statdct):
270274
stats = self._stats.get(job)
271275
if stats is not None:
272276
stats.restarted()
@@ -276,7 +280,7 @@ def pop(self) -> Runnable:
276280

277281
def job_done(self, job: Runnable):
278282
LOG.debug("JobRerunner job_done called for %s", job)
279-
with LockManager.get_lock("JobRerunner-running"):
283+
with LockManager.get_lock(self._lname_running):
280284
count = self._running.get(job, 0)
281285

282286
if count == 1:
@@ -290,12 +294,12 @@ def job_done(self, job: Runnable):
290294
# re-appear and so we can forget about the counter.
291295
LOG.debug("JobRerunner job %s is done, %d reruns requested, marking it for re-execution", job, count)
292296
del self._running[job]
293-
with LockManager.get_lock("JobRerunner-deque"):
294-
self._ready.append(job)
297+
with LockManager.get_lock(self._lname_torerun):
298+
self._to_rerun.append(job)
295299
else:
296300
LOG.warning("JobRerunner job_done called too often for job %s", job)
297301

298-
with LockManager.get_lock("JobRerunner-stats"):
302+
with LockManager.get_lock(self._lname_statdct):
299303
stats = self._stats.get(job)
300304
if stats is not None:
301305
stats.done()
@@ -313,7 +317,7 @@ def add_job(self, job: Runnable) -> bool:
313317
returns False if the job is already running and was marked for re-execution
314318
315319
"""
316-
with LockManager.get_lock("JobRerunner-running"):
320+
with LockManager.get_lock(self._lname_running):
317321
count = self._running.get(job, 0)
318322
if count > 0:
319323
count += 1
@@ -327,15 +331,15 @@ def add_job(self, job: Runnable) -> bool:
327331
# let's log these 2 with info for debugging, they should be sufficient in prod
328332
# to find issues with the JobRerunner:
329333
LOG.info("JobRerunner stat: %d jobs waiting, total submission count: %d", len(self._running), sum)
330-
LOG.info("JobRerunner stat: %d jobs ready for re-execution", len(self._ready))
334+
LOG.info("JobRerunner stat: %d jobs ready for re-execution", len(self._to_rerun))
331335
return False
332336

333337
# no job running, we can run the job
334338
self._running[job] = 1
335339

336340
LOG.debug("JobRerunner no identical job is currently running, can start %s", job)
337341

338-
with LockManager.get_lock("JobRerunner-stats"):
342+
with LockManager.get_lock(self._lname_statdct):
339343
stats = self._stats.get(job)
340344
self._stats[job] = JobStats(str(job))
341345

@@ -404,11 +408,11 @@ def _start(self):
404408
if self.active() < self._idle and self.passive() > 0:
405409
self._active.put_nowait(self._passive.get_nowait())
406410
self._passive.task_done()
407-
job = self._rerunner.pop()
408-
from_queue = False
411+
pulled_from_queue = False
412+
job = self._rerunner.get_rerunnable()
409413
if not job:
410414
job = self._active.get(block=True, timeout=TIMEOUT)
411-
from_queue = True
415+
pulled_from_queue = True
412416

413417
# check if we are allowed to run it,
414418
# if yes mark it as running and spawn it
@@ -424,7 +428,7 @@ def wrap(rerun, ajob):
424428

425429
self._workers.spawn(wrap, self._rerunner, job)
426430

427-
if from_queue:
431+
if pulled_from_queue:
428432
self._active.task_done()
429433
except eventlet.queue.Empty:
430434
LOG.info("No activity for the last {} seconds.".format(TIMEOUT))

networking_nsxv3/tests/unit/realization/test_coordination.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def nop(item):
4040
rerunner.job_done(job)
4141

4242
# no job was added twice, so no job should be re-executed
43-
self.assertEqual(rerunner.pop(), None)
43+
self.assertEqual(rerunner.get_rerunnable(), None)
4444

4545
def test_rerunner_rerun(self):
4646
from networking_nsxv3.common.synchronization import JobRerunner, Runnable
@@ -66,14 +66,14 @@ def nop(item):
6666
self.assertFalse(ret)
6767

6868
# mark them all done, then expect them
69-
# all to be returned once in pop,
69+
# all to be returned once in get_rerunnable,
7070
# although added twice again above
7171
for some_id in range(100):
7272
job = Runnable(str(some_id), nop)
7373
rerunner.job_done(job)
7474

7575
alljobs = []
76-
while job := rerunner.pop():
76+
while job := rerunner.get_rerunnable():
7777
alljobs.append(job)
7878

7979
# each job should be returned once

0 commit comments

Comments
 (0)