[squashme] second round of PR comments, logline, lt in Runnable

mutax · mutax · commit 69f5e8fb0c7d · 2025-02-21T16:21:32.000+01:00
making Runnable order items with the same priority by age (oldest first)
diff --git a/networking_nsxv3/common/synchronization.py b/networking_nsxv3/common/synchronization.py
@@ -114,7 +114,7 @@ def get_statline(self) -> str:
 
         scheduled = '-'
         started = '-'
-        jobdone = '-'
+        runtime = '-'
         rescheduled = '-'
 
         if self._scheduled and self._created:
@@ -124,13 +124,13 @@ def get_statline(self) -> str:
             started = f"{self._started - self._scheduled:0.4f}"
 
         if self._jobdone and self._started:
-            jobdone = f"{self._jobdone - self._started:0.4f}"
+            runtime = f"{self._jobdone - self._started:0.4f}"
 
         if self._rescheduled and self._jobdone:
             rescheduled = f"{self._rescheduled - self._jobdone:0.4f}"
 
-        return (f"jobtimings: {self.identifier}: runcount: {self._runcount} age: {age} "
-                f"scheduled: {scheduled} started: {started} runtime: {jobdone} rescheduled: {rescheduled}")
+        return (f"timings for job {self} - runcount: {self._runcount} age: {age} "
+                f"scheduled: {scheduled} started: {started} runtime: {runtime} rescheduled: {rescheduled}")
 
     def __repr__(self):
         # lets not just use the object id, maybe
@@ -155,6 +155,11 @@ def __lt__(self, other):
         Only the passive queue is ordered by priority.
         The active queue is FiFo.
         """
+        # if the priority is equal, we want to order
+        # by creation time to handle oldest jobs first
+        if self.priority == other.priority:
+            return self._created < other._created
+
         return self.priority < other.priority
 
     def __hash__(self):
@@ -282,8 +287,7 @@ def get_rerunnable(self) -> Runnable:
             try:
                 job = self._to_rerun.popleft()
                 job.set_rescheduled()
-                LOG.debug("JobRerunner had rerunnable job: %s", job)
-                LOG.info("JobRerunner (rerun) %s", job.get_statline())
+                LOG.info("JobRerunner (about to rerun) %s", job.get_statline())
             except IndexError:
                 job = None
                 LOG.debug("JobRerunner had no rerunnable job")
@@ -297,16 +301,14 @@ def job_done(self, job: Runnable):
 
             if count == 1:
                 del self._running[job]
-                LOG.debug("JobRerunner job %s is done, no reruns requested", job)
-                LOG.info("JobRerunner (done) %s", job.get_statline())
+                LOG.info("JobRerunner (done, no reruns requested) %s", job.get_statline())
             elif count > 1:
                 # we only allow exactly one job to run at a time,
                 # all jobs arriving later will increase the counter while
                 # the job is still running or they get re-queued.
                 # if a job is in the ready deque it will at some point
                 # re-appear and so we can forget about the counter.
-                LOG.info("JobRerunner (requeue) %s", job.get_statline())
-                LOG.debug("JobRerunner job %s is done, %d reruns requested, marking it for re-execution", job, count)
+                LOG.info("JobRerunner (done, %d reruns requested) %s", count - 1, job.get_statline())
                 del self._running[job]
                 with LockManager.get_lock(self._lname_torerun):
                     self._to_rerun.append(job)
@@ -344,8 +346,8 @@ def add_job(self, job: Runnable) -> bool:
 
         # let's log these as info for debugging, they should be sufficient in prod
         # to find issues with the JobRerunner:
-        LOG.info("JobRerunner stat: %d jobs waiting, total submission count: %d", len(self._running), sum)
-        LOG.info("JobRerunner stat: %d jobs ready for re-execution", len(self._to_rerun))
+        LOG.info("JobRerunner stat: %d jobs tracked, total submission count: %d, ready for re-exection: %d",
+                 len(self._running), sum, len(self._to_rerun))
         return False