
Commit 1358eec

sven-rosenzweig authored and mutax committed
Reworking scheduling of jobs to runners
The main change in this PR is a modification of the way jobs are handled in the NSX-T agent. Please see the JobRerunner class for an in-depth explanation of the changes.

Before this commit, jobs were added to one of two queues, called active and passive. The active queue contains all requests coming in via API calls, while the passive queue is filled with maintenance and resync jobs. Both queues used to be priority queues that allow each element to be added only once. Jobs were taken from the active queue until it was empty; then jobs from the passive queue were moved over to the active queue. Jobs taken from the active queue were submitted to a worker pool, allowing up to 40 greenthreads to run jobs concurrently. However, to avoid race conditions, only one job is allowed to run per OpenStack ID. If more than one job for the same ID is scheduled on the pool, the additional jobs wait on a lock and block their worker threads until the first job is done. This means the agent can be blocked and appear fully occupied, handling 40 tasks simultaneously, while in reality most or all tasks are waiting for each other.

Instead of scheduling all jobs to the worker pool immediately and risking such a lock-up, we now first check whether the same job is already running. If it is, we rerun the job after it has finished and schedule another job that can actually run on a worker instead. The rerun is necessary because a job can run for several seconds and new API requests may arrive during that time. With this change we also avoid rerunning the same job more than once when additional requests arrive while the job is already marked for re-execution. (A simplified sketch of this bookkeeping follows right after this message.)

Additional fixes and enhancements in this commit:

UniqPriorityQueue:
- fix add(): If a job is about to be added a second time, but with a higher priority, the job is correctly not added again, but the priority of the existing job was not updated. This means jobs from the passive queue, which have a lower priority, would always be executed last, even if a high-priority job for the same object arrived via an API call. We also changed the active queue to a FIFO, to prevent passive jobs from never getting executed and to keep the execution order of API calls where possible. With the fix in place, however, we can switch back to a priority queue if needed. (A standalone sketch of the priority update appears after the diff.)

Runnable:
- fix __hash__() and make __repr__ more verbose: The Runnable class did not follow the requirement that objects which compare equal must also have the same hash value. Also, the Runnable only took the OpenStack ID into account, not the name of the function. As a result a Runnable could, for example, not be used reliably as a key in dictionaries.
- __repr__: updated to include the name of the function, so the logs show what kind of update is being executed.
- __lt__: order Runnables with the same priority by age, preventing jobs from overtaking each other.
- add timing info for logging: We currently do not get good information about the timings or basic stats of running jobs. This commit adds timing info to Runnable and a method to extract it as a string for logging.
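The rerun bookkeeping described above boils down to a counter per job identity: the first submission runs, later duplicates only bump the counter, and at most one rerun is queued when the running job finishes. The following is a minimal sketch of that idea only, not the actual JobRerunner from the diff below (which uses eventlet and the project's LockManager); the RerunTracker name and the plain threading.Lock are illustrative assumptions.

    # Simplified illustration of the run-or-mark-for-rerun idea; not the agent code.
    import threading


    class RerunTracker:
        def __init__(self):
            self._lock = threading.Lock()
            self._running = {}   # job identity -> number of submissions seen
            self._to_rerun = []  # identities ready to be executed once more

        def add_job(self, ident):
            """Return True if the job may start now, False if it was only marked."""
            with self._lock:
                if self._running.get(ident, 0) <= 0:
                    self._running[ident] = 1
                    return True
                # Already running: remember that at least one more run is wanted.
                self._running[ident] += 1
                return False

        def job_done(self, ident):
            """Called when a worker finishes; queue at most one rerun."""
            with self._lock:
                count = self._running.pop(ident, 0)
                if count > 1:
                    self._to_rerun.append(ident)


    tracker = RerunTracker()
    assert tracker.add_job(("port-1", "update")) is True    # first submission runs
    assert tracker.add_job(("port-1", "update")) is False   # duplicate is only marked
    assert tracker.add_job(("port-1", "update")) is False   # further duplicates collapse
    tracker.job_done(("port-1", "update"))
    assert tracker._to_rerun == [("port-1", "update")]      # exactly one rerun queued

However many duplicates arrive while a job is running, exactly one rerun ends up in the queue, which is the collapsing behaviour the message above describes.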
1 parent 4d7abed commit 1358eec

File tree

2 files changed: +458 −14 lines changed


networking_nsxv3/common/synchronization.py

Lines changed: 276 additions & 14 deletions
@@ -5,6 +5,7 @@
 eventlet.monkey_patch()

 import networking_nsxv3.prometheus.exporter as EXPORTER
+from networking_nsxv3.common.locking import LockManager
 from oslo_log import log as logging
 from oslo_config import cfg
 import enum
@@ -71,9 +72,69 @@ def __init__(self, idn, fn, priority=Priority.LOWEST):
         self.priority = priority
         self.idn = idn
         self.fn = fn
+        self._runcount = 0
+        self._created = time.time()
+        self._scheduled = None
+        self._started = None
+        self._jobdone = None
+        self._rescheduled = None
+
+    @property
+    def identifier(self) -> tuple:
+        return (self.idn, self.fn.__name__)
+
+    def set_scheduled(self):
+        """ called when we submit the job to the worker pool """
+        self._scheduled = time.time()
+        self._started = None
+        self._jobdone = None
+        self._rescheduled = None
+
+    def set_start(self):
+        """ called in our wrapper when we actually start fn """
+        # we need to reset the other timings
+        # because we use the same job for rerunning
+        self._started = time.time()
+        self._jobdone = None
+        self._rescheduled = None
+        self._runcount += 1
+
+    def set_done(self):
+        """ called in our wrapper when fn returns """
+        self._jobdone = time.time()
+        self._rescheduled = None
+
+    def set_rescheduled(self):
+        """ called when the job is taken out of the rerun-queue """
+        self._rescheduled = time.time()
+
+    def get_statline(self) -> str:
+
+        age = f"{time.time() - self._created:0.4f}"
+
+        scheduled = '-'
+        started = '-'
+        runtime = '-'
+        rescheduled = '-'
+
+        if self._scheduled and self._created:
+            scheduled = f"{self._scheduled - self._created:0.4f}"
+
+        if self._started and self._scheduled:
+            started = f"{self._started - self._scheduled:0.4f}"
+
+        if self._jobdone and self._started:
+            runtime = f"{self._jobdone - self._started:0.4f}"
+
+        if self._rescheduled and self._jobdone:
+            rescheduled = f"{self._rescheduled - self._jobdone:0.4f}"
+
+        return (f"timings for job {self} - runcount: {self._runcount} age: {age} "
+                f"scheduled: {scheduled} started: {started} runtime: {runtime} rescheduled: {rescheduled}")

     def __repr__(self):
-        return str(self.idn)
+        # lets not just use the object id, maybe
+        return str(self.identifier)

     def __eq__(self, other):
         """
@@ -90,25 +151,204 @@ def __ne__(self, other):
         return (not self.__eq__(other))

     def __lt__(self, other):
-        """ Order Runnable by their priority """
+        """ Order Runnable by their priority
+        Only the passive queue is ordered by priority.
+        The active queue is FiFo.
+        """
+        # if the priority is equal, we want to order
+        # by creation time to handle oldest jobs first
+        if self.priority == other.priority:
+            return self._created < other._created
+
         return self.priority < other.priority

     def __hash__(self):
-        return hash(self.__repr__())
+        # with the original repr this is broken, as __eq__ takes the fn into
+        # account as well!
+        return hash((self.idn, self.fn))
+
+
+class UniqFiFoQueue(eventlet.queue.Queue):
+    """
+    A subclass of :class:`Queue` that maintains job order by insertion.
+
+    Problem with the old approach:
+    Jobs may starve in the active queue.
+
+    - The internal sync run adds up to 20 "outdated" objects to the passive queue.
+    - If space is available, jobs move from the passive to the active queue.
+    - The active queue prioritizes jobs based on priority while enforcing uniqueness
+      (determined by OpenStack ID and execution method).
+
+    Issues:
+    1. Job starvation – If high-priority jobs keep arriving and the agent is at full capacity,
+       low-priority jobs may never get processed.
+    2. Blocking of new high-priority jobs – Lower-priority jobs in the passive queue
+       can prevent new high-priority jobs from being added due to the uniqueness constraint
+       (see the `__eq__` method of the `Runnable` class).
+    3. Adding a job with the **HIGHEST** priority does not
+       guarantee execution in insertion order. See
+       https://docs.python.org/3/library/heapq.html#priority-queue-implementation-notes
+       for details.
+    """
+
+    def _init(self, maxsize):
+        self.queue = collections.deque()
+
+    def _put(self, item):
+        if item not in self.queue:
+            # Add item to the right side of the deque
+            self.queue.append(item)
+            self._put_bookkeeping()
+        else:
+            LOG.info("Not adding item %s to fifo queue, already present!", item)
+
+    def _get(self):
+        return self.queue.popleft()


 class UniqPriorityQueue(eventlet.queue.Queue):

     def _init(self, maxsize):
         self.queue = []

-    def _put(self, item, heappush=heapq.heappush):
-        if item not in self.queue:
-            heappush(self.queue, item)
+    def _put(self, item):
+
+        try:
+            x = self.queue[self.queue.index(item)]
+            # if the prio of the new item is higher (smaller value)
+            # update the prio of the existing job and repair the heap
+            if item.priority < x.priority:
+                LOG.debug("Not adding item %s to prio queue, already present, but updating prio %s -> %s",
+                          item, x.priority, item.priority)
+                x.priority = item.priority
+                heapq.heapify(self.queue)
+            else:
+                LOG.debug("Not adding item %s to prio queue, already present!", item)
+            return
+        except ValueError:
+            # item is not in list
+            pass
+
+        # item not found, add it
+        heapq.heappush(self.queue, item)
         self._put_bookkeeping()

-    def _get(self, heappop=heapq.heappop):
-        return heappop(self.queue)
+    def _get(self):
+        return heapq.heappop(self.queue)
+
+
+class JobRerunner():
+    """ Thread-safe data structure to reschedule jobs when they are already running
+
+    When a job is retrieved from the active queue and already running in a worker thread,
+    there is a chance that another job for the same object is added to the active
+    queue and also started in a worker. While there then is some locking happening
+    that should prevent race conditions, this still blocks the worker thread(s),
+    which degrades performance and re-executes the job unnecessarily often.
+
+    To prevent this, we use this JobRerunner:
+
+    In this class we keep track of all jobs currently running in the workers.
+
+    When taking a new job from the active queue, we check if this job is
+    already running. If this is not the case, we will add it to our bookkeeping
+    here and it will be sent to a worker, wrapped in a function that will call
+    the JobRerunner on job completion to update the bookkeeping.
+
+    If we find the job already running, the new job will be dropped, but a counter
+    for the job will be increased to mark the job for re-execution once it is done,
+    in case a change to the object had occurred while the worker was running.
+
+    When a worker is done with a job, it will notify us and we can either
+    remove the id from our bookkeeping or, if a new job was added in the meantime,
+    mark it as ready for re-execution. Note that a job will intentionally only be
+    re-executed once, independently of how often it was added while a worker was
+    already running it.
+
+    Before taking a new job from the active queue in the runner, we first check in
+    the JobRerunner for a ready job, making sure that these jobs are prioritized
+    to pick up changes quickly.
+    """
+
+    _lname_running = 'JobRerunner-running'
+    _lname_torerun = 'JobRerunner-torerun'
+
+    def __init__(self):
+        self._running = dict()
+        self._to_rerun = collections.deque()
+
+    def get_rerunnable(self) -> Runnable:
+        # Let's also use the LockManager that is used for locking
+        # in the code already. A simpler lock might do fine, but
+        # for the LockManager we know it is working.
+
+        with LockManager.get_lock(self._lname_torerun):
+            try:
+                job = self._to_rerun.popleft()
+                job.set_rescheduled()
+                LOG.info("JobRerunner (about to rerun) %s", job.get_statline())
+            except IndexError:
+                job = None
+                LOG.debug("JobRerunner had no rerunnable job")
+
+        return job
+
+    def job_done(self, job: Runnable):
+        LOG.debug("JobRerunner job_done called for %s", job)
+        with LockManager.get_lock(self._lname_running):
+            count = self._running.get(job, 0)
+
+            if count == 1:
+                del self._running[job]
+                LOG.info("JobRerunner (done, no reruns requested) %s", job.get_statline())
+            elif count > 1:
+                # we only allow exactly one job to run at a time,
+                # all jobs arriving later will increase the counter while
+                # the job is still running or they get re-queued.
+                # if a job is in the ready deque it will at some point
+                # re-appear and so we can forget about the counter.
+                LOG.info("JobRerunner (done, %d reruns requested) %s", count - 1, job.get_statline())
+                del self._running[job]
+                with LockManager.get_lock(self._lname_torerun):
+                    self._to_rerun.append(job)
+            else:
+                # prevent the error from spreading
+                del self._running[job]
+                LOG.warning("JobRerunner job_done called too often for job %s", job)
+
+    def add_job(self, job: Runnable) -> bool:
+        """ Add job to list of jobs running/to be started or mark it for re-execution
+
+        returns True if the job is currently not running and should
+        be scheduled next / added to the workers.
+
+        returns False if the job is already running and was marked for re-execution
+        """
+        with LockManager.get_lock(self._lname_running):
+            count = self._running.get(job, 0)
+            if count <= 0:
+                # no job running, we can run the job
+                # if the counter is < 0 our accounting is wrong,
+                # so we fix it and run the job.
+                self._running[job] = 1
+                LOG.debug("JobRerunner no identical job is currently running, can start %s", job)
+                return True
+
+            self._running[job] = count + 1
+            LOG.debug("JobRerunner job %s already running, marked for rescheduling, count: %d ", job, count)
+
+            sum = 0
+            for job, scount in self._running.items():
+                sum += scount
+                LOG.debug("JobRerunner stat: job %s is running, submission count: %d", job, scount)
+
+            # let's log these as info for debugging, they should be sufficient in prod
+            # to find issues with the JobRerunner:
+            LOG.info("JobRerunner stat: %d jobs tracked, total submission count: %d, ready for re-execution: %d",
+                     len(self._running), sum, len(self._to_rerun))
+            return False


 class Runner(object):
@@ -129,11 +369,12 @@ class Runner(object):
     def __init__(self, active_size=INFINITY, passive_size=INFINITY,
                  workers_size=1):
         # if queue_size is < 0, the queue size is infinite.
-        self._active = UniqPriorityQueue(maxsize=active_size)
+        self._active = UniqFiFoQueue(active_size)
         self._passive = UniqPriorityQueue(maxsize=passive_size)
         self._workers = eventlet.greenpool.GreenPool(size=workers_size)
         self._idle = workers_size
         self._state = "not started"
+        self._rerunner = JobRerunner()

     def run(self, priority, ids, fn):
         """ Submit a job with priority
@@ -150,7 +391,7 @@ def run(self, priority, ids, fn):

         for jid in ids:
             try:
-                LOG.info(MESSAGE.format("Enqueued", jid, priority.name, fn.__name__))
+                LOG.info(MESSAGE.format("About to enqueue", jid, priority.name, fn.__name__))

                 job = Runnable(jid, fn, priority.value)
                 if priority.value == Priority.HIGHEST:
@@ -169,10 +410,31 @@ def _start(self):
                 if self.active() < self._idle and self.passive() > 0:
                     self._active.put_nowait(self._passive.get_nowait())
                     self._passive.task_done()
-                job = self._active.get(block=True, timeout=TIMEOUT)
-                LOG.info(MESSAGE.format("Processing", job.idn, Priority(job.priority).name, job.fn.__name__))
-                self._workers.spawn(job.fn, job.idn)#.wait()
-                self._active.task_done()
+                pulled_from_queue = False
+                job = self._rerunner.get_rerunnable()
+                if not job:
+                    job = self._active.get(block=True, timeout=TIMEOUT)
+                    pulled_from_queue = True
+
+                # check if we are allowed to run it,
+                # if yes mark it as running and spawn it
+                if self._rerunner.add_job(job):
+                    LOG.info(MESSAGE.format("Processing", job.idn, Priority(job.priority).name, job.fn.__name__))
+
+                    # ideally we would be able to add a callback to the
+                    # greenthread, but this is hidden in the pool, so
+                    # let's wrap the function once more.
+                    def wrap(rerun, ajob):
+                        ajob.set_start()
+                        ajob.fn(ajob.idn)
+                        ajob.set_done()
+                        rerun.job_done(ajob)
+
+                    job.set_scheduled()
+                    self._workers.spawn(wrap, self._rerunner, job)
+
+                if pulled_from_queue:
+                    self._active.task_done()
             except eventlet.queue.Empty:
                 LOG.info("No activity for the last {} seconds.".format(TIMEOUT))
                 LOG.info("Sizes Queue[Active=%s, Passive=%s], Jobs=%s",

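For reference on the UniqPriorityQueue._put() change shown above: when a duplicate item arrives with a higher priority (a smaller value), the priority of the already-queued item is updated in place and the heap invariant is repaired with heapify(). The sketch below illustrates only that mechanism with plain heapq; the Item class and the put() helper are made up for this example and are not part of the agent code.

    import heapq


    class Item:
        """Made-up stand-in for Runnable: identity by name, ordered by priority."""
        def __init__(self, name, priority):
            self.name = name
            self.priority = priority

        def __eq__(self, other):
            return self.name == other.name

        def __lt__(self, other):
            # smaller number == higher priority, as in the agent
            return self.priority < other.priority


    def put(queue, item):
        """Sketch of the fixed _put(): update a duplicate's priority, else push."""
        try:
            existing = queue[queue.index(item)]     # lookup uses __eq__, i.e. identity only
            if item.priority < existing.priority:
                existing.priority = item.priority   # take over the more urgent priority
                heapq.heapify(queue)                # repair the heap after mutating a key
            return
        except ValueError:
            heapq.heappush(queue, item)             # not queued yet, add it


    queue = []
    put(queue, Item("maintenance-job", priority=7))
    put(queue, Item("api-job", priority=5))
    put(queue, Item("maintenance-job", priority=1))   # duplicate, but more urgent now
    top = heapq.heappop(queue)
    assert top.name == "maintenance-job" and top.priority == 1

Before the fix, such a duplicate was dropped without the priority update, so a job that had become urgent kept waiting behind lower-priority work; mutating the key without the heapify() call could in turn leave the heap out of order.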