diff --git a/changelog.d/18499.bugfix b/changelog.d/18499.bugfix new file mode 100644 index 00000000000..b07601a35d9 --- /dev/null +++ b/changelog.d/18499.bugfix @@ -0,0 +1 @@ +Fix CPU and database spinning when retrying sending events to servers whilst at the same time purging those events. \ No newline at end of file diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index b3f65e8237e..8d6c77faee9 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -129,6 +129,8 @@ def __init__( # The stream_ordering of the most recent PDU that was discarded due to # being in catch-up mode. + # Can be set to zero if no PDU has been discarded since the last time + # we queried for new PDUs during catch-up. self._catchup_last_skipped: int = 0 # Cache of the last successfully-transmitted stream ordering for this @@ -462,8 +464,18 @@ async def _catch_up_transmission_loop(self) -> None: # of a race condition, so we check that no new events have been # skipped due to us being in catch-up mode - if self._catchup_last_skipped > last_successful_stream_ordering: + if ( + self._catchup_last_skipped != 0 + and self._catchup_last_skipped > last_successful_stream_ordering + ): # another event has been skipped because we were in catch-up mode + # As an exception to this case: we can hit this branch if the + # room has been purged whilst we have been looping. + # In that case we avoid hot-looping by resetting the 'catch-up skipped + # PDU' flag. + # Then if there is still no progress to be made at the next iteration, + # we can exit catch-up mode. + self._catchup_last_skipped = 0 continue # we are done catching up!