-
Notifications
You must be signed in to change notification settings - Fork 1.1k
fix(utils): bound broadcast queues and cap web store cache to prevent memory leaks #2236
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4359a8a
12c1771
bd9fdc9
644ec22
4a0ab17
92c526a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,70 +1,86 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import asyncio | ||
| import contextlib | ||
| import sys | ||
|
|
||
| if sys.version_info >= (3, 13): | ||
| QueueShutDown = asyncio.QueueShutDown # type: ignore[assignment] | ||
|
|
||
| class Queue[T](asyncio.Queue[T]): | ||
| """Asyncio Queue with shutdown support.""" | ||
| """Asyncio Queue with shutdown support (Python 3.13+ native).""" | ||
|
|
||
| def __init__(self, *, maxsize: int = 0) -> None: | ||
| super().__init__(maxsize=maxsize) | ||
|
|
||
| else: | ||
|
|
||
| class QueueShutDown(Exception): | ||
| """Raised when operating on a shut down queue.""" | ||
|
|
||
| class _Shutdown: | ||
| """Sentinel for queue shutdown.""" | ||
|
|
||
| _SHUTDOWN = _Shutdown() | ||
|
|
||
| class Queue[T](asyncio.Queue[T | _Shutdown]): | ||
| class Queue[T](asyncio.Queue[T]): | ||
| """Asyncio Queue with shutdown support for Python < 3.13.""" | ||
|
|
||
| def __init__(self) -> None: | ||
| super().__init__() | ||
| def __init__(self, *, maxsize: int = 0) -> None: | ||
| super().__init__(maxsize=maxsize) | ||
| self._shutdown = False | ||
|
|
||
| def shutdown(self, immediate: bool = False) -> None: | ||
| if self._shutdown: | ||
| return | ||
| self._shutdown = True | ||
| if immediate: | ||
| self._queue.clear() | ||
|
|
||
| getters = list(getattr(self, "_getters", [])) | ||
| count = max(1, len(getters)) | ||
| self._enqueue_shutdown(count) | ||
|
|
||
| def _enqueue_shutdown(self, count: int) -> None: | ||
| for _ in range(count): | ||
| try: | ||
| super().put_nowait(_SHUTDOWN) | ||
| except asyncio.QueueFull: | ||
| self._queue.clear() | ||
| super().put_nowait(_SHUTDOWN) | ||
| self._queue.clear() # type: ignore[attr-defined] | ||
|
|
||
| # Wake all getters so they can check the shutdown flag and | ||
| # raise QueueShutDown instead of re-blocking forever. | ||
| # NOTE: _wakeup_next is a private asyncio.Queue method that has | ||
| # been stable since Python 3.7. We use it because there is no | ||
| # public API to wake a specific waiter. | ||
| while getattr(self, "_getters", []): | ||
| with contextlib.suppress(IndexError): | ||
| self._wakeup_next(self._getters) # type: ignore[attr-defined] | ||
|
Comment on lines
+40
to
+42
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
On supported Python 3.12, a consumer that is already suspended inside Useful? React with 👍 / 👎.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shutdown() enqueues _Shutdown sentinel(s) and wakes all blocked getters. A getter blocked on await queue.get() receives the sentinel, raises QueueShutDown, and exits cleanly.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in bd9fdc9. Removed the sentinel approach entirely and instead overrode get() to add the post-wake shutdown check, matching Python 3.13's native Queue behavior. After shutdown() wakes blocked getters, get() now checks self._shutdown before re-blocking and raises QueueShutDown immediately. |
||
|
|
||
| # Wake all putters so they re-check shutdown instead of | ||
| # hanging on a full bounded queue. | ||
| while getattr(self, "_putters", []): | ||
| with contextlib.suppress(IndexError): | ||
| self._wakeup_next(self._putters) # type: ignore[attr-defined] | ||
|
|
||
| async def get(self) -> T: | ||
| if self._shutdown and self.empty(): | ||
| raise QueueShutDown | ||
| item = await super().get() | ||
| if isinstance(item, _Shutdown): | ||
| raise QueueShutDown | ||
| return item | ||
| while self.empty(): | ||
| getter = asyncio.get_running_loop().create_future() | ||
| self._getters.append(getter) | ||
| try: | ||
| await getter | ||
| finally: | ||
| with contextlib.suppress(ValueError): | ||
| self._getters.remove(getter) | ||
| if self._shutdown and self.empty(): | ||
| raise QueueShutDown | ||
| return super().get_nowait() | ||
|
|
||
| def get_nowait(self) -> T: | ||
| if self._shutdown and self.empty(): | ||
| raise QueueShutDown | ||
| item = super().get_nowait() | ||
| if isinstance(item, _Shutdown): | ||
| raise QueueShutDown | ||
| return item | ||
| return super().get_nowait() | ||
|
|
||
| async def put(self, item: T) -> None: | ||
| if self._shutdown: | ||
| raise QueueShutDown | ||
| await super().put(item) | ||
| while self.full(): | ||
| putter = asyncio.get_running_loop().create_future() | ||
| self._putters.append(putter) | ||
| try: | ||
| await putter | ||
| finally: | ||
| with contextlib.suppress(ValueError): | ||
| self._putters.remove(putter) | ||
| if self._shutdown: | ||
| raise QueueShutDown | ||
| super().put_nowait(item) | ||
|
|
||
| def put_nowait(self, item: T) -> None: | ||
| if self._shutdown: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,19 +1,33 @@ | ||
| import asyncio | ||
| import contextlib | ||
|
|
||
| from kimi_cli.utils.aioqueue import Queue | ||
|
|
||
|
|
||
| class BroadcastQueue[T]: | ||
| """ | ||
| A broadcast queue that allows multiple subscribers to receive published items. | ||
| """A broadcast queue that allows multiple subscribers to receive published items. | ||
|
|
||
| Each subscriber gets its own queue. By default queues are bounded | ||
| (``maxsize=1000``) to prevent unbounded memory growth; an unbounded | ||
| queue can be requested with ``maxsize=0``. Critical consumers | ||
| (e.g. wire recorders and waitable request paths) should use an | ||
| unbounded queue. | ||
| """ | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
With the default still set to Useful? React with 👍 / 👎.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Production queues are bounded. RootWireHub.subscribe() defaults to maxsize=1000. Wire.ui_side() explicitly passes maxsize=1000 for UI consumers. Only the internal recorder uses maxsize=0 (unbounded) by design. |
||
| def __init__(self) -> None: | ||
| def __init__(self, *, maxsize: int = 1000) -> None: | ||
| self._queues: set[Queue[T]] = set() | ||
|
|
||
| def subscribe(self) -> Queue[T]: | ||
| """Create a new subscription queue.""" | ||
| queue: Queue[T] = Queue() | ||
| self._maxsize = maxsize | ||
|
|
||
| def subscribe(self, *, maxsize: int | None = None) -> Queue[T]: | ||
| """Create a new subscription queue. | ||
|
|
||
| Args: | ||
| maxsize: Maximum queue size. ``None`` uses the broadcast | ||
| queue's default (``1000``). ``0`` means unbounded. | ||
| Pass a positive value for lossy consumers that may fall | ||
| behind and can tolerate dropped messages. | ||
| """ | ||
| queue: Queue[T] = Queue(maxsize=maxsize if maxsize is not None else self._maxsize) | ||
| self._queues.add(queue) | ||
| return queue | ||
|
|
||
|
|
@@ -22,16 +36,28 @@ def unsubscribe(self, queue: Queue[T]) -> None: | |
| self._queues.discard(queue) | ||
|
|
||
| async def publish(self, item: T) -> None: | ||
| """Publish an item to all subscription queues.""" | ||
| await asyncio.gather(*(queue.put(item) for queue in self._queues)) | ||
| """Publish an item to all subscription queues, awaiting space. | ||
|
|
||
| This blocks until every subscriber has room for the item. | ||
| """ | ||
| for queue in list(self._queues): | ||
| await queue.put(item) | ||
|
|
||
| def publish_nowait(self, item: T) -> None: | ||
| """Publish an item to all subscription queues without waiting.""" | ||
| for queue in self._queues: | ||
| queue.put_nowait(item) | ||
| """Publish an item to all subscription queues without waiting. | ||
|
|
||
| If a single subscriber's queue is full, that subscriber is | ||
| skipped so that later subscribers still receive the item. | ||
| Callers that require guaranteed delivery (e.g. waitable | ||
| requests) should use an unbounded queue so no subscriber is | ||
| ever skipped. | ||
| """ | ||
| for queue in list(self._queues): | ||
| with contextlib.suppress(asyncio.QueueFull): | ||
| queue.put_nowait(item) | ||
|
|
||
| def shutdown(self, immediate: bool = False) -> None: | ||
| """Close all subscription queues.""" | ||
| for queue in self._queues: | ||
| for queue in list(self._queues): | ||
| queue.shutdown(immediate=immediate) | ||
| self._queues.clear() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When
cancel_by_source()cancels requests it now calls_prune_requests(), but this path never inserts those cancellations into_resolved_cachethe wayresolve()and_cancel_request()do. If a source cleanup cancels more than the retained-request window before a consumer callswait_for_response()for one of the older request IDs, pruning can remove the request and the later wait raisesKeyErrorinstead of the expectedApprovalCancelledError, breaking callers that treat source lifecycle cancellation as a rejected approval.Useful? React with 👍 / 👎.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Addressed in this commit. cancel_by_source() calls self._cache_cancelled_requests(to_remove) before _prune_requests(). Newly cancelled requests are cached with resolved_at=time.time(), so they survive the grace-period check.