Skip to content

Commit fcc41fa

Browse files
authored
Backport PR ipython#14910 on branch 8.x (Eliminate startup delay when slow-starting LLM completion provider is configured) (ipython#14912)
Backport PR ipython#14910: Eliminate startup delay when slow-starting LLM completion provider is configured
2 parents 21d9433 + 576bc55 commit fcc41fa

File tree

5 files changed

+58
-16
lines changed

5 files changed

+58
-16
lines changed

IPython/terminal/interactiveshell.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -502,13 +502,16 @@ def _set_autosuggestions(self, provider=None):
502502
elif provider == "NavigableAutoSuggestFromHistory":
503503
# LLM stuff is all Provisional in 8.32
504504
if self._llm_provider_class:
505-
llm_provider_constructor = import_item(self._llm_provider_class)
506-
llm_provider = llm_provider_constructor(**self.llm_constructor_kwargs)
505+
506+
def init_llm_provider():
507+
llm_provider_constructor = import_item(self._llm_provider_class)
508+
return llm_provider_constructor(**self.llm_constructor_kwargs)
509+
507510
else:
508-
llm_provider = None
511+
init_llm_provider = None
509512
self.auto_suggest = NavigableAutoSuggestFromHistory()
510513
# Provisional in 8.32
511-
self.auto_suggest._llm_provider = llm_provider
514+
self.auto_suggest._init_llm_provider = init_llm_provider
512515

513516
name = self.llm_prefix_from_history
514517

IPython/terminal/shortcuts/auto_suggest.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -171,16 +171,19 @@ class NavigableAutoSuggestFromHistory(AutoSuggestFromHistory):
171171
# another request.
172172
_llm_task: asyncio.Task | None = None
173173

174-
# This is the instance of the LLM provider from jupyter-ai to which we forward the request
175-
# to generate inline completions.
176-
_llm_provider: Any | None
174+
# This is the constructor of the LLM provider from jupyter-ai
175+
# to which we forward the request to generate inline completions.
176+
_init_llm_provider: Callable | None
177+
178+
_llm_provider_instance: Any | None
177179
_llm_prefixer: Callable = lambda self, x: "wrong"
178180

179181
def __init__(self):
180182
super().__init__()
181183
self.skip_lines = 0
182184
self._connected_apps = []
183-
self._llm_provider = None
185+
self._llm_provider_instance = None
186+
self._init_llm_provider = None
184187
self._request_number = 0
185188

186189
def reset_history_position(self, _: Buffer):
@@ -317,6 +320,16 @@ def _cancel_running_llm_task(self) -> None:
317320
"LLM task not cancelled, does your provider support cancellation?"
318321
)
319322

323+
@property
324+
def _llm_provider(self):
325+
"""Lazy-initialized instance of the LLM provider.
326+
327+
Do not use in the constructor, as `_init_llm_provider` can trigger slow side-effects.
328+
"""
329+
if self._llm_provider_instance is None and self._init_llm_provider:
330+
self._llm_provider_instance = self._init_llm_provider()
331+
return self._llm_provider_instance
332+
320333
async def _trigger_llm(self, buffer) -> None:
321334
"""
322335
This will ask the current llm provider a suggestion for the current buffer.
@@ -325,14 +338,14 @@ async def _trigger_llm(self, buffer) -> None:
325338
"""
326339
# we likely want to store the current cursor position, and cancel if the cursor has moved.
327340
try:
328-
import jupyter_ai.completions.models as jai_models
341+
import jupyter_ai_magics
329342
except ModuleNotFoundError:
330-
jai_models = None
343+
jupyter_ai_magics = None
331344
if not self._llm_provider:
332345
warnings.warn("No LLM provider found, cannot trigger LLM completions")
333346
return
334-
if jai_models is None:
335-
warnings.warn("LLM Completion requires `jupyter_ai` to be installed")
347+
if jupyter_ai_magics is None:
348+
warnings.warn("LLM Completion requires `jupyter_ai_magics` to be installed")
336349

337350
self._cancel_running_llm_task()
338351

@@ -359,7 +372,7 @@ async def _trigger_llm_core(self, buffer: Buffer):
359372
provider to stream its response back to us iteratively setting it as
360373
the suggestion on the current buffer.
361374
362-
Unlike with JupyterAi, as we do not have multiple cell, the cell id
375+
Unlike with JupyterAi, as we do not have multiple cells, the cell id
363376
is always set to `None`.
364377
365378
We set the prefix to the current cell content, but could also insert the

IPython/terminal/tests/fake_llm.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import asyncio
2+
from time import sleep
23

34
try:
4-
from jupyter_ai_magics import BaseProvider
5+
from jupyter_ai_magics.providers import BaseProvider
56
from langchain_community.llms import FakeListLLM
67
except ImportError:
78

@@ -87,3 +88,16 @@ async def _stream(self, sentence, request_number, token, start_with=""):
8788
reply_to=request_number,
8889
done=True,
8990
)
91+
92+
93+
class SlowStartingCompletionProvider(BaseProvider, FakeListLLM): # type: ignore[misc, valid-type]
94+
id = "slow_provider"
95+
name = "Slow Provider"
96+
model_id_key = "model"
97+
models = ["model_a"]
98+
99+
def __init__(self, **kwargs):
100+
kwargs["responses"] = ["This fake response will be used for completion"]
101+
kwargs["model_id"] = "model_a"
102+
sleep(10)
103+
super().__init__(**kwargs)

IPython/terminal/tests/test_shortcuts.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import pytest
2+
import time
23
from IPython.terminal.interactiveshell import PtkHistoryAdapter
34
from IPython.terminal.shortcuts.auto_suggest import (
45
accept,
@@ -68,6 +69,17 @@ async def test_llm_autosuggestion():
6869
assert event.current_buffer.suggestion.text == FIBONACCI[len(text) :]
6970

7071

72+
def test_slow_llm_provider_should_not_block_init():
73+
ip = get_ipython()
74+
provider = NavigableAutoSuggestFromHistory()
75+
ip.auto_suggest = provider
76+
start = time.perf_counter()
77+
ip.llm_provider_class = "tests.fake_llm.SlowStartingCompletionProvider"
78+
end = time.perf_counter()
79+
elapsed = end - start
80+
assert elapsed < 0.1
81+
82+
7183
@pytest.mark.parametrize(
7284
"text, suggestion, expected",
7385
[

examples/auto_suggest_llm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,13 @@
6868
import textwrap
6969
from typing import Any, AsyncIterable, AsyncIterator
7070

71-
from jupyter_ai.completions.models import (
71+
from jupyter_ai_magics.models.completion import (
7272
InlineCompletionList,
7373
InlineCompletionReply,
7474
InlineCompletionRequest,
7575
InlineCompletionStreamChunk,
7676
)
77-
from jupyter_ai_magics import BaseProvider
77+
from jupyter_ai_magics.providers import BaseProvider
7878
from langchain_community.llms import FakeListLLM
7979

8080

0 commit comments

Comments
 (0)