Merge pull request #67 from shcherbak-ai/dev

SergiiShcherbak · web-flow · commit 428feb911942 · 2025-08-19T03:31:53.000+02:00
Dev
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
 - **Refactor**: Code reorganization that doesn't change functionality but improves structure or maintainability
 
+## [0.16.1](https://github.com/shcherbak-ai/contextgem/releases/tag/v0.16.1) - 2025-08-19
+### Fixed
+- Added support for `"minimal"` reasoning effort for gpt-5 models (`openai/gpt-5*` and `azure/gpt-5*`); requesting it for any other model raises a `ValueError`.
+
 ## [0.16.0](https://github.com/shcherbak-ai/contextgem/releases/tag/v0.16.0) - 2025-08-19
 ### Added
 - Reasoning-aware extraction prompts: Automatically enables private chain-of-thought guidance on models that support reasoning, yielding higher-quality outputs (no change for other models).
diff --git a/contextgem/__init__.py b/contextgem/__init__.py
@@ -20,7 +20,7 @@
 ContextGem - Effortless LLM extraction from documents
 """
 
-__version__ = "0.16.0"
+__version__ = "0.16.1"
 __author__ = "Shcherbak AI AS"
 
 from contextgem.public import (
diff --git a/contextgem/internal/base/llms.py b/contextgem/internal/base/llms.py
@@ -2832,7 +2832,10 @@ class _DocumentLLM(_GenericLLMProcessor):
     )  # for reasoning (CoT-capable) models
     reasoning_effort: ReasoningEffort | None = Field(
         default=None,
-        description="Reasoning effort for CoT models: 'low' | 'medium' | 'high'.",
+        description=(
+            "Reasoning effort for CoT-capable models: 'minimal' (gpt-5 models only) | "
+            "'low' | 'medium' | 'high'."
+        ),
     )  # for reasoning (CoT-capable) models
     num_retries_failed_request: StrictInt = Field(
         default=3,
@@ -3000,6 +3003,15 @@ def _post_init(self, __context: Any):
                 stacklevel=2,
             )
 
+        # "minimal" reasoning effort is supported only for gpt-5 models
+        if self.reasoning_effort == "minimal" and not (
+            self.model.startswith("azure/gpt-5")
+            or self.model.startswith("openai/gpt-5")
+        ):
+            raise ValueError(
+                "`reasoning_effort='minimal'` is supported only for gpt-5 models."
+            )
+
     def _set_private_attrs(self) -> None:
         """
         Initialize and configure private attributes for the LLM instance.
@@ -3457,12 +3469,14 @@ def _validate_document_llm_post(self) -> Self:
                 stacklevel=2,
             )
 
-        # Extractor role with reasoning-capable model - suggest using a reasoner role
+        # Extractor role with reasoning-capable model - suggest aligning role for routing clarity
         if self.role.startswith("extractor") and self._supports_reasoning:
             warnings.warn(
                 f"Model `{self.model}` is assigned extractor role `{self.role}`, "
-                f"while the model is reasoning-capable. Consider using a reasoner role "
-                f"to enable reasoning-related instructions for higher quality responses.",
+                f"while the model is reasoning-capable. If you intend to route reasoning tasks "
+                f"to this model, consider using a `reasoner_*` role to match aspect/concept `llm_role` "
+                f"and keep pipeline roles consistent. See "
+                f"https://contextgem.dev/optimizations/optimization_choosing_llm.html",
                 stacklevel=2,
             )
 
diff --git a/contextgem/internal/typings/aliases.py b/contextgem/internal/typings/aliases.py
@@ -84,6 +84,6 @@
     default_factory=lambda: Decimal("0.00000"), ge=Decimal("0.00000")
 )
 
-ReasoningEffort = Literal["low", "medium", "high"]
+ReasoningEffort = Literal["minimal", "low", "medium", "high"]
 
 TextMode = Literal["raw", "markdown"]
diff --git a/contextgem/public/llms.py b/contextgem/public/llms.py
@@ -102,8 +102,8 @@ class DocumentLLM(_DocumentLLM):
         (CoT-capable) models. Defaults to 16000.
     :vartype max_completion_tokens: int
     :ivar reasoning_effort: The effort level for the LLM to reason about the input. Can be set to
-        ``"low"``, ``"medium"``, or ``"high"``. Relevant for reasoning (CoT-capable) models.
-        Defaults to None.
+        ``"minimal"`` (gpt-5 models only), ``"low"``, ``"medium"``, or ``"high"``.
+        Relevant for reasoning (CoT-capable) models. Defaults to None.
     :vartype reasoning_effort: ReasoningEffort | None
     :ivar top_p: Nucleus sampling value (0.0 to 1.0) controlling output focus/randomness.
         Lower values make output more deterministic, higher values produce more diverse outputs.
diff --git a/dev/usage_examples/docs/llm_config/o1_o4.py b/dev/usage_examples/docs/llm_config/o1_o4.py
@@ -5,5 +5,5 @@
     model="openai/o3-mini",
     api_key="<your-openai-api-key>",
     max_completion_tokens=8000,  # Specific to reasoning (CoT-capable) models
-    reasoning_effort="medium",  # Optional: "low", "medium", "high"
+    reasoning_effort="medium",  # Optional
 )
diff --git a/docs/docs-raw-for-llm.txt b/docs/docs-raw-for-llm.txt
@@ -8763,7 +8763,8 @@ The "DocumentLLM" class accepts the following parameters:
 | ens"                 |                 |                 | (CoT-capable) models.                              |
 +----------------------+-----------------+-----------------+----------------------------------------------------+
 | "reasoning_effort"   | "str | None"    | "None"          | Reasoning effort for reasoning (CoT-capable)       |
-|                      |                 |                 | models. Values: ""low"", ""medium"", ""high"".     |
+|                      |                 |                 | models. Values: ""minimal"" (gpt-5 models only),   |
+|                      |                 |                 | ""low"", ""medium"", ""high"".                     |
 +----------------------+-----------------+-----------------+----------------------------------------------------+
 | "timeout"            | "int"           | "120"           | Timeout in seconds for LLM API calls.              |
 +----------------------+-----------------+-----------------+----------------------------------------------------+
@@ -8935,7 +8936,7 @@ Using model-specific parameters
        model="openai/o3-mini",
        api_key="<your-openai-api-key>",
        max_completion_tokens=8000,  # Specific to reasoning (CoT-capable) models
-       reasoning_effort="medium",  # Optional: "low", "medium", "high"
+       reasoning_effort="medium",  # Optional
    )
 
 
@@ -15967,8 +15968,9 @@ class contextgem.public.llms.DocumentLLM(**data)
 
       * **reasoning_effort** (*ReasoningEffort** | **None*) -- The
         effort level for the LLM to reason about the input. Can be set
-        to ""low"", ""medium"", or ""high"". Relevant for reasoning
-        (CoT-capable) models. Defaults to None.
+        to ""minimal"" (gpt-5 models only), ""low"", ""medium"", or
+        ""high"". Relevant for reasoning (CoT-capable) models.
+        Defaults to None.
 
       * **top_p** (*float** | **None*) -- Nucleus sampling value (0.0
         to 1.0) controlling output focus/randomness. Lower values make
@@ -16072,8 +16074,8 @@ class contextgem.public.llms.DocumentLLM(**data)
       * **max_completion_tokens** (*Annotated**[**int**,
         **Strict**(**strict=True**)**]*)
 
-      * **reasoning_effort** (*Literal**[**'low'**, **'medium'**,
-        **'high'**] **| **None*)
+      * **reasoning_effort** (*Literal**[**'minimal'**, **'low'**,
+        **'medium'**, **'high'**] **| **None*)
 
       * **num_retries_failed_request** (*Annotated**[**int**,
         **Strict**(**strict=True**)**]*)
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -25,7 +25,7 @@
 project = "ContextGem"
 copyright = "2025, Shcherbak AI AS"
 author = "Sergii Shcherbak"
-release = "0.16.0"
+release = "0.16.1"
 
 
 # Add path to the package
diff --git a/docs/source/llms/llm_config.rst b/docs/source/llms/llm_config.rst
@@ -102,7 +102,7 @@ The :class:`~contextgem.public.llms.DocumentLLM` class accepts the following par
    * - ``reasoning_effort``
      - ``str | None``
      - ``None``
-     - Reasoning effort for reasoning (CoT-capable) models. Values: ``"low"``, ``"medium"``, ``"high"``.
+     - Reasoning effort for reasoning (CoT-capable) models. Values: ``"minimal"`` (gpt-5 models only), ``"low"``, ``"medium"``, ``"high"``.
    * - ``timeout``
      - ``int``
      - ``120``
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "contextgem"
-version = "0.16.0"
+version = "0.16.1"
 description = "Effortless LLM extraction from documents"
 authors = [{ name = "shcherbak-ai", email = "sergii@shcherbak.ai" }]
 requires-python = ">=3.10,<3.14"
diff --git a/tests/cassettes/TestAll.test_minimal_reasoning_effort_for_gpt_5.yaml b/tests/cassettes/TestAll.test_minimal_reasoning_effort_for_gpt_5.yaml
@@ -0,0 +1,108 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"user","content":"List synonyms for the word ''confidentiality''."}],"model":"gpt-5-mini","max_completion_tokens":16000,"reasoning_effort":"minimal","stream":false}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      api-key:
+      - DUMMY
+      authorization:
+      - DUMMY
+      connection:
+      - keep-alive
+      content-length:
+      - '183'
+      content-type:
+      - application/json
+      host:
+      - DUMMY
+      user-agent:
+      - AzureOpenAI/Python 1.99.9
+      x-stainless-arch:
+      - other:amd64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - DUMMY
+      x-stainless-package-version:
+      - 1.99.9
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - '120.0'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - DUMMY
+    method: POST
+    uri: https://<DUMMY-ENDPOINT>/openai/deployments/DUMMY-DEPLOYMENT/chat/completions?api-version=2025-03-01-preview
+  response:
+    body:
+      string: '{"choices": [{"content_filter_results": {"hate": {"filtered": false,
+        "severity": "safe"}, "protected_material_code": {"detected": false, "filtered":
+        false}, "protected_material_text": {"detected": false, "filtered": false},
+        "self_harm": {"filtered": false, "severity": "safe"}, "sexual": {"filtered":
+        false, "severity": "safe"}, "violence": {"filtered": false, "severity": "safe"}},
+        "finish_reason": "stop", "index": 0, "logprobs": null, "message": {"annotations":
+        [], "content": "Here are synonyms for \"confidentiality\":\n\n- secrecy\n-
+        privacy\n- non-disclosure\n- secrecy of information\n- discretion\n- secrecy
+        of communications\n- confidentiality of data\n- concealment\n- secrecy of
+        sources\n- hush-hush (informal)\n- secrecy of records\n- confidence\n- restricted
+        access\n- protected information\n- keeping secret\n- privacy protection\n-
+        seclusion (contextual)\n- clandestineness (rare)\n- confidentiality obligations
+        (phrase)\n- confidentiality assurance\n\nIf you need synonyms suitable for
+        legal, technical, or casual contexts, tell me which and I\u2019ll narrow the
+        list.", "refusal": null, "role": "assistant"}}], "created": 1755563499, "id":
+        "chatcmpl-DUMMY", "model": "gpt-5-mini-2025-08-07", "object": "chat.completion",
+        "prompt_filter_results": [{"prompt_index": 0, "content_filter_results": {"hate":
+        {"filtered": false, "severity": "safe"}, "jailbreak": {"detected": false,
+        "filtered": false}, "self_harm": {"filtered": false, "severity": "safe"},
+        "sexual": {"filtered": false, "severity": "safe"}, "violence": {"filtered":
+        false, "severity": "safe"}}}], "system_fingerprint": null, "usage": {"completion_tokens":
+        137, "completion_tokens_details": {"accepted_prediction_tokens": 0, "audio_tokens":
+        0, "reasoning_tokens": 0, "rejected_prediction_tokens": 0}, "prompt_tokens":
+        16, "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0}, "total_tokens":
+        153}}'
+    headers:
+      Content-Length:
+      - '1802'
+      Content-Type:
+      - application/json
+      Date:
+      - DUMMY
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      apim-request-id:
+      - DUMMY
+      azureml-model-session:
+      - DUMMY
+      x-accel-buffering:
+      - 'no'
+      x-content-type-options:
+      - nosniff
+      x-ms-deployment-name:
+      - DUMMY
+      x-ms-rai-invoked:
+      - DUMMY
+      x-ms-region:
+      - DUMMY
+      x-ratelimit-limit-requests:
+      - DUMMY
+      x-ratelimit-limit-tokens:
+      - DUMMY
+      x-ratelimit-remaining-requests:
+      - DUMMY
+      x-ratelimit-remaining-tokens:
+      - DUMMY
+      x-request-id:
+      - DUMMY
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/test_all.py b/tests/test_all.py
@@ -1142,6 +1142,44 @@ def extract_with_local_llm(llm: DocumentLLM):
 
         check_locals_memory_usage(locals(), test_name="test_local_llms_text_gpt_oss")
 
+    @pytest.mark.vcr
+    @memory_profile_and_capture
+    def test_minimal_reasoning_effort_for_gpt_5(self):
+        """
+        Tests that "minimal" reasoning effort is accepted only for gpt-5 models.
+        """
+
+        # "minimal" reasoning effort is supported only for gpt-5 models
+        llm = DocumentLLM(
+            model="azure/gpt-5-mini",
+            api_key=os.getenv("CONTEXTGEM_AZURE_OPENAI_API_KEY"),
+            api_base=os.getenv("CONTEXTGEM_AZURE_OPENAI_API_BASE"),
+            api_version=os.getenv("CONTEXTGEM_AZURE_OPENAI_API_VERSION"),
+            reasoning_effort="minimal",
+            role="reasoner_text",
+        )
+        llm.system_message = ""  # disable default system message
+        response = llm.chat("List synonyms for the word 'confidentiality'.")
+        logger.debug(f"Response with minimal reasoning effort:\n{response}")
+
+        # "minimal" reasoning effort is not supported for other models
+        with pytest.raises(
+            ValueError,
+            match="supported only for gpt-5 models",
+        ):
+            DocumentLLM(
+                model="azure/o4-mini",
+                api_key=os.getenv("CONTEXTGEM_AZURE_OPENAI_API_KEY"),
+                api_base=os.getenv("CONTEXTGEM_AZURE_OPENAI_API_BASE"),
+                api_version=os.getenv("CONTEXTGEM_AZURE_OPENAI_API_VERSION"),
+                reasoning_effort="minimal",
+                role="reasoner_text",
+            )
+
+        check_locals_memory_usage(
+            locals(), test_name="test_minimal_reasoning_effort_for_gpt_5"
+        )
+
     @pytest.mark.vcr
     @memory_profile_and_capture
     def test_local_llms_vision(self):
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -84,6 +84,6 @@`
`84`	`84`	`default_factory=lambda: Decimal("0.00000"), ge=Decimal("0.00000")`
`85`	`85`	`)`
`86`	`86`
`87`		`-ReasoningEffort = Literal["low", "medium", "high"]`
	`87`	`+ReasoningEffort = Literal["minimal", "low", "medium", "high"]`
`88`	`88`
`89`	`89`	`TextMode = Literal["raw", "markdown"]`
Original file line number	Diff line number	Diff line change
`@@ -5,5 +5,5 @@`
`5`	`5`	`model="openai/o3-mini",`
`6`	`6`	`api_key="<your-openai-api-key>",`
`7`	`7`	`max_completion_tokens=8000, # Specific to reasoning (CoT-capable) models`
`8`		`- reasoning_effort="medium", # Optional: "low", "medium", "high"`
	`8`	`+ reasoning_effort="medium", # Optional`
`9`	`9`	`)`