From 22dfa37af8f87e01665b2877af2f6a52919f2196 Mon Sep 17 00:00:00 2001 From: datvo06 Date: Fri, 6 Feb 2026 17:03:35 -0500 Subject: [PATCH 01/18] Doctest --- docs/source/llm.ipynb | 1807 ++++++++++++++------------ effectful/handlers/llm/encoding.py | 5 + effectful/handlers/llm/evaluation.py | 117 ++ tests/test_handlers_llm_encoding.py | 131 +- tests/test_handlers_llm_provider.py | 34 +- 5 files changed, 1214 insertions(+), 880 deletions(-) diff --git a/docs/source/llm.ipynb b/docs/source/llm.ipynb index 17f6af02..436a05f2 100644 --- a/docs/source/llm.ipynb +++ b/docs/source/llm.ipynb @@ -1,871 +1,972 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "e7fda1b8", - "metadata": {}, - "source": [ - "# LLM Interface\n", - "The `effectful.handlers.llm` module provides a simplified LLM interface that uses algebraic effects for modularity. The module interface consists of:\n", - "\n", - "- A decorator `Template.define` which creates a prompt template from a callable. A template is an LLM-implemented function whose behavior is specified by a template string. When a template is called, an LLM is invoked to produce the specified behavior.\n", - "- A decorator `Tool.define` which exposes Python callables as tools that templates can call. Tool signatures and docstrings define the schema passed to the model.\n", - "- Structured output handling via `Encodable` (used internally by templates and tool calls) to serialize/deserialize Python types.\n", - "- LLM providers such as `LiteLLMProvider`, and reliability helpers like `RetryLLMHandler` and `ReplayLiteLLMProvider`, which can be composed with `handler(...)` to control execution." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5aaf649f", - "metadata": {}, - "outputs": [], - "source": [ - "import base64\n", - "import dataclasses\n", - "import functools\n", - "import io\n", - "from typing import Literal\n", - "\n", - "import litellm\n", - "import pydantic\n", - "from IPython.display import HTML, display\n", - "from litellm.caching.caching import Cache\n", - "from PIL import Image\n", - "from pydantic import field_validator\n", - "from pydantic_core import PydanticCustomError\n", - "\n", - "from effectful.handlers.llm import Template, Tool\n", - "from effectful.handlers.llm.completions import (\n", - " LiteLLMProvider,\n", - " RetryLLMHandler,\n", - ")\n", - "from effectful.ops.semantics import NotHandled, handler\n", - "\n", - "provider = LiteLLMProvider()" - ] - }, - { - "cell_type": "markdown", - "id": "093243e0", - "metadata": {}, - "source": [ - "In the following sections, we walk through each of the mentioned components." - ] - }, - { - "cell_type": "markdown", - "id": "c1c639d3", - "metadata": {}, - "source": [ - "## Prompt Templates\n", - "\n", - "This template function writes (bad) poetry on a given theme. While difficult to implement in Python, an LLM can provide a reasonable implementation." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "1e832675", - "metadata": {}, - "outputs": [], - "source": [ - "@Template.define\n", - "def limerick(theme: str) -> str:\n", - " \"\"\"Write a limerick on the theme of {theme}. Do not use any tools.\"\"\"\n", - " raise NotHandled" - ] - }, - { - "cell_type": "markdown", - "id": "f2ca6919", - "metadata": {}, - "source": [ - "If we call the template with a provider interpretation installed, we get reasonable behavior. 
The LLM is nondeterministic by default, so calling the template twice with the same arguments gives us different results.\n", - "\n", - "Templates are regular callables, so can be converted to operations with `defop` if we want to override the LLM implementation in some cases." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "634f6533", - "metadata": {}, - "outputs": [ + "cells": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "In the ocean so deep and so wide,\n", - "There's a fish with a fin full of pride.\n", - "He swims with a gleam,\n", - "In a school like a dream,\n", - "As they wander the blue, side by side.\n", - "----------------------------------------\n", - "In the depths of the sea, fish frolic with glee,\n", - "From goldfish to salmon, they're ever so free.\n", - "They swim and they dart,\n", - "Each plays its own part,\n", - "Underneath waves, they carelessly spree.\n" - ] - } - ], - "source": [ - "with handler(provider):\n", - " print(limerick(\"fish\"))\n", - " print(\"-\" * 40)\n", - " print(limerick(\"fish\"))" - ] - }, - { - "cell_type": "markdown", - "id": "2e59acbc", - "metadata": {}, - "source": [ - "If we want deterministic behavior, we can cache the template call. We can either cache it with the default `@functools.cache` or use LiteLLM's built-in cache by setting a cache backend and passing `caching=True` to the provider:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "706ce53b", - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "id": "e7fda1b8", + "metadata": {}, + "source": [ + "# LLM Interface\n", + "The `effectful.handlers.llm` module provides a simplified LLM interface that uses algebraic effects for modularity. The module interface consists of:\n", + "\n", + "- A decorator `Template.define` which creates a prompt template from a callable. A template is an LLM-implemented function whose behavior is specified by a template string. 
When a template is called, an LLM is invoked to produce the specified behavior.\n", + "- A decorator `Tool.define` which exposes Python callables as tools that templates can call. Tool signatures and docstrings define the schema passed to the model.\n", + "- Structured output handling via `Encodable` (used internally by templates and tool calls) to serialize/deserialize Python types.\n", + "- LLM providers such as `LiteLLMProvider`, and reliability helpers like `RetryLLMHandler` and `ReplayLiteLLMProvider`, which can be composed with `handler(...)` to control execution." + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Swim in silent streams,\n", - "Scales gleam under moonlit glow—\n", - "River's whispered dreams.\n", - "----------------------------------------\n", - "Swim in silent streams,\n", - "Scales gleam under moonlit glow—\n", - "River's whispered dreams.\n", - "\n", - "Silver scales glisten,\n", - "Beneath the ocean's whisper— \n", - "Silent fins dance deep.\n", - "----------------------------------------\n", - "In ocean's vast depth, \n", - "Gliding through the watery world, \n", - "Fish dance with the waves.\n", - "\n", - "Fish swim with grace, free—\n", - "In vast blue ocean they glide,\n", - "Silent in their world.\n", - "----------------------------------------\n", - "In tranquil waters,\n", - "Silver scales shimmer and dart—\n", - "Silent fish dance swift.\n" - ] - } - ], - "source": [ - "@functools.cache\n", - "@Template.define\n", - "def haiku(theme: str) -> str:\n", - " \"\"\"Write a haiku on the theme of {theme}. Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "@Template.define\n", - "def haiku_no_cache(theme: str) -> str:\n", - " \"\"\"Write a haiku on the theme of {theme}. 
Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "print()\n", - "with handler(provider):\n", - " print(haiku(\"fish\"))\n", - " print(\"-\" * 40)\n", - " print(haiku(\"fish\"))\n", - "\n", - "print()\n", - "# Enable LiteLLM caching by setting a cache backend and enabling caching.\n", - "litellm.cache = Cache()\n", - "provider_cached = LiteLLMProvider(caching=True)\n", - "try:\n", - " with handler(provider_cached):\n", - " print(haiku_no_cache(\"fish2\"))\n", - " print(\"-\" * 40)\n", - " print(haiku_no_cache(\"fish2\"))\n", - "finally:\n", - " litellm.cache = None\n", - "\n", - "print()\n", - "with handler(provider):\n", - " print(haiku_no_cache(\"fish3\"))\n", - " print(\"-\" * 40)\n", - " print(haiku_no_cache(\"fish3\"))" - ] - }, - { - "cell_type": "markdown", - "id": "13adb300", - "metadata": {}, - "source": [ - "## Converting LLM Results to Python Objects\n", - "\n", - "Type conversion is handled by `decode`. By default, primitive types are converted. `DecodeError` is raised if a response cannot be converted." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "2c766859", - "metadata": {}, - "outputs": [], - "source": [ - "@Template.define\n", - "def primes(first_digit: int) -> int:\n", - " \"\"\"Give a prime number with {first_digit} as the first digit. Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "with handler(provider):\n", - " assert type(primes(6)) is int" - ] - }, - { - "cell_type": "markdown", - "id": "36d78a71", - "metadata": {}, - "source": [ - "More complex types can be converted by providing handlers for `decode`. Callable synthesis is supported via `Encodable` and the evaluation providers in `effectful.handlers.llm.evaluation` (`UnsafeEvalProvider` or `RestrictedEvalProvider`), which enable parsing/compiling/executing synthesized code." 
- ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "c83bbdc0", - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": 1, + "id": "5aaf649f", + "metadata": {}, + "outputs": [], + "source": [ + "import base64\n", + "import dataclasses\n", + "import functools\n", + "import io\n", + "from typing import Literal\n", + "\n", + "import litellm\n", + "import pydantic\n", + "from IPython.display import HTML, display\n", + "from litellm.caching.caching import Cache\n", + "from PIL import Image\n", + "from pydantic import field_validator\n", + "from pydantic_core import PydanticCustomError\n", + "\n", + "from effectful.handlers.llm import Template, Tool\n", + "from effectful.handlers.llm.completions import (\n", + " LiteLLMProvider,\n", + " RetryLLMHandler,\n", + ")\n", + "from effectful.ops.semantics import NotHandled, handler\n", + "\n", + "provider = LiteLLMProvider()" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "def count_a_occurrences(input_string: str) -> int:\n", - " \"\"\"\n", - " Count the occurrences of the letter 'a' in a given string.\n", - "\n", - " :param input_string: The string to search within.\n", - " :return: The number of times 'a' appears in the string.\n", - " \"\"\"\n", - " return input_string.count('a')\n" - ] - } - ], - "source": [ - "import inspect\n", - "from collections.abc import Callable\n", - "\n", - "from effectful.handlers.llm.evaluation import UnsafeEvalProvider\n", - "\n", - "\n", - "@Template.define\n", - "def count_char(char: str) -> Callable[[str], int]:\n", - " \"\"\"Write a function which takes a string and counts the occurrances of '{char}'. 
Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "# Use UnsafeEvalProvider for simple examples; RestrictedEvalProvider may need extra globals.\n", - "with handler(provider), handler(UnsafeEvalProvider()):\n", - " count_a = count_char(\"a\")\n", - " assert callable(count_a)\n", - " assert count_a(\"banana\") == 3\n", - " assert count_a(\"cherry\") == 0\n", - " # Print the source code of the generated function\n", - " print(inspect.getsource(count_a))" - ] - }, - { - "cell_type": "markdown", - "id": "991ee445", - "metadata": {}, - "source": [ - "## Tool Calling\n", - "\n", - "`Operation`s defined in the lexical scope of a `Template` are automatically available for the LLM to call as tools. The description of these operations is inferred from their type annotations and docstrings.\n", - "\n", - "Tool calls are mediated by a helper operation `tool_call`. Handling this operation allows tool use to be tracked or logged." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "66711301", - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "id": "093243e0", + "metadata": {}, + "source": [ + "In the following sections, we walk through each of the mentioned components." 
+ ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Based on the weather conditions:\n", - "\n", - "- **Chicago** is currently cold.\n", - "- **New York** is currently wet.\n", - "- **Barcelona** is currently sunny.\n", - "\n", - "I suggest **Barcelona** as the city with good weather.\n" - ] - } - ], - "source": [ - "@Tool.define\n", - "def cities() -> list[str]:\n", - " \"\"\"Return a list of cities that can be passed to `weather`.\"\"\"\n", - " return [\"Chicago\", \"New York\", \"Barcelona\"]\n", - "\n", - "\n", - "@Tool.define\n", - "def weather(city: str) -> str:\n", - " \"\"\"Given a city name, return a description of the weather in that city.\"\"\"\n", - " status = {\"Chicago\": \"cold\", \"New York\": \"wet\", \"Barcelona\": \"sunny\"}\n", - " return status.get(city, \"unknown\")\n", - "\n", - "\n", - "@Template.define # cities and weather auto-captured from lexical scope\n", - "def vacation() -> str:\n", - " \"\"\"Use the provided tools to suggest a city that has good weather. Use only the `cities` and `weather` tools provided.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "with handler(provider):\n", - " print(vacation())" - ] - }, - { - "cell_type": "markdown", - "id": "59584a54", - "metadata": {}, - "source": [ - "## Image Inputs\n", - "\n", - "You can pass `PIL.Image.Image` values directly to templates." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "89992702", - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "id": "c1c639d3", + "metadata": {}, + "source": [ + "## Prompt Templates\n", + "\n", + "This template function writes (bad) poetry on a given theme. While difficult to implement in Python, an LLM can provide a reasonable implementation." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1e832675", + "metadata": {}, + "outputs": [], + "source": [ + "@Template.define\n", + "def limerick(theme: str) -> str:\n", + " \"\"\"Write a limerick on the theme of {theme}. 
Do not use any tools.\"\"\"\n", + " raise NotHandled" + ] + }, { - "data": { - "text/html": [ - "\"Example" + "cell_type": "markdown", + "id": "f2ca6919", + "metadata": {}, + "source": [ + "If we call the template with a provider interpretation installed, we get reasonable behavior. The LLM is nondeterministic by default, so calling the template twice with the same arguments gives us different results.\n", + "\n", + "Templates are regular callables, so can be converted to operations with `defop` if we want to override the LLM implementation in some cases." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "634f6533", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "In the sea where the silver fish play,\n", + "A salmon once swam by the bay.\n", + "With a splash and a swirl,\n", + "Past the shells it would twirl,\n", + "And vanish at the end of the day.\n", + "----------------------------------------\n", + "In the ocean, where fish freely roam,\n", + "A small school called a coral reef home.\n", + "With fins all aglow,\n", + "They dart to and fro,\n", + "Dancing waves as sea breezes comb.\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "with handler(provider):\n", + " print(limerick(\"fish\"))\n", + " print(\"-\" * 40)\n", + " print(limerick(\"fish\"))" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "A simple smiley face with a yellow background, featuring two black dots for eyes and a curved line for a mouth, typically used to convey happiness or friendliness.\n" - ] - } - ], - "source": [ - "image_base64 = (\n", - " \"iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAhElEQVR4nO2W4QqA\"\n", - " \"MAiEVXr/VzYWDGoMdk7Cgrt/sUs/DqZTd3EplFU2JwATYAJMoOlAB4bq89s95+Mg\"\n", - " \"+gyAchsKAYplBBBA43hFhfxnUixDjdEUUL8hpr7R0KLdt9qElzcyiu8As+Kr8zQA\"\n", - " \"mgLavAl+kIzFZyCRxtsAmWb/voZvqRzgBE1sIDuVFX4eAAAAAElFTkSuQmCC\"\n", - 
")\n", - "image = Image.open(io.BytesIO(base64.b64decode(image_base64)))\n", - "\n", - "\n", - "@Template.define\n", - "def describe_image(image: Image.Image) -> str:\n", - " \"\"\"Return a short description of the following image.\n", - " {image}\n", - " \"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "with handler(provider):\n", - " display(\n", - " HTML(\n", - " f'\"Example'\n", - " )\n", - " )\n", - " print(describe_image(image))" - ] - }, - { - "cell_type": "markdown", - "id": "3d221feb", - "metadata": {}, - "source": [ - "## Structured Output Generation\n", - "\n", - "Constrained generation is used for any type that is convertible to a Pydantic model." - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "17668ac8", - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "id": "2e59acbc", + "metadata": {}, + "source": [ + "If we want deterministic behavior, we can cache the template call. We can either cache it with the default `@functools.cache` or use LiteLLM's built-in cache by setting a cache backend and passing `caching=True` to the provider:" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "> You are onstage at a comedy club. You tell the following joke:\n", - "Knock knock.\n", - "Who's there?\n", - "Lizard.\n", - "Lizard who?\n", - "Lizard who? Lizard you wonder, there's a gecko at your door!\n", - "> The crowd laughs politely.\n" - ] - } - ], - "source": [ - "@dataclasses.dataclass\n", - "class KnockKnockJoke:\n", - " whos_there: str\n", - " punchline: str\n", - "\n", - "\n", - "@Template.define\n", - "def write_joke(theme: str) -> KnockKnockJoke:\n", - " \"\"\"Write a knock-knock joke on the theme of {theme}. Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "@Template.define\n", - "def rate_joke(joke: KnockKnockJoke) -> bool:\n", - " \"\"\"Decide if {joke} is funny or not. 
Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "def do_comedy():\n", - " joke = write_joke(\"lizards\")\n", - " print(\"> You are onstage at a comedy club. You tell the following joke:\")\n", - " print(\n", - " f\"Knock knock.\\nWho's there?\\n{joke.whos_there}.\\n{joke.whos_there} who?\\n{joke.punchline}\"\n", - " )\n", - " if rate_joke(joke):\n", - " print(\"> The crowd laughs politely.\")\n", - " else:\n", - " print(\"> The crowd stares in stony silence.\")\n", - "\n", - "\n", - "with handler(provider):\n", - " do_comedy()" - ] - }, - { - "cell_type": "markdown", - "id": "c0003944", - "metadata": {}, - "source": [ - "## Template Composition\n", - "\n", - "Templates defined in the lexical scope are also captured, enabling template composition. One template can use the result of another template in a pipeline:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "78a4bf44", - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": 4, + "id": "706ce53b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Silent water glides,\n", + "Scales shimmering through currents—\n", + "Nature's art in waves.\n", + "----------------------------------------\n", + "Silent water glides,\n", + "Scales shimmering through currents—\n", + "Nature's art in waves.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/nguyendat/Marc/effectful/.venv/lib/python3.12/site-packages/pydantic/main.py:528: UserWarning: Pydantic serializer warnings:\n", + " PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='{\"value\"...: None}, annotations=[]), input_type=Message])\n", + " PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', 
input_value=Choices(finish_reason='st...ider_specific_fields={}), input_type=Choices])\n", + " return self.__pydantic_serializer__.to_json(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Silent waters gleam, \n", + "Fish dart in a moonlit stream, \n", + "Nature's quiet dream.\n", + "----------------------------------------\n", + "Silent waters dance, \n", + "Scales shimmer in fleeting light, \n", + "Fish glide through soft dreams.\n", + "\n", + "Fish swim through blue waves, \n", + "Their scales gleam like silver jewels, \n", + "Silent in the deep.\n", + "----------------------------------------\n", + "Silent waters flow,\n", + "Fish dart beneath gentle waves—\n", + "Nature's dance below.\n" + ] + } + ], + "source": [ + "@functools.cache\n", + "@Template.define\n", + "def haiku(theme: str) -> str:\n", + " \"\"\"Write a haiku on the theme of {theme}. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "@Template.define\n", + "def haiku_no_cache(theme: str) -> str:\n", + " \"\"\"Write a haiku on the theme of {theme}. 
Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "print()\n", + "with handler(provider):\n", + " print(haiku(\"fish\"))\n", + " print(\"-\" * 40)\n", + " print(haiku(\"fish\"))\n", + "\n", + "print()\n", + "# Enable LiteLLM caching by setting a cache backend and enabling caching.\n", + "litellm.cache = Cache()\n", + "provider_cached = LiteLLMProvider(caching=True)\n", + "try:\n", + " with handler(provider_cached):\n", + " print(haiku_no_cache(\"fish2\"))\n", + " print(\"-\" * 40)\n", + " print(haiku_no_cache(\"fish2\"))\n", + "finally:\n", + " litellm.cache = None\n", + "\n", + "print()\n", + "with handler(provider):\n", + " print(haiku_no_cache(\"fish3\"))\n", + " print(\"-\" * 40)\n", + " print(haiku_no_cache(\"fish3\"))" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Sub-templates available to write_story: dict_keys(['describe_image', 'draw_simple_icon', 'limerick', 'haiku_no_cache', 'primes', 'count_char', 'cities', 'weather', 'vacation', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny'])\n", - "=== Story with moral ===\n", - "\n", - "\n", - "In the case of Whiskers, it was his understanding of this balance that brought him safely home, with both stories and lessons to cherish and share.\n", - "\n", - "=== Funny story ===\n", - "\n", - "\n", - "The End.\n" - ] - } - ], - "source": [ - "# Sub-templates for different story styles\n", - "@Template.define\n", - "def story_with_moral(topic: str) -> str:\n", - " \"\"\"Write a short story about {topic} and end with a moral lesson. Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "@Template.define\n", - "def story_funny(topic: str) -> str:\n", - " \"\"\"Write a funny, humorous story about {topic}. 
Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "# Main orchestrator template - has access to sub-templates\n", - "@Template.define\n", - "def write_story(topic: str, style: str) -> str:\n", - " \"\"\"Write a story about {topic} in the style: {style}.\n", - " Available styles: 'moral' for a story with a lesson, 'funny' for humor. Use story_funny for humor, story_with_moral for a story with a lesson.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "# Verify sub-templates are captured in write_story's lexical context\n", - "assert story_with_moral in write_story.tools.values()\n", - "assert story_funny in write_story.tools.values()\n", - "print(\"Sub-templates available to write_story:\", write_story.tools.keys())\n", - "\n", - "with handler(provider):\n", - " print(\"=== Story with moral ===\")\n", - " print(write_story(\"a curious cat\", \"moral\"))\n", - " print()\n", - " print(\"=== Funny story ===\")\n", - " print(write_story(\"a curious cat\", \"funny\"))" - ] - }, - { - "cell_type": "markdown", - "id": "bd25826d", - "metadata": {}, - "source": [ - "## Retrying LLM Requests\n", - "LLM calls can sometimes fail due to transient errors or produce invalid outputs. The `RetryLLMHandler` automatically retries failed template calls and can also surface tool/runtime errors as tool messages:\n", - "\n", - "- `num_retries`: Maximum number of retry attempts (default: 3)\n", - "- `include_traceback`: When `True`, include traceback details in the error feedback (default: False)\n", - "- `catch_tool_errors`: Exception type(s) to catch during tool execution (default: `Exception`)\n" - ] - }, - { - "cell_type": "markdown", - "id": "bafc0a96", - "metadata": {}, - "source": [ - "Example usage: having an unstable service that seldomly fail." 
- ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "4334d07a", - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "id": "13adb300", + "metadata": {}, + "source": [ + "## Converting LLM Results to Python Objects\n", + "\n", + "Type conversion is handled by `decode`. By default, primitive types are converted. `DecodeError` is raised if a response cannot be converted." + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error: Service unavailable! Attempt 1/3. Please retry.\n", - "Result: The data fetched from the unstable service is: `[1, 2, 3]`. Retries: 3\n" - ] - } - ], - "source": [ - "call_count = 0\n", - "REQUIRED_RETRIES = 3\n", - "\n", - "\n", - "@Tool.define\n", - "def unstable_service() -> str:\n", - " \"\"\"Fetch data from an unstable external service. May require retries.\"\"\"\n", - " global call_count\n", - " call_count += 1\n", - " if call_count < REQUIRED_RETRIES:\n", - " raise ConnectionError(\n", - " f\"Service unavailable! Attempt {call_count}/{REQUIRED_RETRIES}. 
Please retry.\"\n", - " )\n", - " return \"{ 'status': 'ok', 'data': [1, 2, 3] }\"\n", - "\n", - "\n", - "@Template.define # unstable_service auto-captured from lexical scope\n", - "def fetch_data() -> str:\n", - " \"\"\"Use the unstable_service tool to fetch data.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "with handler(provider):\n", - " try:\n", - " result = fetch_data()\n", - " except Exception as e:\n", - " print(f\"Error: {e}\")\n", - "\n", - "with handler(provider), handler(RetryLLMHandler(num_retries=3)):\n", - " result = fetch_data()\n", - " print(f\"Result: {result}\", \"Retries:\", call_count)" - ] - }, - { - "cell_type": "markdown", - "id": "4ac00e01", - "metadata": {}, - "source": [ - "## Retrying with Validation Errors\n", - "As noted above, the `RetryHandler` can also be used to retry on runtime/validation error:" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "39b2b225", - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": 5, + "id": "2c766859", + "metadata": {}, + "outputs": [], + "source": [ + "@Template.define\n", + "def primes(first_digit: int) -> int:\n", + " \"\"\"Give a prime number with {first_digit} as the first digit. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "with handler(provider):\n", + " assert type(primes(6)) is int" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error: Error decoding response: 1 validation error for Response\n", - "value.score\n", - " score must be 1–5, got 9 [type=invalid_score, input_value=9, input_type=int]. Please provide a valid response and try again.\n", - "Score: 5/5\n", - "Explanation: Die Hard is widely acclaimed as one of the best action films of all time and earns a perfect score of 5 out of 5. Its success is attributed to a gripping storyline, memorable performances, particularly by Bruce Willis as John McClane, and its innovative approach to action sequences. 
Its mix of humor, suspense, and holiday-themed backdrop makes it a perennial favorite, cementing its status as a cultural icon.\n" - ] - } - ], - "source": [ - "@pydantic.dataclasses.dataclass\n", - "class Rating:\n", - " score: int\n", - " explanation: str\n", - "\n", - " @field_validator(\"score\")\n", - " @classmethod\n", - " def check_score(cls, v):\n", - " if v < 1 or v > 5:\n", - " raise PydanticCustomError(\n", - " \"invalid_score\",\n", - " \"score must be 1–5, got {v}\",\n", - " {\"v\": v},\n", - " )\n", - " return v\n", - "\n", - " @field_validator(\"explanation\")\n", - " @classmethod\n", - " def check_explanation_contains_score(cls, v, info):\n", - " score = info.data.get(\"score\", None)\n", - " if score is not None and str(score) not in v:\n", - " raise PydanticCustomError(\n", - " \"invalid_explanation\",\n", - " \"explanation must mention the score {score}, got '{explanation}'\",\n", - " {\"score\": score, \"explanation\": v},\n", - " )\n", - " return v\n", - "\n", - "\n", - "@Template.define\n", - "def give_rating_for_movie(movie_name: str) -> Rating:\n", - " \"\"\"Give a rating for {movie_name}. The explanation MUST include the numeric score. 
Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "with handler(provider):\n", - " try:\n", - " rating = give_rating_for_movie(\"Die Hard\")\n", - " except Exception as e:\n", - " print(f\"Error: {e}\")\n", - "\n", - "with handler(provider), handler(RetryLLMHandler(num_retries=3)):\n", - " rating = give_rating_for_movie(\"Die Hard\")\n", - " print(f\"Score: {rating.score}/5\")\n", - " print(f\"Explanation: {rating.explanation}\")" - ] - }, - { - "cell_type": "markdown", - "id": "aec0632c", - "metadata": {}, - "source": [ - "## Generating higher-order functions\n", - "Finally, we can generate higher-order functions that can call templates as well:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "9d02bc67", - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "id": "36d78a71", + "metadata": {}, + "source": [ + "More complex types can be converted by providing handlers for `decode`. Callable synthesis is supported via `Encodable` and the evaluation providers in `effectful.handlers.llm.evaluation` (`UnsafeEvalProvider` or `RestrictedEvalProvider`), which enable parsing/compiling/executing synthesized code." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c83bbdc0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "def count_char_a(text: str) -> int:\n", + " \"\"\"\n", + " Counts the occurrences of 'a' in the given string.\n", + "\n", + " Args:\n", + " text (str): The string to search within.\n", + "\n", + " Returns:\n", + " int: The count of 'a' characters in the string.\n", + "\n", + " Examples:\n", + " >>> count_char_a('banana')\n", + " 3\n", + " >>> count_char_a('apple')\n", + " 1\n", + " >>> count_char_a('cherry')\n", + " 0\n", + " \"\"\"\n", + " count = 0\n", + " for char in text:\n", + " if char == 'a':\n", + " count += 1\n", + " return count\n", + "\n" + ] + } + ], + "source": [ + "import inspect\n", + "from collections.abc import Callable\n", + "\n", + "from effectful.handlers.llm.evaluation import DoctestHandler, UnsafeEvalProvider\n", + "\n", + "\n", + "@Template.define\n", + "def count_char(char: str) -> Callable[[str], int]:\n", + " \"\"\"Write a function named count_char which takes a string and counts the occurrances of '{char}'. Do not use any tools.\n", + "\n", + " Examples:\n", + " >>> count_char(\"banana\")\n", + " 3\n", + " >>> count_char(\"cherry\")\n", + " 0\n", + " \"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "# Use UnsafeEvalProvider for simple examples; RestrictedEvalProvider may need extra globals.\n", + "with handler(provider), handler(UnsafeEvalProvider()), handler(DoctestHandler()):\n", + " count_a = count_char(\"a\")\n", + " assert callable(count_a)\n", + " assert count_a(\"banana\") == 3\n", + " assert count_a(\"cherry\") == 0\n", + " # Print the source code of the generated function\n", + " print(inspect.getsource(count_a))" + ] + }, + { + "cell_type": "markdown", + "id": "0b6a7b48", + "metadata": {}, + "source": [ + "### Doctest Feedback\n", + "\n", + "Write doctests in the template docstring for callable synthesis. 
The docstring is formatted with the call arguments, and the doctests are executed during decoding." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "793b12a5", + "metadata": {}, + "outputs": [ + { + "ename": "ResultDecodingError", + "evalue": "Error decoding response: mypy type check failed:\n:170: \u001b[1m\u001b[31merror:\u001b[m Name \u001b[m\u001b[1m\"count_char\"\u001b[m already defined on line 159 \u001b[m\u001b[33m[no-redef]\u001b[m\n\nimport collections\nimport collections.abc\nimport IPython\nimport IPython.core\nimport IPython.core.alias\nimport IPython.core.application\nimport IPython.core.async_helpers\nimport IPython.core.autocall\nimport IPython.core.builtin_trap\nimport IPython.core.compilerop\nimport IPython.core.completer\nimport IPython.core.completerlib\nimport IPython.core.crashhandler\nimport IPython.core.debugger\nimport IPython.core.debugger_backport\nimport IPython.core.display\nimport IPython.core.display_functions\nimport IPython.core.display_trap\nimport IPython.core.displayhook\nimport IPython.core.displaypub\nimport IPython.core.doctb\nimport IPython.core.error\nimport IPython.core.events\nimport IPython.core.extensions\nimport IPython.core.formatters\nimport IPython.core.getipython\nimport IPython.core.guarded_eval\nimport IPython.core.history\nimport IPython.core.hooks\nimport IPython.core.inputtransformer2\nimport IPython.core.interactiveshell\nimport IPython.core.latex_symbols\nimport IPython.core.logger\nimport IPython.core.macro\nimport IPython.core.magic\nimport IPython.core.magic_arguments\nimport IPython.core.magics\nimport IPython.core.magics.ast_mod\nimport IPython.core.magics.auto\nimport IPython.core.magics.basic\nimport IPython.core.magics.code\nimport IPython.core.magics.config\nimport IPython.core.magics.display\nimport IPython.core.magics.execution\nimport IPython.core.magics.extension\nimport IPython.core.magics.history\nimport IPython.core.magics.logging\nimport 
IPython.core.magics.namespace\nimport IPython.core.magics.osm\nimport IPython.core.magics.packaging\nimport IPython.core.magics.pylab\nimport IPython.core.magics.script\nimport IPython.core.oinspect\nimport IPython.core.page\nimport IPython.core.payload\nimport IPython.core.prefilter\nimport IPython.core.profiledir\nimport IPython.core.release\nimport IPython.core.shellapp\nimport IPython.core.splitinput\nimport IPython.core.tbtools\nimport IPython.core.tips\nimport IPython.core.ultratb\nimport IPython.core.usage\nimport IPython.display\nimport IPython.extensions\nimport IPython.extensions.storemagic\nimport IPython.external\nimport IPython.external.pickleshare\nimport IPython.lib\nimport IPython.lib.clipboard\nimport IPython.lib.display\nimport IPython.lib.pretty\nimport IPython.paths\nimport IPython.terminal\nimport IPython.terminal.debugger\nimport IPython.terminal.embed\nimport IPython.terminal.interactiveshell\nimport IPython.terminal.ipapp\nimport IPython.terminal.magics\nimport IPython.terminal.prompts\nimport IPython.terminal.pt_inputhooks\nimport IPython.terminal.ptutils\nimport IPython.terminal.shortcuts\nimport IPython.terminal.shortcuts.auto_match\nimport IPython.terminal.shortcuts.auto_suggest\nimport IPython.terminal.shortcuts.filters\nimport IPython.testing\nimport IPython.testing.skipdoctest\nimport IPython.utils\nimport IPython.utils.PyColorize\nimport IPython.utils._process_common\nimport IPython.utils._process_posix\nimport IPython.utils._sysinfo\nimport IPython.utils.capture\nimport IPython.utils.contexts\nimport IPython.utils.data\nimport IPython.utils.decorators\nimport IPython.utils.dir2\nimport IPython.utils.docs\nimport IPython.utils.encoding\nimport IPython.utils.frame\nimport IPython.utils.generics\nimport IPython.utils.importstring\nimport IPython.utils.io\nimport IPython.utils.ipstruct\nimport IPython.utils.module_paths\nimport IPython.utils.openpy\nimport IPython.utils.path\nimport IPython.utils.process\nimport 
IPython.utils.py3compat\nimport IPython.utils.sentinel\nimport IPython.utils.strdispatch\nimport IPython.utils.sysinfo\nimport IPython.utils.syspathcontext\nimport IPython.utils.terminal\nimport IPython.utils.text\nimport IPython.utils.timing\nimport IPython.utils.tokenutil\nimport IPython.utils.wildcard\nimport collections\nimport collections.abc\nimport effectful\nimport effectful.handlers\nimport effectful.handlers.llm\nimport effectful.handlers.llm.completions\nimport effectful.handlers.llm.encoding\nimport effectful.handlers.llm.evaluation\nimport effectful.handlers.llm.template\nimport effectful.internals\nimport effectful.internals.runtime\nimport effectful.internals.unification\nimport effectful.ops\nimport effectful.ops.semantics\nimport effectful.ops.syntax\nimport effectful.ops.types\nimport pathlib\nIn: collections.abc.MutableSequence[str]\nOut: dict\n_: str\n__: str\n___: str\n__vsc_ipynb_file__: str\n_dh: collections.abc.MutableSequence[pathlib.PosixPath]\n_i: str\n_i1: str\n_i2: str\n_i3: str\n_i4: str\n_i5: str\n_i6: str\n_i7: str\n_ih: collections.abc.MutableSequence[str]\n_ii: str\n_iii: str\n_oh: dict\nchar: str\ncount_a: collections.abc.Callable[[str], int]\ncount_char: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\ncount_char_with_doctest: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\nexit: IPython.core.autocall.ZMQExitAutocall\nhaiku: collections.abc.Callable[[str], str]\nhaiku_no_cache: effectful.ops.types.Operation[[str], str]\nlimerick: effectful.ops.types.Operation[[str], str]\nprimes: effectful.ops.types.Operation[[int], int]\nprovider: effectful.ops.types.Interpretation\nprovider_cached: effectful.ops.types.Interpretation\nquit: IPython.core.autocall.ZMQExitAutocall\n\ndef count_char(s: str) -> int:\n \"\"\"\n Count the occurrences of the character 'a' in the given string.\n\n Args:\n s (str): The string in which to count occurrences of 'a'.\n\n Returns:\n int: The number of 
times 'a' appears in the string.\n\n Examples:\n >>> count_char('banana')\n 3\n >>> count_char('apple')\n 1\n >>> count_char('cherry')\n 0\n \"\"\"\n return s.count('a')\n_synthesized_check: collections.abc.Callable[[str], int] = count_char. Please provide a valid response and try again.", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mTypeError\u001b[39m Traceback (most recent call last)", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/completions.py:302\u001b[39m, in \u001b[36mcall_assistant\u001b[39m\u001b[34m(tools, response_format, model, **kwargs)\u001b[39m\n\u001b[32m 301\u001b[39m raw_result = response_model.model_validate_json(serialized_result)\n\u001b[32m--> \u001b[39m\u001b[32m302\u001b[39m result = \u001b[43mresponse_format\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mraw_result\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[32m 303\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (pydantic.ValidationError, \u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mSyntaxError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/encoding.py:452\u001b[39m, in \u001b[36mCallableEncodable.decode\u001b[39m\u001b[34m(self, encoded_value)\u001b[39m\n\u001b[32m 451\u001b[39m \u001b[38;5;66;03m# Type-check with mypy; pass original module_code so mypy sees exact source\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m452\u001b[39m \u001b[43mevaluation\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtype_check\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 453\u001b[39m \u001b[43m \u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mctx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexpected_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexpected_return\u001b[49m\n\u001b[32m 454\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 456\u001b[39m \u001b[38;5;66;03m# Compile and execute\u001b[39;00m\n\u001b[32m 457\u001b[39m \u001b[38;5;66;03m# https://docs.python.org/3/library/functions.html#exec\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:494\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m self_handler \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m494\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mself_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 495\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m args \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(args[\u001b[32m0\u001b[39m], Operation) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m args[\u001b[32m0\u001b[39m].__apply__:\n\u001b[32m 496\u001b[39m \u001b[38;5;66;03m# Prevent infinite recursion when calling self.apply directly\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:70\u001b[39m, in \u001b[36m_set_prompt..bound_body\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 69\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({prompt: 
handler({prompt: next_cont})(cont)}):\n\u001b[32m---> \u001b[39m\u001b[32m70\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbody\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:56\u001b[39m, in \u001b[36m_save_args.._cont_wrapper\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 55\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({_get_args: \u001b[38;5;28;01mlambda\u001b[39;00m: (a, k)}):\n\u001b[32m---> \u001b[39m\u001b[32m56\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/evaluation.py:628\u001b[39m, in \u001b[36mUnsafeEvalProvider.type_check\u001b[39m\u001b[34m(self, module, ctx, expected_params, expected_return)\u001b[39m\n\u001b[32m 620\u001b[39m \u001b[38;5;129m@implements\u001b[39m(type_check)\n\u001b[32m 621\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mtype_check\u001b[39m(\n\u001b[32m 622\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 626\u001b[39m expected_return: \u001b[38;5;28mtype\u001b[39m,\n\u001b[32m 627\u001b[39m ) -> \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m628\u001b[39m \u001b[43mmypy_type_check\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mctx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexpected_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexpected_return\u001b[49m\u001b[43m)\u001b[49m\n", + 
"\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/evaluation.py:586\u001b[39m, in \u001b[36mmypy_type_check\u001b[39m\u001b[34m(module, ctx, expected_params, expected_return)\u001b[39m\n\u001b[32m 585\u001b[39m report = (stdout \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m) + (stderr \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m586\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mmypy type check failed:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mreport\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00msource\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 587\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "\u001b[31mTypeError\u001b[39m: mypy type check failed:\n:170: \u001b[1m\u001b[31merror:\u001b[m Name \u001b[m\u001b[1m\"count_char\"\u001b[m already defined on line 159 \u001b[m\u001b[33m[no-redef]\u001b[m\n\nimport collections\nimport collections.abc\nimport IPython\nimport IPython.core\nimport IPython.core.alias\nimport IPython.core.application\nimport IPython.core.async_helpers\nimport IPython.core.autocall\nimport IPython.core.builtin_trap\nimport IPython.core.compilerop\nimport IPython.core.completer\nimport IPython.core.completerlib\nimport IPython.core.crashhandler\nimport IPython.core.debugger\nimport IPython.core.debugger_backport\nimport IPython.core.display\nimport IPython.core.display_functions\nimport IPython.core.display_trap\nimport IPython.core.displayhook\nimport IPython.core.displaypub\nimport IPython.core.doctb\nimport IPython.core.error\nimport IPython.core.events\nimport IPython.core.extensions\nimport IPython.core.formatters\nimport IPython.core.getipython\nimport 
IPython.core.guarded_eval\nimport IPython.core.history\nimport IPython.core.hooks\nimport IPython.core.inputtransformer2\nimport IPython.core.interactiveshell\nimport IPython.core.latex_symbols\nimport IPython.core.logger\nimport IPython.core.macro\nimport IPython.core.magic\nimport IPython.core.magic_arguments\nimport IPython.core.magics\nimport IPython.core.magics.ast_mod\nimport IPython.core.magics.auto\nimport IPython.core.magics.basic\nimport IPython.core.magics.code\nimport IPython.core.magics.config\nimport IPython.core.magics.display\nimport IPython.core.magics.execution\nimport IPython.core.magics.extension\nimport IPython.core.magics.history\nimport IPython.core.magics.logging\nimport IPython.core.magics.namespace\nimport IPython.core.magics.osm\nimport IPython.core.magics.packaging\nimport IPython.core.magics.pylab\nimport IPython.core.magics.script\nimport IPython.core.oinspect\nimport IPython.core.page\nimport IPython.core.payload\nimport IPython.core.prefilter\nimport IPython.core.profiledir\nimport IPython.core.release\nimport IPython.core.shellapp\nimport IPython.core.splitinput\nimport IPython.core.tbtools\nimport IPython.core.tips\nimport IPython.core.ultratb\nimport IPython.core.usage\nimport IPython.display\nimport IPython.extensions\nimport IPython.extensions.storemagic\nimport IPython.external\nimport IPython.external.pickleshare\nimport IPython.lib\nimport IPython.lib.clipboard\nimport IPython.lib.display\nimport IPython.lib.pretty\nimport IPython.paths\nimport IPython.terminal\nimport IPython.terminal.debugger\nimport IPython.terminal.embed\nimport IPython.terminal.interactiveshell\nimport IPython.terminal.ipapp\nimport IPython.terminal.magics\nimport IPython.terminal.prompts\nimport IPython.terminal.pt_inputhooks\nimport IPython.terminal.ptutils\nimport IPython.terminal.shortcuts\nimport IPython.terminal.shortcuts.auto_match\nimport IPython.terminal.shortcuts.auto_suggest\nimport IPython.terminal.shortcuts.filters\nimport 
IPython.testing\nimport IPython.testing.skipdoctest\nimport IPython.utils\nimport IPython.utils.PyColorize\nimport IPython.utils._process_common\nimport IPython.utils._process_posix\nimport IPython.utils._sysinfo\nimport IPython.utils.capture\nimport IPython.utils.contexts\nimport IPython.utils.data\nimport IPython.utils.decorators\nimport IPython.utils.dir2\nimport IPython.utils.docs\nimport IPython.utils.encoding\nimport IPython.utils.frame\nimport IPython.utils.generics\nimport IPython.utils.importstring\nimport IPython.utils.io\nimport IPython.utils.ipstruct\nimport IPython.utils.module_paths\nimport IPython.utils.openpy\nimport IPython.utils.path\nimport IPython.utils.process\nimport IPython.utils.py3compat\nimport IPython.utils.sentinel\nimport IPython.utils.strdispatch\nimport IPython.utils.sysinfo\nimport IPython.utils.syspathcontext\nimport IPython.utils.terminal\nimport IPython.utils.text\nimport IPython.utils.timing\nimport IPython.utils.tokenutil\nimport IPython.utils.wildcard\nimport collections\nimport collections.abc\nimport effectful\nimport effectful.handlers\nimport effectful.handlers.llm\nimport effectful.handlers.llm.completions\nimport effectful.handlers.llm.encoding\nimport effectful.handlers.llm.evaluation\nimport effectful.handlers.llm.template\nimport effectful.internals\nimport effectful.internals.runtime\nimport effectful.internals.unification\nimport effectful.ops\nimport effectful.ops.semantics\nimport effectful.ops.syntax\nimport effectful.ops.types\nimport pathlib\nIn: collections.abc.MutableSequence[str]\nOut: dict\n_: str\n__: str\n___: str\n__vsc_ipynb_file__: str\n_dh: collections.abc.MutableSequence[pathlib.PosixPath]\n_i: str\n_i1: str\n_i2: str\n_i3: str\n_i4: str\n_i5: str\n_i6: str\n_i7: str\n_ih: collections.abc.MutableSequence[str]\n_ii: str\n_iii: str\n_oh: dict\nchar: str\ncount_a: collections.abc.Callable[[str], int]\ncount_char: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], 
int]]\ncount_char_with_doctest: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\nexit: IPython.core.autocall.ZMQExitAutocall\nhaiku: collections.abc.Callable[[str], str]\nhaiku_no_cache: effectful.ops.types.Operation[[str], str]\nlimerick: effectful.ops.types.Operation[[str], str]\nprimes: effectful.ops.types.Operation[[int], int]\nprovider: effectful.ops.types.Interpretation\nprovider_cached: effectful.ops.types.Interpretation\nquit: IPython.core.autocall.ZMQExitAutocall\n\ndef count_char(s: str) -> int:\n \"\"\"\n Count the occurrences of the character 'a' in the given string.\n\n Args:\n s (str): The string in which to count occurrences of 'a'.\n\n Returns:\n int: The number of times 'a' appears in the string.\n\n Examples:\n >>> count_char('banana')\n 3\n >>> count_char('apple')\n 1\n >>> count_char('cherry')\n 0\n \"\"\"\n return s.count('a')\n_synthesized_check: collections.abc.Callable[[str], int] = count_char", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[31mResultDecodingError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 10\u001b[39m\n\u001b[32m 6\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m NotHandled\n\u001b[32m 9\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler(provider), handler(UnsafeEvalProvider()):\n\u001b[32m---> \u001b[39m\u001b[32m10\u001b[39m count_a = \u001b[43mcount_char_with_doctest\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43ma\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 11\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m count_a(\u001b[33m\"\u001b[39m\u001b[33mbanana\u001b[39m\u001b[33m\"\u001b[39m) == \u001b[32m3\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:499\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 
497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__default__(*args, **kwargs)\n\u001b[32m 498\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m499\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__apply__\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:494\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 492\u001b[39m self_handler = intp.get(\u001b[38;5;28mself\u001b[39m)\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m self_handler \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m494\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mself_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 495\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m args \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(args[\u001b[32m0\u001b[39m], Operation) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m args[\u001b[32m0\u001b[39m].__apply__:\n\u001b[32m 496\u001b[39m \u001b[38;5;66;03m# Prevent infinite recursion when calling self.apply directly\u001b[39;00m\n\u001b[32m 497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__default__(*args, **kwargs)\n", + "\u001b[36mFile 
\u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:70\u001b[39m, in \u001b[36m_set_prompt..bound_body\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 68\u001b[39m next_cont = get_interpretation().get(prompt, prompt.__default_rule__)\n\u001b[32m 69\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({prompt: handler({prompt: next_cont})(cont)}):\n\u001b[32m---> \u001b[39m\u001b[32m70\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbody\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:56\u001b[39m, in \u001b[36m_save_args.._cont_wrapper\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 53\u001b[39m \u001b[38;5;129m@functools\u001b[39m.wraps(fn)\n\u001b[32m 54\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_cont_wrapper\u001b[39m(*a: P.args, **k: P.kwargs) -> T:\n\u001b[32m 55\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({_get_args: \u001b[38;5;28;01mlambda\u001b[39;00m: (a, k)}):\n\u001b[32m---> \u001b[39m\u001b[32m56\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/completions.py:521\u001b[39m, in \u001b[36mLiteLLMProvider._call\u001b[39m\u001b[34m(self, template, *args, **kwargs)\u001b[39m\n\u001b[32m 519\u001b[39m result: T | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 520\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m message[\u001b[33m\"\u001b[39m\u001b[33mrole\u001b[39m\u001b[33m\"\u001b[39m] != 
\u001b[33m\"\u001b[39m\u001b[33massistant\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m tool_calls:\n\u001b[32m--> \u001b[39m\u001b[32m521\u001b[39m message, tool_calls, result = \u001b[43mcall_assistant\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 522\u001b[39m \u001b[43m \u001b[49m\u001b[43mtemplate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_model\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mconfig\u001b[49m\n\u001b[32m 523\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 524\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m tool_call \u001b[38;5;129;01min\u001b[39;00m tool_calls:\n\u001b[32m 525\u001b[39m message = call_tool(tool_call)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:499\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__default__(*args, **kwargs)\n\u001b[32m 498\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m499\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__apply__\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:497\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 494\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m self_handler(*args, **kwargs)\n\u001b[32m 495\u001b[39m 
\u001b[38;5;28;01melif\u001b[39;00m args \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(args[\u001b[32m0\u001b[39m], Operation) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m args[\u001b[32m0\u001b[39m].__apply__:\n\u001b[32m 496\u001b[39m \u001b[38;5;66;03m# Prevent infinite recursion when calling self.apply directly\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__default__\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 498\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 499\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__apply__(\u001b[38;5;28mself\u001b[39m, *args, **kwargs)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:548\u001b[39m, in \u001b[36m__apply__\u001b[39m\u001b[34m(op, *args, **kwargs)\u001b[39m\n\u001b[32m 519\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__apply__\u001b[39m[**A, B](op: Operation[A, B], *args: A.args, **kwargs: A.kwargs) -> B:\n\u001b[32m 520\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Apply ``op`` to ``args``, ``kwargs`` in interpretation ``intp``.\u001b[39;00m\n\u001b[32m 521\u001b[39m \n\u001b[32m 522\u001b[39m \u001b[33;03m Handling :func:`Operation.__apply__` changes the evaluation strategy of terms.\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 546\u001b[39m \n\u001b[32m 547\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m548\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mop\u001b[49m\u001b[43m.\u001b[49m\u001b[43m__default_rule__\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:350\u001b[39m, in \u001b[36mOperation.__default_rule__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 345\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"The default rule is used when the operation is not handled.\u001b[39;00m\n\u001b[32m 346\u001b[39m \n\u001b[32m 347\u001b[39m \u001b[33;03mIf no default rule is supplied, the free rule is used instead.\u001b[39;00m\n\u001b[32m 348\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 349\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m350\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__default__\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 351\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m NotHandled:\n\u001b[32m 352\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01meffectful\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mops\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01msyntax\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m defdata\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/completions.py:304\u001b[39m, in \u001b[36mcall_assistant\u001b[39m\u001b[34m(tools, response_format, model, **kwargs)\u001b[39m\n\u001b[32m 302\u001b[39m result = response_format.decode(raw_result.value) \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[32m 303\u001b[39m 
\u001b[38;5;28;01mexcept\u001b[39;00m (pydantic.ValidationError, \u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mSyntaxError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m--> \u001b[39m\u001b[32m304\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m ResultDecodingError(e, raw_message=raw_message) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01me\u001b[39;00m\n\u001b[32m 306\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m (raw_message, tool_calls, result)\n", + "\u001b[31mResultDecodingError\u001b[39m: Error decoding response: mypy type check failed:\n:170: \u001b[1m\u001b[31merror:\u001b[m Name \u001b[m\u001b[1m\"count_char\"\u001b[m already defined on line 159 \u001b[m\u001b[33m[no-redef]\u001b[m\n\nimport collections\nimport collections.abc\nimport IPython\nimport IPython.core\nimport IPython.core.alias\nimport IPython.core.application\nimport IPython.core.async_helpers\nimport IPython.core.autocall\nimport IPython.core.builtin_trap\nimport IPython.core.compilerop\nimport IPython.core.completer\nimport IPython.core.completerlib\nimport IPython.core.crashhandler\nimport IPython.core.debugger\nimport IPython.core.debugger_backport\nimport IPython.core.display\nimport IPython.core.display_functions\nimport IPython.core.display_trap\nimport IPython.core.displayhook\nimport IPython.core.displaypub\nimport IPython.core.doctb\nimport IPython.core.error\nimport IPython.core.events\nimport IPython.core.extensions\nimport IPython.core.formatters\nimport IPython.core.getipython\nimport IPython.core.guarded_eval\nimport IPython.core.history\nimport IPython.core.hooks\nimport IPython.core.inputtransformer2\nimport IPython.core.interactiveshell\nimport IPython.core.latex_symbols\nimport IPython.core.logger\nimport IPython.core.macro\nimport IPython.core.magic\nimport IPython.core.magic_arguments\nimport IPython.core.magics\nimport IPython.core.magics.ast_mod\nimport 
IPython.core.magics.auto\nimport IPython.core.magics.basic\nimport IPython.core.magics.code\nimport IPython.core.magics.config\nimport IPython.core.magics.display\nimport IPython.core.magics.execution\nimport IPython.core.magics.extension\nimport IPython.core.magics.history\nimport IPython.core.magics.logging\nimport IPython.core.magics.namespace\nimport IPython.core.magics.osm\nimport IPython.core.magics.packaging\nimport IPython.core.magics.pylab\nimport IPython.core.magics.script\nimport IPython.core.oinspect\nimport IPython.core.page\nimport IPython.core.payload\nimport IPython.core.prefilter\nimport IPython.core.profiledir\nimport IPython.core.release\nimport IPython.core.shellapp\nimport IPython.core.splitinput\nimport IPython.core.tbtools\nimport IPython.core.tips\nimport IPython.core.ultratb\nimport IPython.core.usage\nimport IPython.display\nimport IPython.extensions\nimport IPython.extensions.storemagic\nimport IPython.external\nimport IPython.external.pickleshare\nimport IPython.lib\nimport IPython.lib.clipboard\nimport IPython.lib.display\nimport IPython.lib.pretty\nimport IPython.paths\nimport IPython.terminal\nimport IPython.terminal.debugger\nimport IPython.terminal.embed\nimport IPython.terminal.interactiveshell\nimport IPython.terminal.ipapp\nimport IPython.terminal.magics\nimport IPython.terminal.prompts\nimport IPython.terminal.pt_inputhooks\nimport IPython.terminal.ptutils\nimport IPython.terminal.shortcuts\nimport IPython.terminal.shortcuts.auto_match\nimport IPython.terminal.shortcuts.auto_suggest\nimport IPython.terminal.shortcuts.filters\nimport IPython.testing\nimport IPython.testing.skipdoctest\nimport IPython.utils\nimport IPython.utils.PyColorize\nimport IPython.utils._process_common\nimport IPython.utils._process_posix\nimport IPython.utils._sysinfo\nimport IPython.utils.capture\nimport IPython.utils.contexts\nimport IPython.utils.data\nimport IPython.utils.decorators\nimport IPython.utils.dir2\nimport IPython.utils.docs\nimport 
IPython.utils.encoding\nimport IPython.utils.frame\nimport IPython.utils.generics\nimport IPython.utils.importstring\nimport IPython.utils.io\nimport IPython.utils.ipstruct\nimport IPython.utils.module_paths\nimport IPython.utils.openpy\nimport IPython.utils.path\nimport IPython.utils.process\nimport IPython.utils.py3compat\nimport IPython.utils.sentinel\nimport IPython.utils.strdispatch\nimport IPython.utils.sysinfo\nimport IPython.utils.syspathcontext\nimport IPython.utils.terminal\nimport IPython.utils.text\nimport IPython.utils.timing\nimport IPython.utils.tokenutil\nimport IPython.utils.wildcard\nimport collections\nimport collections.abc\nimport effectful\nimport effectful.handlers\nimport effectful.handlers.llm\nimport effectful.handlers.llm.completions\nimport effectful.handlers.llm.encoding\nimport effectful.handlers.llm.evaluation\nimport effectful.handlers.llm.template\nimport effectful.internals\nimport effectful.internals.runtime\nimport effectful.internals.unification\nimport effectful.ops\nimport effectful.ops.semantics\nimport effectful.ops.syntax\nimport effectful.ops.types\nimport pathlib\nIn: collections.abc.MutableSequence[str]\nOut: dict\n_: str\n__: str\n___: str\n__vsc_ipynb_file__: str\n_dh: collections.abc.MutableSequence[pathlib.PosixPath]\n_i: str\n_i1: str\n_i2: str\n_i3: str\n_i4: str\n_i5: str\n_i6: str\n_i7: str\n_ih: collections.abc.MutableSequence[str]\n_ii: str\n_iii: str\n_oh: dict\nchar: str\ncount_a: collections.abc.Callable[[str], int]\ncount_char: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\ncount_char_with_doctest: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\nexit: IPython.core.autocall.ZMQExitAutocall\nhaiku: collections.abc.Callable[[str], str]\nhaiku_no_cache: effectful.ops.types.Operation[[str], str]\nlimerick: effectful.ops.types.Operation[[str], str]\nprimes: effectful.ops.types.Operation[[int], int]\nprovider: 
effectful.ops.types.Interpretation\nprovider_cached: effectful.ops.types.Interpretation\nquit: IPython.core.autocall.ZMQExitAutocall\n\ndef count_char(s: str) -> int:\n \"\"\"\n Count the occurrences of the character 'a' in the given string.\n\n Args:\n s (str): The string in which to count occurrences of 'a'.\n\n Returns:\n int: The number of times 'a' appears in the string.\n\n Examples:\n >>> count_char('banana')\n 3\n >>> count_char('apple')\n 1\n >>> count_char('cherry')\n 0\n \"\"\"\n return s.count('a')\n_synthesized_check: collections.abc.Callable[[str], int] = count_char. Please provide a valid response and try again." + ] + } + ], + "source": [ + "@Template.define\n", + "def count_char_with_doctest(char: str) -> Callable[[str], int]:\n", + " \"\"\"Write a function named count_char that counts the occurrances of '{char}'.\n", + " Do not use any tools.\n", + " \"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "with handler(provider), handler(UnsafeEvalProvider()), handler(DoctestHandler()):\n", + " count_a = count_char_with_doctest(\"a\")\n", + " assert count_a(\"banana\") == 3" + ] + }, + { + "cell_type": "markdown", + "id": "991ee445", + "metadata": {}, + "source": [ + "## Tool Calling\n", + "\n", + "`Operation`s defined in the lexical scope of a `Template` are automatically available for the LLM to call as tools. The description of these operations is inferred from their type annotations and docstrings.\n", + "\n", + "Tool calls are mediated by a helper operation `tool_call`. Handling this operation allows tool use to be tracked or logged." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66711301", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Based on the weather conditions:\n", + "\n", + "- **Chicago** is currently cold.\n", + "- **New York** is currently wet.\n", + "- **Barcelona** is currently sunny.\n", + "\n", + "I suggest **Barcelona** as the city with good weather.\n" + ] + } + ], + "source": [ + "@Tool.define\n", + "def cities() -> list[str]:\n", + " \"\"\"Return a list of cities that can be passed to `weather`.\"\"\"\n", + " return [\"Chicago\", \"New York\", \"Barcelona\"]\n", + "\n", + "\n", + "@Tool.define\n", + "def weather(city: str) -> str:\n", + " \"\"\"Given a city name, return a description of the weather in that city.\"\"\"\n", + " status = {\"Chicago\": \"cold\", \"New York\": \"wet\", \"Barcelona\": \"sunny\"}\n", + " return status.get(city, \"unknown\")\n", + "\n", + "\n", + "@Template.define # cities and weather auto-captured from lexical scope\n", + "def vacation() -> str:\n", + " \"\"\"Use the provided tools to suggest a city that has good weather. Use only the `cities` and `weather` tools provided.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "with handler(provider):\n", + " print(vacation())" + ] + }, + { + "cell_type": "markdown", + "id": "59584a54", + "metadata": {}, + "source": [ + "## Image Inputs\n", + "\n", + "You can pass `PIL.Image.Image` values directly to templates." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89992702", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\"Example" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "A simple smiley face with a yellow background, featuring two black dots for eyes and a curved line for a mouth, typically used to convey happiness or friendliness.\n" + ] + } + ], + "source": [ + "image_base64 = (\n", + " \"iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAhElEQVR4nO2W4QqA\"\n", + " \"MAiEVXr/VzYWDGoMdk7Cgrt/sUs/DqZTd3EplFU2JwATYAJMoOlAB4bq89s95+Mg\"\n", + " \"+gyAchsKAYplBBBA43hFhfxnUixDjdEUUL8hpr7R0KLdt9qElzcyiu8As+Kr8zQA\"\n", + " \"mgLavAl+kIzFZyCRxtsAmWb/voZvqRzgBE1sIDuVFX4eAAAAAElFTkSuQmCC\"\n", + ")\n", + "image = Image.open(io.BytesIO(base64.b64decode(image_base64)))\n", + "\n", + "\n", + "@Template.define\n", + "def describe_image(image: Image.Image) -> str:\n", + " \"\"\"Return a short description of the following image.\n", + " {image}\n", + " \"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "with handler(provider):\n", + " display(\n", + " HTML(\n", + " f'\"Example'\n", + " )\n", + " )\n", + " print(describe_image(image))" + ] + }, + { + "cell_type": "markdown", + "id": "3d221feb", + "metadata": {}, + "source": [ + "## Structured Output Generation\n", + "\n", + "Constrained generation is used for any type that is convertible to a Pydantic model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17668ac8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> You are onstage at a comedy club. You tell the following joke:\n", + "Knock knock.\n", + "Who's there?\n", + "Lizard.\n", + "Lizard who?\n", + "Lizard who? 
Lizard you wonder, there's a gecko at your door!\n", + "> The crowd laughs politely.\n" + ] + } + ], + "source": [ + "@dataclasses.dataclass\n", + "class KnockKnockJoke:\n", + " whos_there: str\n", + " punchline: str\n", + "\n", + "\n", + "@Template.define\n", + "def write_joke(theme: str) -> KnockKnockJoke:\n", + " \"\"\"Write a knock-knock joke on the theme of {theme}. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "@Template.define\n", + "def rate_joke(joke: KnockKnockJoke) -> bool:\n", + " \"\"\"Decide if {joke} is funny or not. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "def do_comedy():\n", + " joke = write_joke(\"lizards\")\n", + " print(\"> You are onstage at a comedy club. You tell the following joke:\")\n", + " print(\n", + " f\"Knock knock.\\nWho's there?\\n{joke.whos_there}.\\n{joke.whos_there} who?\\n{joke.punchline}\"\n", + " )\n", + " if rate_joke(joke):\n", + " print(\"> The crowd laughs politely.\")\n", + " else:\n", + " print(\"> The crowd stares in stony silence.\")\n", + "\n", + "\n", + "with handler(provider):\n", + " do_comedy()" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Sub-templates available to write_story: dict_keys(['describe_image', 'draw_simple_icon', 'limerick', 'haiku_no_cache', 'primes', 'count_char', 'cities', 'weather', 'vacation', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny', 'write_story', 'unstable_service', 'fetch_data', 'give_rating_for_movie', 'write_chapter', 'judge_chapter'])\n", - "=== Story with moral ===\n", - "def create_moral_story(topic: str) -> str:\n", - " # Start with an introduction and establish the story theme\n", - " chapter_1 = write_chapter(1, f\"Introduction to {topic}\")\n", - " \n", - " # Develop the plot with a challenge or situation related to the topic\n", - " chapter_2 = write_chapter(2, f\"The Challenge of {topic}\")\n", - " \n", - " # Introduce a turning point or decision-making moment involving 
the topic\n", - " chapter_3 = write_chapter(3, f\"Decisions and Consequences of {topic}\")\n", - " \n", - " # Conclusion wrapping up the story and highlighting the moral\n", - " chapter_4 = write_chapter(4, f\"Moral and Lessons of {topic}\")\n", - " \n", - " # Combine all chapters into one coherent story\n", - " full_story = \"\\n\\n\".join([chapter_1, chapter_2, chapter_3, chapter_4])\n", - " \n", - " return full_story\n", - "**Title: The Journey of One**\n", - "\n", - "In a land far beyond imagination, where numbers were not just symbols but beings with feelings and desires, there was a little number known as One. Though often underestimated, One had a dream larger than any universe: to find its true purpose.\n", - "\n", - "One was simple, yet unique. It started each day by enjoying the sunrise, counting the seconds in silent appreciation of the continuum of time. Yet, in the grand tapestry of numbers, One felt ordinary and unnoticed, especially among the grandiosity of larger numbers like Millions and Billions, which often boasted about their size and importance.\n", - "\n", - "One day, O came across Zero, a soft-spoken and kind companion, often seen lingering in the shadows of others. \"Why do you look so glum, One?\" asked Zero as they both watched the twinkling stars above.\n", - "\n", - "\"I feel small in a world full of giants. What significance do I hold when everyone seems to multiply and magnify everything far beyond my own capacity?\"\n", - "\n", - "Zero smiled softly. \"You must explore, dear One. For you might be small, but with the right touch, you can change everything. You are the beginning of dreams, the spark that starts a continuum.\"\n", - "\n", - "Taking this advice to heart, One embarked on a journey to discover its true power and potential. 
As it wandered through the Land of Mathematics, it met Addition, the kind-hearted magician, who taught One how it could transform nothing into something, just by joining in a dance.\n", - "\n", - "With Multiplication, One learned coordination and rhythm, expanding its influence exponentially with a simple step forward. There were days spent in the Company of Fractions, shrinking itself to explore the depth of intricacy plus seeing life from a new perspective.\n", - "\n", - "Finally, it found itself near the great figure of Unity, where all numbers whether large or small, participated in harmony. Here, One discovered its greatest potential—to bring completeness. When used wisely, One could complete a perfect circle or spell disaster if miscalculated.\n", - "\n", - "In its quest, One realized its strength was simplicity itself. As small as it was, it was the foundation upon which countless worlds depended. Without One, there was nothing to start; no Number Line, no Life Progression.\n", - "\n", - "And so, One returned to its place in the universe, no longer ordinary but extraordinary in its ability to bring beginnings.\n", - "\n", - "Thus, the moral of the story: No matter how small or insignificant you feel, remember that you have the power to change everything. You are the first step in your journey and those of others. Embrace your role and start with conviction, for you are One. \n", - "\n", - "And sometimes, that's all you need to be remarkable.\n", - "\n", - "Once upon a time in the quaint village of Digiton, nestled in the Valley of Numerals, lived the number 2. In this village, each number had their unique talents and ways to contribute to the community. Number 2 was known for its ability to find wonderful pairings and create harmony.\n", - "\n", - "It was a bright, sunny morning when 2 decided it was time to plan the grand Numerals Gala, an event celebrated by all numbers from 1 to 9. 
This year's theme was \"Unity in Pairs,\" and 2 took the responsibility seriously.\n", - "\n", - "With a checklist in hand, 2 began to organize the event. First, 2 visited its oldest friend, the number 1. \"Would you be one half of a winning pair, dear friend?\" 2 asked. \"Of course,\" replied 1, \"together we make the perfect pair of Unity, everyone knows!\"\n", - "\n", - "Next, 2 approached the number 3. Though sometimes perceived as a little off-kilter, 3 was eager to join and suggested pairing with 4 to symbolize growth and progression: 3 plus 4 always added up to 7—a lucky number for all.\n", - "\n", - "Eager to ensure everyone was included, 2 made a special stop at number 5's cheerful blue cottage. \"5, would you create a bridge with me?\" 2 proposed. \"Together we form \"7\", the lucky charm—how can I resist?\" giggled 5.\n", - "\n", - "Day by day, the excitement in Digiton grew. Numbers periodically gathered in the square to rehearse their speeches and musical acts. Finally, the day of the Gala arrived, and pairs paraded on stage, highlighting unity through their performances. The pairing of 6 and 7 showcased a dance of luck and prosperity, while 8 and 9 painted visions of a dreamy future.\n", - "\n", - "As everyone settled down for the final speech, number 2 took the stage, its heart full of joy. \"Dear friends,\" 2 began, \"thank you for showing us the beauty of partnership. Alone, each of us is a number, but together, we build the world. Let us remember that two is a bond that shows love, loyalty, and peace.\"\n", - "\n", - "With a warm round of applause, the Gala concluded, but in their hearts, every number knew that it was 2's thoughtful pairing that showed them the profound harmony within.\n", - "\n", - "And so, 2's legacy in Digiton was etched as a gentle, powerful reminder that the most meaningful journeys are those taken with another by your side. 
Such was the wisdom of number 2.\n", - "\n", - "Once upon a time, nestled in the quiet and serene landscape of Numerland, there was a unique and charismatic number named \"Three.\" Unlike the other numbers, Three was proudly quirky and adventurous. Sporting three shining points, he dazzled with a triangular shape that made him quite distinctive among his peers.\n", - "\n", - "Three lived in the bustling community of Tallytown, a place where numbers came together to form equations, solve problems, and have numerical debates. But Three often felt that Tallytown was too caught up in linear thinking. He liked to think outside the box—or pyramid, in his case.\n", - "\n", - "One sunny day, Three decided to embark on an adventure across the wide fields of positivity. His first stop was Addition Avenue, a lively street where numbers piled atop each other, eagerly building bridges to larger sums. While there, Three met other numbers like Six and Nine, who greeted him warmly.\n", - "\n", - "“Why travel, Three?” asked Six.\n", - "\n", - "\"I'm seeking something more,\" Three replied. \"I feel like there's a whole world of meritorious multiplicities and radiant reciprocals waiting for me!\"\n", - "\n", - "With a friendly nod, Three continued on his journey. He navigated through Subtraction Square, where he learned to appreciate simplicity. As he passed through, Two’s counsel resonated: “Sometimes less is more, Three.”\n", - "\n", - "Eventually, Three found himself at the multipliers' meadow, a wide expanse where numbers did cartwheels, creating exponential wonders. It was here he met Zero, who diffidently warned, \"Multiply with me, and I'll vanish you into nothingness!\"\n", - "\n", - "Three chuckled at the paradox and moved on. He rolled over to Division Dale, where he admired the symmetry of parts and ratios. 
Three realized he was not just a number but a part of something truly wondrous.\n", - "\n", - "Finally, gazing at the starry skyscape of Infinity Lane, Three discovered his true potential—he was a constant, reliable factor that held significance beyond simple numerical value. Each point of his triangular form seemed to twinkle with this newfound wisdom.\n", - "\n", - "As he made his way back home to Tallytown, Three felt renewed, armed with appreciation for his uniqueness and the harmony between all numbers. He returned not just as Three, but as the representation of balance, creativity, and the beautiful geometric world from which he drew his strength.\n", - "\n", - "And so, in the land of Numerland, Three lived happily, not just a simple integer, but a remarkable journey in and of itself—a point of convergence in a universe of endless possibilities.\n", - "\n", - "**The Tale of Four Friends**\n", - "\n", - "Once upon a time, in the cozy town of Little Numbers, there dwelt a modest fellow known simply as \"4.\" Though he appeared ordinary, 4 was actually quite special. He had three devoted friends: 1, 2, and 3. Together, they formed a dynamic quartet of remarkable adventures.\n", - "\n", - "One bright spring morning, they embarked on a journey to solve the mystery of the Lost Sequence. It was said that the sequence held the secret to solving any mathematical problem, and possessing it would mean endless possibilities.\n", - "\n", - "4, ever confident in his stability, led the group with enthusiasm. \"We can decipher any riddle with our unity,\" he declared, his square-shaped stature conveying authority.\n", - "\n", - "Their first challenge arrived at the Great Divide Canyon, a vast gap that seemed insurmountable. \"Fear not!\" said 2, offering help with her talent for pairing. She balanced 1 on her left and 3 on her right. 
Effortlessly, they formed a bridge sturdy enough for 4 to cross, leading them all safely to the other side.\n", - "\n", - "The team soon reached the Valley of Equations, where intricate puzzles befuddled passersby. With 4's knack for balance and proportion, they made short work of the conundrums. 1's simplicity, combined with 3's creative approach, solved complex equations, while 2's knack for harmonizing detected patterns invisible to others.\n", - "\n", - "As they journeyed deeper, they encountered the enigma known as the Paradox Terrain. Here, problems that seemed unsolvable loomed ominously. \"Let us remember,\" 4 reminded them, \"that solutions are often nearer than they appear.\"\n", - "\n", - "With a fresh perspective, 3 noticed a pattern: each unsolvable problem required going back to basic principles. By retracing steps, simplifying assumptions, and adding unique insights, they cracked the paradox.\n", - "\n", - "At last, the quartet arrived at the Chamber of the Lost Sequence, where wisdom awaited them. The mystical sequence unveiled itself, revealing the elegance of mathematical harmony, in which each number played a crucial role.\n", - "\n", - "Embracing the sequence, the friends returned to Little Numbers, wiser and more united than ever. Thus, in the camaraderie of 4 and his friends, the town learned a timeless lesson: the greatest strength comes not from singular achievement, but from the harmony of collective unity.\n", - "\n", - "And so, they lived happily and mathematically ever after.\n", - "\n" - ] + "cell_type": "markdown", + "id": "c0003944", + "metadata": {}, + "source": [ + "## Template Composition\n", + "\n", + "Templates defined in the lexical scope are also captured, enabling template composition. 
One template can use the result of another template in a pipeline:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78a4bf44", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sub-templates available to write_story: dict_keys(['describe_image', 'draw_simple_icon', 'limerick', 'haiku_no_cache', 'primes', 'count_char', 'cities', 'weather', 'vacation', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny'])\n", + "=== Story with moral ===\n", + "\n", + "\n", + "In the case of Whiskers, it was his understanding of this balance that brought him safely home, with both stories and lessons to cherish and share.\n", + "\n", + "=== Funny story ===\n", + "\n", + "\n", + "The End.\n" + ] + } + ], + "source": [ + "# Sub-templates for different story styles\n", + "@Template.define\n", + "def story_with_moral(topic: str) -> str:\n", + " \"\"\"Write a short story about {topic} and end with a moral lesson. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "@Template.define\n", + "def story_funny(topic: str) -> str:\n", + " \"\"\"Write a funny, humorous story about {topic}. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "# Main orchestrator template - has access to sub-templates\n", + "@Template.define\n", + "def write_story(topic: str, style: str) -> str:\n", + " \"\"\"Write a story about {topic} in the style: {style}.\n", + " Available styles: 'moral' for a story with a lesson, 'funny' for humor. 
Use story_funny for humor, story_with_moral for a story with a lesson.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "# Verify sub-templates are captured in write_story's lexical context\n", + "assert story_with_moral in write_story.tools.values()\n", + "assert story_funny in write_story.tools.values()\n", + "print(\"Sub-templates available to write_story:\", write_story.tools.keys())\n", + "\n", + "with handler(provider):\n", + " print(\"=== Story with moral ===\")\n", + " print(write_story(\"a curious cat\", \"moral\"))\n", + " print()\n", + " print(\"=== Funny story ===\")\n", + " print(write_story(\"a curious cat\", \"funny\"))" + ] + }, + { + "cell_type": "markdown", + "id": "bd25826d", + "metadata": {}, + "source": [ + "## Retrying LLM Requests\n", + "LLM calls can sometimes fail due to transient errors or produce invalid outputs. The `RetryLLMHandler` automatically retries failed template calls and can also surface tool/runtime errors as tool messages:\n", + "\n", + "- `num_retries`: Maximum number of retry attempts (default: 3)\n", + "- `include_traceback`: When `True`, include traceback details in the error feedback (default: False)\n", + "- `catch_tool_errors`: Exception type(s) to catch during tool execution (default: `Exception`)\n" + ] + }, + { + "cell_type": "markdown", + "id": "bafc0a96", + "metadata": {}, + "source": [ + "Example usage: calling an unstable service that occasionally fails." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4334d07a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Error: Service unavailable! Attempt 1/3. Please retry.\n", + "Result: The data fetched from the unstable service is: `[1, 2, 3]`. Retries: 3\n" + ] + } + ], + "source": [ + "call_count = 0\n", + "REQUIRED_RETRIES = 3\n", + "\n", + "\n", + "@Tool.define\n", + "def unstable_service() -> str:\n", + " \"\"\"Fetch data from an unstable external service. 
May require retries.\"\"\"\n", + " global call_count\n", + " call_count += 1\n", + " if call_count < REQUIRED_RETRIES:\n", + " raise ConnectionError(\n", + " f\"Service unavailable! Attempt {call_count}/{REQUIRED_RETRIES}. Please retry.\"\n", + " )\n", + " return \"{ 'status': 'ok', 'data': [1, 2, 3] }\"\n", + "\n", + "\n", + "@Template.define # unstable_service auto-captured from lexical scope\n", + "def fetch_data() -> str:\n", + " \"\"\"Use the unstable_service tool to fetch data.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "with handler(provider):\n", + " try:\n", + " result = fetch_data()\n", + " except Exception as e:\n", + " print(f\"Error: {e}\")\n", + "\n", + "with handler(provider), handler(RetryLLMHandler(num_retries=3)):\n", + " result = fetch_data()\n", + " print(f\"Result: {result}\", \"Retries:\", call_count)" + ] + }, + { + "cell_type": "markdown", + "id": "4ac00e01", + "metadata": {}, + "source": [ + "## Retrying with Validation Errors\n", + "As noted above, the `RetryLLMHandler` can also be used to retry on runtime/validation errors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b2b225", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Error: Error decoding response: 1 validation error for Response\n", + "value.score\n", + " score must be 1–5, got 9 [type=invalid_score, input_value=9, input_type=int]. Please provide a valid response and try again.\n", + "Score: 5/5\n", + "Explanation: Die Hard is widely acclaimed as one of the best action films of all time and earns a perfect score of 5 out of 5. Its success is attributed to a gripping storyline, memorable performances, particularly by Bruce Willis as John McClane, and its innovative approach to action sequences. 
Its mix of humor, suspense, and holiday-themed backdrop makes it a perennial favorite, cementing its status as a cultural icon.\n" + ] + } + ], + "source": [ + "@pydantic.dataclasses.dataclass\n", + "class Rating:\n", + " score: int\n", + " explanation: str\n", + "\n", + " @field_validator(\"score\")\n", + " @classmethod\n", + " def check_score(cls, v):\n", + " if v < 1 or v > 5:\n", + " raise PydanticCustomError(\n", + " \"invalid_score\",\n", + " \"score must be 1–5, got {v}\",\n", + " {\"v\": v},\n", + " )\n", + " return v\n", + "\n", + " @field_validator(\"explanation\")\n", + " @classmethod\n", + " def check_explanation_contains_score(cls, v, info):\n", + " score = info.data.get(\"score\", None)\n", + " if score is not None and str(score) not in v:\n", + " raise PydanticCustomError(\n", + " \"invalid_explanation\",\n", + " \"explanation must mention the score {score}, got '{explanation}'\",\n", + " {\"score\": score, \"explanation\": v},\n", + " )\n", + " return v\n", + "\n", + "\n", + "@Template.define\n", + "def give_rating_for_movie(movie_name: str) -> Rating:\n", + " \"\"\"Give a rating for {movie_name}. The explanation MUST include the numeric score. 
Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "with handler(provider):\n", + " try:\n", + " rating = give_rating_for_movie(\"Die Hard\")\n", + " except Exception as e:\n", + " print(f\"Error: {e}\")\n", + "\n", + "with handler(provider), handler(RetryLLMHandler(num_retries=3)):\n", + " rating = give_rating_for_movie(\"Die Hard\")\n", + " print(f\"Score: {rating.score}/5\")\n", + " print(f\"Explanation: {rating.explanation}\")" + ] + }, + { + "cell_type": "markdown", + "id": "aec0632c", + "metadata": {}, + "source": [ + "## Generating higher-order functions\n", + "Finally, we can generate higher-order functions that can call templates as well:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d02bc67", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sub-templates available to write_story: dict_keys(['describe_image', 'draw_simple_icon', 'limerick', 'haiku_no_cache', 'primes', 'count_char', 'cities', 'weather', 'vacation', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny', 'write_story', 'unstable_service', 'fetch_data', 'give_rating_for_movie', 'write_chapter', 'judge_chapter'])\n", + "=== Story with moral ===\n", + "def create_moral_story(topic: str) -> str:\n", + " # Start with an introduction and establish the story theme\n", + " chapter_1 = write_chapter(1, f\"Introduction to {topic}\")\n", + " \n", + " # Develop the plot with a challenge or situation related to the topic\n", + " chapter_2 = write_chapter(2, f\"The Challenge of {topic}\")\n", + " \n", + " # Introduce a turning point or decision-making moment involving the topic\n", + " chapter_3 = write_chapter(3, f\"Decisions and Consequences of {topic}\")\n", + " \n", + " # Conclusion wrapping up the story and highlighting the moral\n", + " chapter_4 = write_chapter(4, f\"Moral and Lessons of {topic}\")\n", + " \n", + " # Combine all chapters into one coherent story\n", + " full_story = 
\"\\n\\n\".join([chapter_1, chapter_2, chapter_3, chapter_4])\n", + " \n", + " return full_story\n", + "**Title: The Journey of One**\n", + "\n", + "In a land far beyond imagination, where numbers were not just symbols but beings with feelings and desires, there was a little number known as One. Though often underestimated, One had a dream larger than any universe: to find its true purpose.\n", + "\n", + "One was simple, yet unique. It started each day by enjoying the sunrise, counting the seconds in silent appreciation of the continuum of time. Yet, in the grand tapestry of numbers, One felt ordinary and unnoticed, especially among the grandiosity of larger numbers like Millions and Billions, which often boasted about their size and importance.\n", + "\n", + "One day, O came across Zero, a soft-spoken and kind companion, often seen lingering in the shadows of others. \"Why do you look so glum, One?\" asked Zero as they both watched the twinkling stars above.\n", + "\n", + "\"I feel small in a world full of giants. What significance do I hold when everyone seems to multiply and magnify everything far beyond my own capacity?\"\n", + "\n", + "Zero smiled softly. \"You must explore, dear One. For you might be small, but with the right touch, you can change everything. You are the beginning of dreams, the spark that starts a continuum.\"\n", + "\n", + "Taking this advice to heart, One embarked on a journey to discover its true power and potential. As it wandered through the Land of Mathematics, it met Addition, the kind-hearted magician, who taught One how it could transform nothing into something, just by joining in a dance.\n", + "\n", + "With Multiplication, One learned coordination and rhythm, expanding its influence exponentially with a simple step forward. 
There were days spent in the Company of Fractions, shrinking itself to explore the depth of intricacy plus seeing life from a new perspective.\n", + "\n", + "Finally, it found itself near the great figure of Unity, where all numbers whether large or small, participated in harmony. Here, One discovered its greatest potential—to bring completeness. When used wisely, One could complete a perfect circle or spell disaster if miscalculated.\n", + "\n", + "In its quest, One realized its strength was simplicity itself. As small as it was, it was the foundation upon which countless worlds depended. Without One, there was nothing to start; no Number Line, no Life Progression.\n", + "\n", + "And so, One returned to its place in the universe, no longer ordinary but extraordinary in its ability to bring beginnings.\n", + "\n", + "Thus, the moral of the story: No matter how small or insignificant you feel, remember that you have the power to change everything. You are the first step in your journey and those of others. Embrace your role and start with conviction, for you are One. \n", + "\n", + "And sometimes, that's all you need to be remarkable.\n", + "\n", + "Once upon a time in the quaint village of Digiton, nestled in the Valley of Numerals, lived the number 2. In this village, each number had their unique talents and ways to contribute to the community. Number 2 was known for its ability to find wonderful pairings and create harmony.\n", + "\n", + "It was a bright, sunny morning when 2 decided it was time to plan the grand Numerals Gala, an event celebrated by all numbers from 1 to 9. This year's theme was \"Unity in Pairs,\" and 2 took the responsibility seriously.\n", + "\n", + "With a checklist in hand, 2 began to organize the event. First, 2 visited its oldest friend, the number 1. \"Would you be one half of a winning pair, dear friend?\" 2 asked. 
\"Of course,\" replied 1, \"together we make the perfect pair of Unity, everyone knows!\"\n", + "\n", + "Next, 2 approached the number 3. Though sometimes perceived as a little off-kilter, 3 was eager to join and suggested pairing with 4 to symbolize growth and progression: 3 plus 4 always added up to 7—a lucky number for all.\n", + "\n", + "Eager to ensure everyone was included, 2 made a special stop at number 5's cheerful blue cottage. \"5, would you create a bridge with me?\" 2 proposed. \"Together we form \"7\", the lucky charm—how can I resist?\" giggled 5.\n", + "\n", + "Day by day, the excitement in Digiton grew. Numbers periodically gathered in the square to rehearse their speeches and musical acts. Finally, the day of the Gala arrived, and pairs paraded on stage, highlighting unity through their performances. The pairing of 6 and 7 showcased a dance of luck and prosperity, while 8 and 9 painted visions of a dreamy future.\n", + "\n", + "As everyone settled down for the final speech, number 2 took the stage, its heart full of joy. \"Dear friends,\" 2 began, \"thank you for showing us the beauty of partnership. Alone, each of us is a number, but together, we build the world. Let us remember that two is a bond that shows love, loyalty, and peace.\"\n", + "\n", + "With a warm round of applause, the Gala concluded, but in their hearts, every number knew that it was 2's thoughtful pairing that showed them the profound harmony within.\n", + "\n", + "And so, 2's legacy in Digiton was etched as a gentle, powerful reminder that the most meaningful journeys are those taken with another by your side. Such was the wisdom of number 2.\n", + "\n", + "Once upon a time, nestled in the quiet and serene landscape of Numerland, there was a unique and charismatic number named \"Three.\" Unlike the other numbers, Three was proudly quirky and adventurous. 
Sporting three shining points, he dazzled with a triangular shape that made him quite distinctive among his peers.\n", + "\n", + "Three lived in the bustling community of Tallytown, a place where numbers came together to form equations, solve problems, and have numerical debates. But Three often felt that Tallytown was too caught up in linear thinking. He liked to think outside the box—or pyramid, in his case.\n", + "\n", + "One sunny day, Three decided to embark on an adventure across the wide fields of positivity. His first stop was Addition Avenue, a lively street where numbers piled atop each other, eagerly building bridges to larger sums. While there, Three met other numbers like Six and Nine, who greeted him warmly.\n", + "\n", + "“Why travel, Three?” asked Six.\n", + "\n", + "\"I'm seeking something more,\" Three replied. \"I feel like there's a whole world of meritorious multiplicities and radiant reciprocals waiting for me!\"\n", + "\n", + "With a friendly nod, Three continued on his journey. He navigated through Subtraction Square, where he learned to appreciate simplicity. As he passed through, Two’s counsel resonated: “Sometimes less is more, Three.”\n", + "\n", + "Eventually, Three found himself at the multipliers' meadow, a wide expanse where numbers did cartwheels, creating exponential wonders. It was here he met Zero, who diffidently warned, \"Multiply with me, and I'll vanish you into nothingness!\"\n", + "\n", + "Three chuckled at the paradox and moved on. He rolled over to Division Dale, where he admired the symmetry of parts and ratios. Three realized he was not just a number but a part of something truly wondrous.\n", + "\n", + "Finally, gazing at the starry skyscape of Infinity Lane, Three discovered his true potential—he was a constant, reliable factor that held significance beyond simple numerical value. 
Each point of his triangular form seemed to twinkle with this newfound wisdom.\n", + "\n", + "As he made his way back home to Tallytown, Three felt renewed, armed with appreciation for his uniqueness and the harmony between all numbers. He returned not just as Three, but as the representation of balance, creativity, and the beautiful geometric world from which he drew his strength.\n", + "\n", + "And so, in the land of Numerland, Three lived happily, not just a simple integer, but a remarkable journey in and of itself—a point of convergence in a universe of endless possibilities.\n", + "\n", + "**The Tale of Four Friends**\n", + "\n", + "Once upon a time, in the cozy town of Little Numbers, there dwelt a modest fellow known simply as \"4.\" Though he appeared ordinary, 4 was actually quite special. He had three devoted friends: 1, 2, and 3. Together, they formed a dynamic quartet of remarkable adventures.\n", + "\n", + "One bright spring morning, they embarked on a journey to solve the mystery of the Lost Sequence. It was said that the sequence held the secret to solving any mathematical problem, and possessing it would mean endless possibilities.\n", + "\n", + "4, ever confident in his stability, led the group with enthusiasm. \"We can decipher any riddle with our unity,\" he declared, his square-shaped stature conveying authority.\n", + "\n", + "Their first challenge arrived at the Great Divide Canyon, a vast gap that seemed insurmountable. \"Fear not!\" said 2, offering help with her talent for pairing. She balanced 1 on her left and 3 on her right. Effortlessly, they formed a bridge sturdy enough for 4 to cross, leading them all safely to the other side.\n", + "\n", + "The team soon reached the Valley of Equations, where intricate puzzles befuddled passersby. With 4's knack for balance and proportion, they made short work of the conundrums. 
1's simplicity, combined with 3's creative approach, solved complex equations, while 2's knack for harmonizing detected patterns invisible to others.\n", + "\n", + "As they journeyed deeper, they encountered the enigma known as the Paradox Terrain. Here, problems that seemed unsolvable loomed ominously. \"Let us remember,\" 4 reminded them, \"that solutions are often nearer than they appear.\"\n", + "\n", + "With a fresh perspective, 3 noticed a pattern: each unsolvable problem required going back to basic principles. By retracing steps, simplifying assumptions, and adding unique insights, they cracked the paradox.\n", + "\n", + "At last, the quartet arrived at the Chamber of the Lost Sequence, where wisdom awaited them. The mystical sequence unveiled itself, revealing the elegance of mathematical harmony, in which each number played a crucial role.\n", + "\n", + "Embracing the sequence, the friends returned to Little Numbers, wiser and more united than ever. Thus, in the camaraderie of 4 and his friends, the town learned a timeless lesson: the greatest strength comes not from singular achievement, but from the harmony of collective unity.\n", + "\n", + "And so, they lived happily and mathematically ever after.\n", + "\n" + ] + } + ], + "source": [ + "# Sub-templates for different story styles\n", + "@Template.define\n", + "def write_chapter(chapter_number: int, chapter_name: str) -> str:\n", + " \"\"\"Write a short story about {chapter_number}. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "@Template.define\n", + "def judge_chapter(story_so_far: str, chapter_number: int) -> bool:\n", + " \"\"\"Decide if the new chapter is coherence with the story so far. 
Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "# Main orchestrator template - has access to sub-templates\n", + "@Template.define\n", + "def write_multi_chapter_story(style: Literal[\"moral\", \"funny\"]) -> Callable[[str], str]:\n", + " \"\"\"Generate a function that writes a story in style: {style} about the given topic.\n", + "\n", + " The program can use helper functions defined elsewhere (DO NOT REDEFINE THEM):\n", + " - write_chapter(chapter_number: int, chapter_name: str) -> str\n", + " - judge_chapter(story_so_far: str, chapter_number: int) -> bool\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "# Verify sub-templates are captured in write_story's lexical context\n", + "print(\"Sub-templates available to write_story:\", write_multi_chapter_story.tools.keys())\n", + "\n", + "with (\n", + " handler(RetryLLMHandler(num_retries=3)),\n", + " handler(provider),\n", + " handler(UnsafeEvalProvider()),\n", + "):\n", + " print(\"=== Story with moral ===\")\n", + " function_that_writes_story = write_multi_chapter_story(\"moral\")\n", + " print(inspect.getsource(function_that_writes_story))\n", + " print(function_that_writes_story(\"a curious cat\"))\n", + " print()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" } - ], - "source": [ - "# Sub-templates for different story styles\n", - "@Template.define\n", - "def write_chapter(chapter_number: int, chapter_name: str) -> str:\n", - " \"\"\"Write a short story about {chapter_number}. 
Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "@Template.define\n", - "def judge_chapter(story_so_far: str, chapter_number: int) -> bool:\n", - " \"\"\"Decide if the new chapter is coherence with the story so far. Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "# Main orchestrator template - has access to sub-templates\n", - "@Template.define\n", - "def write_multi_chapter_story(style: Literal[\"moral\", \"funny\"]) -> Callable[[str], str]:\n", - " \"\"\"Generate a function that writes a story in style: {style} about the given topic.\n", - "\n", - " The program can use helper functions defined elsewhere (DO NOT REDEFINE THEM):\n", - " - write_chapter(chapter_number: int, chapter_name: str) -> str\n", - " - judge_chapter(story_so_far: str, chapter_number: int) -> bool\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "# Verify sub-templates are captured in write_story's lexical context\n", - "print(\"Sub-templates available to write_story:\", write_multi_chapter_story.tools.keys())\n", - "\n", - "with (\n", - " handler(RetryLLMHandler(num_retries=3)),\n", - " handler(provider),\n", - " handler(UnsafeEvalProvider()),\n", - "):\n", - " print(\"=== Story with moral ===\")\n", - " function_that_writes_story = write_multi_chapter_story(\"moral\")\n", - " print(inspect.getsource(function_that_writes_story))\n", - " print(function_that_writes_story(\"a curious cat\"))\n", - " print()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/effectful/handlers/llm/encoding.py 
b/effectful/handlers/llm/encoding.py index 66edf6ad..abab04b2 100644 --- a/effectful/handlers/llm/encoding.py +++ b/effectful/handlers/llm/encoding.py @@ -475,6 +475,11 @@ def decode(self, encoded_value: SynthesizedFunction) -> Callable: # Validate signature from runtime callable after execution _validate_signature_callable(result, self.expected_params, self.expected_return) + # Run doctests from the original template docstring (if any) + module_obj = types.ModuleType(filename) + module_obj.__dict__.update(g) + evaluation.test(module_obj, module_obj.__dict__) + return result def serialize( diff --git a/effectful/handlers/llm/evaluation.py b/effectful/handlers/llm/evaluation.py index 2317e1a8..90fc4e52 100644 --- a/effectful/handlers/llm/evaluation.py +++ b/effectful/handlers/llm/evaluation.py @@ -1,9 +1,11 @@ import ast import builtins import collections.abc +import doctest import inspect import linecache import sys +import textwrap import types import typing from collections.abc import Mapping @@ -20,7 +22,9 @@ safe_globals, ) +from effectful.handlers.llm.template import Template from effectful.internals.unification import nested_type +from effectful.ops.semantics import fwd from effectful.ops.syntax import ObjectInterpretation, defop, implements from effectful.ops.types import Operation @@ -92,6 +96,34 @@ def exec( ) +@defop +def doctest_check(obj: object, ctx: typing.Mapping[str, Any]) -> None: + """ + Run doctests for an object under the given context. + + obj: The object whose doctests should be tested. + ctx: The namespace used to run doctest examples. + + Returns None, raises TypeError on doctest failure. + """ + raise NotImplementedError( + "An eval provider must be installed in order to run doctests." + ) + + +@defop +def test(obj: object, ctx: typing.Mapping[str, Any]) -> None: + """ + Run doctests for a synthesized program using the current doctest stack. + + obj: The synthesized module object. + ctx: The namespace used to run doctest examples. 
+ """ + raise NotImplementedError( + "A doctest handler must be installed in order to run doctests." + ) + + # Type checking implementation def type_to_ast(typ: Any) -> ast.expr: """Convert a Python type to an AST expression. @@ -579,6 +611,83 @@ def mypy_type_check( return None +def _run_doctests(obj: object, ctx: typing.Mapping[str, Any]) -> None: + name = getattr(obj, "__name__", obj.__class__.__name__) + globs = dict(ctx) + finder = doctest.DocTestFinder(exclude_empty=True) + if isinstance(obj, types.ModuleType): + tests = finder.find(obj, name=name, globs=globs, module=False) + else: + tests = finder.find(obj, name=name, globs=globs) + if not tests: + return + + output: list[str] = [] + runner = doctest.DocTestRunner(verbose=False) + for test in tests: + runner.run(test, out=output.append) + results = runner.summarize(verbose=False) + if results.failed: + report = "".join(output).strip() + if not report: + report = ( + f"{results.failed} doctest(s) failed " + f"out of {results.attempted} attempted." 
+ ) + raise TypeError(f"doctest failed:\n{report}") + + +class DoctestHandler(ObjectInterpretation): + """Collect doctests from templates and run them on synthesis results.""" + + _doctest_stack: list[str] + + def __init__(self): + self._doctest_stack = [] + + @implements(Template.__apply__) + def _capture_doctest[**P, T]( + self, template: Template[P, T], *args: P.args, **kwargs: P.kwargs + ) -> T: + bound_args = inspect.signature(template).bind(*args, **kwargs) + bound_args.apply_defaults() + env = template.__context__.new_child(bound_args.arguments) + doctest_source = textwrap.dedent(template.__prompt_template__).format_map(env) + self._doctest_stack.append(doctest_source) + return fwd() + + @implements(test) + def _run_from_stack(self, obj: object, ctx: typing.Mapping[str, Any]) -> None: + if not self._doctest_stack: + return + doctest_source = self._doctest_stack.pop() + if not doctest_source.strip(): + return + globs = dict(ctx) + parser = doctest.DocTestParser() + test_case = parser.get_doctest( + doctest_source, + globs, + name=f"{getattr(obj, '__name__', obj.__class__.__name__)}.__template_doctest__", + filename=None, + lineno=0, + ) + if not test_case.examples: + return + output: list[str] = [] + runner = doctest.DocTestRunner(verbose=False) + runner.run(test_case, out=output.append) + results = runner.summarize(verbose=False) + if results.failed: + report = "".join(output).strip() + if not report: + report = ( + f"{results.failed} doctest(s) failed " + f"out of {results.attempted} attempted." 
+ ) + raise TypeError(f"doctest failed:\n{report}") + + # Eval Providers @@ -596,6 +705,10 @@ def type_check( ) -> None: mypy_type_check(module, ctx, expected_params, expected_return) + @implements(doctest_check) + def doctest_check(self, obj: object, ctx: typing.Mapping[str, Any]) -> None: + _run_doctests(obj, ctx) + @implements(parse) def parse(self, source: str, filename: str) -> ast.Module: # Cache source under `filename` so inspect.getsource() can retrieve it later. @@ -656,6 +769,10 @@ def type_check( ) -> None: mypy_type_check(module, ctx, expected_params, expected_return) + @implements(doctest_check) + def doctest_check(self, obj: object, ctx: typing.Mapping[str, Any]) -> None: + _run_doctests(obj, ctx) + @implements(parse) def parse(self, source: str, filename: str) -> ast.Module: # Keep inspect.getsource() working for dynamically-defined objects. diff --git a/tests/test_handlers_llm_encoding.py b/tests/test_handlers_llm_encoding.py index f024e2b3..8daea4ed 100644 --- a/tests/test_handlers_llm_encoding.py +++ b/tests/test_handlers_llm_encoding.py @@ -10,7 +10,11 @@ from RestrictedPython import RestrictingNodeTransformer from effectful.handlers.llm.encoding import Encodable, SynthesizedFunction -from effectful.handlers.llm.evaluation import RestrictedEvalProvider, UnsafeEvalProvider +from effectful.handlers.llm.evaluation import ( + DoctestHandler, + RestrictedEvalProvider, + UnsafeEvalProvider, +) from effectful.ops.semantics import handler from effectful.ops.types import Operation, Term @@ -747,7 +751,10 @@ def add(a: int, b: int) -> int: assert "def add" in encoded.module_code assert "return a + b" in encoded.module_code - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): decoded = encodable.decode(encoded) assert callable(decoded) assert decoded(2, 3) == 5 @@ -762,7 +769,10 @@ def test_decode_with_ellipsis_params(self, eval_provider): func_source = SynthesizedFunction( module_code="def double(x) -> int:\n 
return x * 2" ) - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): decoded = encodable.decode(func_source) assert callable(decoded) assert decoded(5) == 10 @@ -776,7 +786,10 @@ def test_decode_with_env(self, eval_provider): return x * factor""" ) - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): decoded = encodable.decode(source) assert callable(decoded) assert decoded(4) == 12 @@ -816,7 +829,10 @@ def test_decode_no_function_at_end_raises(self, eval_provider): with pytest.raises( ValueError, match="last statement to be a function definition" ): - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -830,7 +846,10 @@ def test_decode_multiple_functions_uses_last(self, eval_provider): def bar() -> int: return 2""" ) - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): decoded = encodable.decode(source) assert callable(decoded) assert decoded.__name__ == "bar" @@ -852,7 +871,10 @@ def greet(self): with pytest.raises( ValueError, match="last statement to be a function definition" ): - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -861,7 +883,10 @@ def greet(name: str) -> str: return f"Hello, {name}!" 
encodable = Encodable.define(Callable[[str], str], {}) - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): encoded = encodable.encode(greet) decoded = encodable.decode(encoded) @@ -901,7 +926,10 @@ def test_decode_validates_last_statement(self, eval_provider): with pytest.raises( ValueError, match="last statement to be a function definition" ): - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): encodable.decode(source) def test_typed_callable_includes_signature_in_docstring(self): @@ -921,7 +949,10 @@ def test_typed_callable_validates_param_count(self, eval_provider): return a""" ) with pytest.raises(ValueError, match="expected function with 2 parameters"): - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -934,7 +965,10 @@ def test_typed_callable_validates_return_type(self, eval_provider): return str(a + b)""" ) with pytest.raises(TypeError, match="Incompatible types in assignment"): - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -950,7 +984,10 @@ def test_typed_callable_requires_return_annotation(self, eval_provider): ValueError, match="requires synthesized function to have a return type annotation", ): - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -962,7 +999,10 @@ def test_typed_callable_accepts_correct_signature(self, eval_provider): module_code="""def add(a: int, b: int) -> int: return a + b""" ) - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): result = encodable.decode(source) assert callable(result) 
assert result(2, 3) == 5 @@ -1002,7 +1042,10 @@ def test_ellipsis_callable_skips_param_validation(self, eval_provider): module_code="""def anything(a, b, c, d, e) -> int: return 42""" ) - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): result = encodable.decode(source) assert callable(result) assert result(1, 2, 3, 4, 5) == 42 @@ -1042,7 +1085,10 @@ def test_validates_param_count_via_ast(self, eval_provider): return a + b + c""" ) with pytest.raises(ValueError, match="expected function with 2 parameters"): - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -1056,7 +1102,10 @@ def test_validates_param_count_zero_params(self, eval_provider): return x""" ) with pytest.raises(ValueError, match="expected function with 0 parameters"): - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -1068,7 +1117,10 @@ def test_validates_accepts_zero_params(self, eval_provider): module_code="""def get_value() -> int: return 42""" ) - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): result = encodable.decode(source) assert callable(result) assert result() == 42 @@ -1092,7 +1144,10 @@ def test_ellipsis_callable_validates_return_type(self, eval_provider): return "wrong type\"""" ) with pytest.raises(TypeError, match="Incompatible types in assignment"): - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -1103,7 +1158,10 @@ def test_callable_with_single_param(self, eval_provider): module_code="""def count_chars(s: str) -> int: return len(s)""" ) - with handler(eval_provider): + with ( + 
handler(eval_provider), + handler(DoctestHandler()), + ): result = encodable.decode(source) assert callable(result) assert result("hello") == 5 @@ -1116,7 +1174,10 @@ def test_callable_with_many_params(self, eval_provider): module_code="""def sum_four(a: int, b: int, c: int, d: int) -> int: return a + b + c + d""" ) - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): result = encodable.decode(source) assert callable(result) assert result(1, 2, 3, 4) == 10 @@ -1129,7 +1190,10 @@ def test_callable_with_bool_return(self, eval_provider): module_code="""def is_positive(x: int) -> bool: return x > 0""" ) - with handler(eval_provider): + with ( + handler(eval_provider), + handler(DoctestHandler()), + ): result = encodable.decode(source) assert callable(result) assert result(5) is True @@ -1167,7 +1231,10 @@ def test_restricted_blocks_private_attribute_access(self): ) # Should raise due to restricted attribute access with pytest.raises(Exception): # Could be NameError or AttributeError - with handler(RestrictedEvalProvider()): + with ( + handler(RestrictedEvalProvider()), + handler(DoctestHandler()), + ): fn = encodable.decode(source) fn("test") @@ -1208,7 +1275,10 @@ def test_builtins_in_env_does_not_bypass_security(self): return open(path).read()""" ) with pytest.raises(Exception): # Could be NameError, ValueError, or other - with handler(RestrictedEvalProvider()): + with ( + handler(RestrictedEvalProvider()), + handler(DoctestHandler()), + ): fn = encodable_open.decode(source_open) # If decode succeeded (shouldn't), calling should still fail fn("/etc/passwd") @@ -1221,7 +1291,10 @@ def test_builtins_in_env_does_not_bypass_security(self): return os.name""" ) with pytest.raises(Exception): - with handler(RestrictedEvalProvider()): + with ( + handler(RestrictedEvalProvider()), + handler(DoctestHandler()), + ): fn = encodable_import.decode(source_import) fn() @@ -1232,7 +1305,10 @@ def 
test_builtins_in_env_does_not_bypass_security(self): module_code="""def add(a: int, b: int) -> int: return a + b""" ) - with handler(RestrictedEvalProvider()): + with ( + handler(RestrictedEvalProvider()), + handler(DoctestHandler()), + ): fn = encodable_safe.decode(source_safe) assert fn(2, 3) == 5, "Safe code should still work" @@ -1243,6 +1319,9 @@ def test_builtins_in_env_does_not_bypass_security(self): return s.__class__.__name__""" ) with pytest.raises(Exception): - with handler(RestrictedEvalProvider()): + with ( + handler(RestrictedEvalProvider()), + handler(DoctestHandler()), + ): fn = encodable_private.decode(source_private) fn("test") diff --git a/tests/test_handlers_llm_provider.py b/tests/test_handlers_llm_provider.py index 1619ef9a..2028396a 100644 --- a/tests/test_handlers_llm_provider.py +++ b/tests/test_handlers_llm_provider.py @@ -36,7 +36,7 @@ get_message_sequence, ) from effectful.handlers.llm.encoding import Encodable, SynthesizedFunction -from effectful.handlers.llm.evaluation import UnsafeEvalProvider +from effectful.handlers.llm.evaluation import DoctestHandler, UnsafeEvalProvider from effectful.ops.semantics import fwd, handler from effectful.ops.syntax import ObjectInterpretation, implements from effectful.ops.types import NotHandled @@ -1083,6 +1083,20 @@ def synthesize_counter(char: str) -> Callable[[str], int]: raise NotHandled +@Template.define +def synthesize_counter_with_doctest(char: str) -> Callable[[str], int]: + """Generate a Python function named count_char that counts occurrences of the character '{char}' + in a given input string. + + The function should be case-sensitive. + + Examples: + >>> count_char("banana") + 4 + """ + raise NotHandled + + @Template.define def synthesize_is_even() -> Callable[[int], bool]: """Generate a Python function that checks if a number is even. 
@@ -1109,6 +1123,7 @@ def test_synthesize_adder_function(self, request): with ( handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), handler(UnsafeEvalProvider()), + handler(DoctestHandler()), handler(LimitLLMCallsHandler(max_calls=1)), ): add_func = synthesize_adder() @@ -1125,6 +1140,7 @@ def test_synthesize_string_processor(self, request): with ( handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), handler(UnsafeEvalProvider()), + handler(DoctestHandler()), handler(LimitLLMCallsHandler(max_calls=1)), ): process_func = synthesize_string_processor() @@ -1141,6 +1157,7 @@ def test_synthesize_counter_with_parameter(self, request): with ( handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), handler(UnsafeEvalProvider()), + handler(DoctestHandler()), handler(LimitLLMCallsHandler(max_calls=3)), ): count_a = synthesize_counter("a") @@ -1151,6 +1168,18 @@ def test_synthesize_counter_with_parameter(self, request): assert count_a("aardvark") == 3 assert count_a("AAA") == 0 # case-sensitive + @requires_openai + def test_synthesized_doctest_runs(self, request): + """Test that doctests run for synthesized functions.""" + with ( + handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), + handler(UnsafeEvalProvider()), + handler(DoctestHandler()), + handler(LimitLLMCallsHandler(max_calls=1)), + ): + with pytest.raises(ResultDecodingError, match="doctest failed"): + synthesize_counter_with_doctest("a") + @requires_openai def test_callable_type_signature_in_schema(self, request): """Test that the callable type signature is communicated to the LLM.""" @@ -1171,6 +1200,7 @@ def test_synthesized_function_roundtrip(self, request): with ( handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), handler(UnsafeEvalProvider()), + handler(DoctestHandler()), handler(LimitLLMCallsHandler(max_calls=1)), ): # Synthesize a function @@ -1195,6 +1225,7 @@ def test_synthesize_bool_return_type(self, request): with ( handler(ReplayLiteLLMProvider(request, 
model="gpt-4o-mini")), handler(UnsafeEvalProvider()), + handler(DoctestHandler()), handler(LimitLLMCallsHandler(max_calls=1)), ): is_even = synthesize_is_even() @@ -1217,6 +1248,7 @@ def test_synthesize_three_params(self, request): with ( handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), handler(UnsafeEvalProvider()), + handler(DoctestHandler()), handler(LimitLLMCallsHandler(max_calls=1)), ): multiply_three = synthesize_three_param_func() From d0bdbeff860c5857b01532b56f3a7d47c192343e Mon Sep 17 00:00:00 2001 From: datvo06 Date: Fri, 6 Feb 2026 17:20:01 -0500 Subject: [PATCH 02/18] Update notebook --- docs/source/llm.ipynb | 1905 +++++++++++++------------- effectful/handlers/llm/evaluation.py | 23 - 2 files changed, 957 insertions(+), 971 deletions(-) diff --git a/docs/source/llm.ipynb b/docs/source/llm.ipynb index 436a05f2..d10a95be 100644 --- a/docs/source/llm.ipynb +++ b/docs/source/llm.ipynb @@ -1,972 +1,981 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "e7fda1b8", - "metadata": {}, - "source": [ - "# LLM Interface\n", - "The `effectful.handlers.llm` module provides a simplified LLM interface that uses algebraic effects for modularity. The module interface consists of:\n", - "\n", - "- A decorator `Template.define` which creates a prompt template from a callable. A template is an LLM-implemented function whose behavior is specified by a template string. When a template is called, an LLM is invoked to produce the specified behavior.\n", - "- A decorator `Tool.define` which exposes Python callables as tools that templates can call. Tool signatures and docstrings define the schema passed to the model.\n", - "- Structured output handling via `Encodable` (used internally by templates and tool calls) to serialize/deserialize Python types.\n", - "- LLM providers such as `LiteLLMProvider`, and reliability helpers like `RetryLLMHandler` and `ReplayLiteLLMProvider`, which can be composed with `handler(...)` to control execution." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "5aaf649f", - "metadata": {}, - "outputs": [], - "source": [ - "import base64\n", - "import dataclasses\n", - "import functools\n", - "import io\n", - "from typing import Literal\n", - "\n", - "import litellm\n", - "import pydantic\n", - "from IPython.display import HTML, display\n", - "from litellm.caching.caching import Cache\n", - "from PIL import Image\n", - "from pydantic import field_validator\n", - "from pydantic_core import PydanticCustomError\n", - "\n", - "from effectful.handlers.llm import Template, Tool\n", - "from effectful.handlers.llm.completions import (\n", - " LiteLLMProvider,\n", - " RetryLLMHandler,\n", - ")\n", - "from effectful.ops.semantics import NotHandled, handler\n", - "\n", - "provider = LiteLLMProvider()" - ] - }, - { - "cell_type": "markdown", - "id": "093243e0", - "metadata": {}, - "source": [ - "In the following sections, we walk through each of the mentioned components." - ] - }, - { - "cell_type": "markdown", - "id": "c1c639d3", - "metadata": {}, - "source": [ - "## Prompt Templates\n", - "\n", - "This template function writes (bad) poetry on a given theme. While difficult to implement in Python, an LLM can provide a reasonable implementation." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "1e832675", - "metadata": {}, - "outputs": [], - "source": [ - "@Template.define\n", - "def limerick(theme: str) -> str:\n", - " \"\"\"Write a limerick on the theme of {theme}. Do not use any tools.\"\"\"\n", - " raise NotHandled" - ] - }, - { - "cell_type": "markdown", - "id": "f2ca6919", - "metadata": {}, - "source": [ - "If we call the template with a provider interpretation installed, we get reasonable behavior. 
The LLM is nondeterministic by default, so calling the template twice with the same arguments gives us different results.\n", - "\n", - "Templates are regular callables, so can be converted to operations with `defop` if we want to override the LLM implementation in some cases." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "634f6533", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "In the sea where the silver fish play,\n", - "A salmon once swam by the bay.\n", - "With a splash and a swirl,\n", - "Past the shells it would twirl,\n", - "And vanish at the end of the day.\n", - "----------------------------------------\n", - "In the ocean, where fish freely roam,\n", - "A small school called a coral reef home.\n", - "With fins all aglow,\n", - "They dart to and fro,\n", - "Dancing waves as sea breezes comb.\n" - ] - } - ], - "source": [ - "with handler(provider):\n", - " print(limerick(\"fish\"))\n", - " print(\"-\" * 40)\n", - " print(limerick(\"fish\"))" - ] - }, - { - "cell_type": "markdown", - "id": "2e59acbc", - "metadata": {}, - "source": [ - "If we want deterministic behavior, we can cache the template call. 
We can either cache it with the default `@functools.cache` or use LiteLLM's built-in cache by setting a cache backend and passing `caching=True` to the provider:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "706ce53b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Silent water glides,\n", - "Scales shimmering through currents—\n", - "Nature's art in waves.\n", - "----------------------------------------\n", - "Silent water glides,\n", - "Scales shimmering through currents—\n", - "Nature's art in waves.\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/nguyendat/Marc/effectful/.venv/lib/python3.12/site-packages/pydantic/main.py:528: UserWarning: Pydantic serializer warnings:\n", - " PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='{\"value\"...: None}, annotations=[]), input_type=Message])\n", - " PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...ider_specific_fields={}), input_type=Choices])\n", - " return self.__pydantic_serializer__.to_json(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Silent waters gleam, \n", - "Fish dart in a moonlit stream, \n", - "Nature's quiet dream.\n", - "----------------------------------------\n", - "Silent waters dance, \n", - "Scales shimmer in fleeting light, \n", - "Fish glide through soft dreams.\n", - "\n", - "Fish swim through blue waves, \n", - "Their scales gleam like silver jewels, \n", - "Silent in the deep.\n", - "----------------------------------------\n", - "Silent waters flow,\n", - "Fish dart beneath gentle waves—\n", - "Nature's dance below.\n" - ] - } - ], - "source": [ - "@functools.cache\n", - 
"@Template.define\n", - "def haiku(theme: str) -> str:\n", - " \"\"\"Write a haiku on the theme of {theme}. Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "@Template.define\n", - "def haiku_no_cache(theme: str) -> str:\n", - " \"\"\"Write a haiku on the theme of {theme}. Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "print()\n", - "with handler(provider):\n", - " print(haiku(\"fish\"))\n", - " print(\"-\" * 40)\n", - " print(haiku(\"fish\"))\n", - "\n", - "print()\n", - "# Enable LiteLLM caching by setting a cache backend and enabling caching.\n", - "litellm.cache = Cache()\n", - "provider_cached = LiteLLMProvider(caching=True)\n", - "try:\n", - " with handler(provider_cached):\n", - " print(haiku_no_cache(\"fish2\"))\n", - " print(\"-\" * 40)\n", - " print(haiku_no_cache(\"fish2\"))\n", - "finally:\n", - " litellm.cache = None\n", - "\n", - "print()\n", - "with handler(provider):\n", - " print(haiku_no_cache(\"fish3\"))\n", - " print(\"-\" * 40)\n", - " print(haiku_no_cache(\"fish3\"))" - ] - }, - { - "cell_type": "markdown", - "id": "13adb300", - "metadata": {}, - "source": [ - "## Converting LLM Results to Python Objects\n", - "\n", - "Type conversion is handled by `decode`. By default, primitive types are converted. `DecodeError` is raised if a response cannot be converted." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "2c766859", - "metadata": {}, - "outputs": [], - "source": [ - "@Template.define\n", - "def primes(first_digit: int) -> int:\n", - " \"\"\"Give a prime number with {first_digit} as the first digit. Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "with handler(provider):\n", - " assert type(primes(6)) is int" - ] - }, - { - "cell_type": "markdown", - "id": "36d78a71", - "metadata": {}, - "source": [ - "More complex types can be converted by providing handlers for `decode`. 
Callable synthesis is supported via `Encodable` and the evaluation providers in `effectful.handlers.llm.evaluation` (`UnsafeEvalProvider` or `RestrictedEvalProvider`), which enable parsing/compiling/executing synthesized code." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "c83bbdc0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "def count_char_a(text: str) -> int:\n", - " \"\"\"\n", - " Counts the occurrences of 'a' in the given string.\n", - "\n", - " Args:\n", - " text (str): The string to search within.\n", - "\n", - " Returns:\n", - " int: The count of 'a' characters in the string.\n", - "\n", - " Examples:\n", - " >>> count_char_a('banana')\n", - " 3\n", - " >>> count_char_a('apple')\n", - " 1\n", - " >>> count_char_a('cherry')\n", - " 0\n", - " \"\"\"\n", - " count = 0\n", - " for char in text:\n", - " if char == 'a':\n", - " count += 1\n", - " return count\n", - "\n" - ] - } - ], - "source": [ - "import inspect\n", - "from collections.abc import Callable\n", - "\n", - "from effectful.handlers.llm.evaluation import DoctestHandler, UnsafeEvalProvider\n", - "\n", - "\n", - "@Template.define\n", - "def count_char(char: str) -> Callable[[str], int]:\n", - " \"\"\"Write a function named count_char which takes a string and counts the occurrances of '{char}'. 
Do not use any tools.\n", - "\n", - " Examples:\n", - " >>> count_char(\"banana\")\n", - " 3\n", - " >>> count_char(\"cherry\")\n", - " 0\n", - " \"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "# Use UnsafeEvalProvider for simple examples; RestrictedEvalProvider may need extra globals.\n", - "with handler(provider), handler(UnsafeEvalProvider()), handler(DoctestHandler()):\n", - " count_a = count_char(\"a\")\n", - " assert callable(count_a)\n", - " assert count_a(\"banana\") == 3\n", - " assert count_a(\"cherry\") == 0\n", - " # Print the source code of the generated function\n", - " print(inspect.getsource(count_a))" - ] - }, - { - "cell_type": "markdown", - "id": "0b6a7b48", - "metadata": {}, - "source": [ - "### Doctest Feedback\n", - "\n", - "Write doctests in the template docstring for callable synthesis. The docstring is formatted with the call arguments, and the doctests are executed during decoding." - ] - }, + "cells": [ + { + "cell_type": "markdown", + "id": "e7fda1b8", + "metadata": {}, + "source": [ + "# LLM Interface\n", + "The `effectful.handlers.llm` module provides a simplified LLM interface that uses algebraic effects for modularity. The module interface consists of:\n", + "\n", + "- A decorator `Template.define` which creates a prompt template from a callable. A template is an LLM-implemented function whose behavior is specified by a template string. When a template is called, an LLM is invoked to produce the specified behavior.\n", + "- A decorator `Tool.define` which exposes Python callables as tools that templates can call. Tool signatures and docstrings define the schema passed to the model.\n", + "- Structured output handling via `Encodable` (used internally by templates and tool calls) to serialize/deserialize Python types.\n", + "- LLM providers such as `LiteLLMProvider`, and reliability helpers like `RetryLLMHandler` and `ReplayLiteLLMProvider`, which can be composed with `handler(...)` to control execution." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "5aaf649f", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": 7, - "id": "793b12a5", - "metadata": {}, - "outputs": [ - { - "ename": "ResultDecodingError", - "evalue": "Error decoding response: mypy type check failed:\n:170: \u001b[1m\u001b[31merror:\u001b[m Name \u001b[m\u001b[1m\"count_char\"\u001b[m already defined on line 159 \u001b[m\u001b[33m[no-redef]\u001b[m\n\nimport collections\nimport collections.abc\nimport IPython\nimport IPython.core\nimport IPython.core.alias\nimport IPython.core.application\nimport IPython.core.async_helpers\nimport IPython.core.autocall\nimport IPython.core.builtin_trap\nimport IPython.core.compilerop\nimport IPython.core.completer\nimport IPython.core.completerlib\nimport IPython.core.crashhandler\nimport IPython.core.debugger\nimport IPython.core.debugger_backport\nimport IPython.core.display\nimport IPython.core.display_functions\nimport IPython.core.display_trap\nimport IPython.core.displayhook\nimport IPython.core.displaypub\nimport IPython.core.doctb\nimport IPython.core.error\nimport IPython.core.events\nimport IPython.core.extensions\nimport IPython.core.formatters\nimport IPython.core.getipython\nimport IPython.core.guarded_eval\nimport IPython.core.history\nimport IPython.core.hooks\nimport IPython.core.inputtransformer2\nimport IPython.core.interactiveshell\nimport IPython.core.latex_symbols\nimport IPython.core.logger\nimport IPython.core.macro\nimport IPython.core.magic\nimport IPython.core.magic_arguments\nimport IPython.core.magics\nimport IPython.core.magics.ast_mod\nimport IPython.core.magics.auto\nimport IPython.core.magics.basic\nimport IPython.core.magics.code\nimport IPython.core.magics.config\nimport IPython.core.magics.display\nimport IPython.core.magics.execution\nimport IPython.core.magics.extension\nimport IPython.core.magics.history\nimport IPython.core.magics.logging\nimport 
IPython.core.magics.namespace\nimport IPython.core.magics.osm\nimport IPython.core.magics.packaging\nimport IPython.core.magics.pylab\nimport IPython.core.magics.script\nimport IPython.core.oinspect\nimport IPython.core.page\nimport IPython.core.payload\nimport IPython.core.prefilter\nimport IPython.core.profiledir\nimport IPython.core.release\nimport IPython.core.shellapp\nimport IPython.core.splitinput\nimport IPython.core.tbtools\nimport IPython.core.tips\nimport IPython.core.ultratb\nimport IPython.core.usage\nimport IPython.display\nimport IPython.extensions\nimport IPython.extensions.storemagic\nimport IPython.external\nimport IPython.external.pickleshare\nimport IPython.lib\nimport IPython.lib.clipboard\nimport IPython.lib.display\nimport IPython.lib.pretty\nimport IPython.paths\nimport IPython.terminal\nimport IPython.terminal.debugger\nimport IPython.terminal.embed\nimport IPython.terminal.interactiveshell\nimport IPython.terminal.ipapp\nimport IPython.terminal.magics\nimport IPython.terminal.prompts\nimport IPython.terminal.pt_inputhooks\nimport IPython.terminal.ptutils\nimport IPython.terminal.shortcuts\nimport IPython.terminal.shortcuts.auto_match\nimport IPython.terminal.shortcuts.auto_suggest\nimport IPython.terminal.shortcuts.filters\nimport IPython.testing\nimport IPython.testing.skipdoctest\nimport IPython.utils\nimport IPython.utils.PyColorize\nimport IPython.utils._process_common\nimport IPython.utils._process_posix\nimport IPython.utils._sysinfo\nimport IPython.utils.capture\nimport IPython.utils.contexts\nimport IPython.utils.data\nimport IPython.utils.decorators\nimport IPython.utils.dir2\nimport IPython.utils.docs\nimport IPython.utils.encoding\nimport IPython.utils.frame\nimport IPython.utils.generics\nimport IPython.utils.importstring\nimport IPython.utils.io\nimport IPython.utils.ipstruct\nimport IPython.utils.module_paths\nimport IPython.utils.openpy\nimport IPython.utils.path\nimport IPython.utils.process\nimport 
IPython.utils.py3compat\nimport IPython.utils.sentinel\nimport IPython.utils.strdispatch\nimport IPython.utils.sysinfo\nimport IPython.utils.syspathcontext\nimport IPython.utils.terminal\nimport IPython.utils.text\nimport IPython.utils.timing\nimport IPython.utils.tokenutil\nimport IPython.utils.wildcard\nimport collections\nimport collections.abc\nimport effectful\nimport effectful.handlers\nimport effectful.handlers.llm\nimport effectful.handlers.llm.completions\nimport effectful.handlers.llm.encoding\nimport effectful.handlers.llm.evaluation\nimport effectful.handlers.llm.template\nimport effectful.internals\nimport effectful.internals.runtime\nimport effectful.internals.unification\nimport effectful.ops\nimport effectful.ops.semantics\nimport effectful.ops.syntax\nimport effectful.ops.types\nimport pathlib\nIn: collections.abc.MutableSequence[str]\nOut: dict\n_: str\n__: str\n___: str\n__vsc_ipynb_file__: str\n_dh: collections.abc.MutableSequence[pathlib.PosixPath]\n_i: str\n_i1: str\n_i2: str\n_i3: str\n_i4: str\n_i5: str\n_i6: str\n_i7: str\n_ih: collections.abc.MutableSequence[str]\n_ii: str\n_iii: str\n_oh: dict\nchar: str\ncount_a: collections.abc.Callable[[str], int]\ncount_char: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\ncount_char_with_doctest: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\nexit: IPython.core.autocall.ZMQExitAutocall\nhaiku: collections.abc.Callable[[str], str]\nhaiku_no_cache: effectful.ops.types.Operation[[str], str]\nlimerick: effectful.ops.types.Operation[[str], str]\nprimes: effectful.ops.types.Operation[[int], int]\nprovider: effectful.ops.types.Interpretation\nprovider_cached: effectful.ops.types.Interpretation\nquit: IPython.core.autocall.ZMQExitAutocall\n\ndef count_char(s: str) -> int:\n \"\"\"\n Count the occurrences of the character 'a' in the given string.\n\n Args:\n s (str): The string in which to count occurrences of 'a'.\n\n Returns:\n int: The number of 
times 'a' appears in the string.\n\n Examples:\n >>> count_char('banana')\n 3\n >>> count_char('apple')\n 1\n >>> count_char('cherry')\n 0\n \"\"\"\n return s.count('a')\n_synthesized_check: collections.abc.Callable[[str], int] = count_char. Please provide a valid response and try again.", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mTypeError\u001b[39m Traceback (most recent call last)", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/completions.py:302\u001b[39m, in \u001b[36mcall_assistant\u001b[39m\u001b[34m(tools, response_format, model, **kwargs)\u001b[39m\n\u001b[32m 301\u001b[39m raw_result = response_model.model_validate_json(serialized_result)\n\u001b[32m--> \u001b[39m\u001b[32m302\u001b[39m result = \u001b[43mresponse_format\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mraw_result\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[32m 303\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (pydantic.ValidationError, \u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mSyntaxError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/encoding.py:452\u001b[39m, in \u001b[36mCallableEncodable.decode\u001b[39m\u001b[34m(self, encoded_value)\u001b[39m\n\u001b[32m 451\u001b[39m \u001b[38;5;66;03m# Type-check with mypy; pass original module_code so mypy sees exact source\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m452\u001b[39m \u001b[43mevaluation\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtype_check\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 453\u001b[39m \u001b[43m \u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mctx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexpected_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexpected_return\u001b[49m\n\u001b[32m 454\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 456\u001b[39m \u001b[38;5;66;03m# Compile and execute\u001b[39;00m\n\u001b[32m 457\u001b[39m \u001b[38;5;66;03m# https://docs.python.org/3/library/functions.html#exec\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:494\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m self_handler \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m494\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mself_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 495\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m args \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(args[\u001b[32m0\u001b[39m], Operation) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m args[\u001b[32m0\u001b[39m].__apply__:\n\u001b[32m 496\u001b[39m \u001b[38;5;66;03m# Prevent infinite recursion when calling self.apply directly\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:70\u001b[39m, in \u001b[36m_set_prompt..bound_body\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 69\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({prompt: 
handler({prompt: next_cont})(cont)}):\n\u001b[32m---> \u001b[39m\u001b[32m70\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbody\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:56\u001b[39m, in \u001b[36m_save_args.._cont_wrapper\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 55\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({_get_args: \u001b[38;5;28;01mlambda\u001b[39;00m: (a, k)}):\n\u001b[32m---> \u001b[39m\u001b[32m56\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/evaluation.py:628\u001b[39m, in \u001b[36mUnsafeEvalProvider.type_check\u001b[39m\u001b[34m(self, module, ctx, expected_params, expected_return)\u001b[39m\n\u001b[32m 620\u001b[39m \u001b[38;5;129m@implements\u001b[39m(type_check)\n\u001b[32m 621\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mtype_check\u001b[39m(\n\u001b[32m 622\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 626\u001b[39m expected_return: \u001b[38;5;28mtype\u001b[39m,\n\u001b[32m 627\u001b[39m ) -> \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m628\u001b[39m \u001b[43mmypy_type_check\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mctx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexpected_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexpected_return\u001b[49m\u001b[43m)\u001b[49m\n", - 
"\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/evaluation.py:586\u001b[39m, in \u001b[36mmypy_type_check\u001b[39m\u001b[34m(module, ctx, expected_params, expected_return)\u001b[39m\n\u001b[32m 585\u001b[39m report = (stdout \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m) + (stderr \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m586\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mmypy type check failed:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mreport\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00msource\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 587\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", - "\u001b[31mTypeError\u001b[39m: mypy type check failed:\n:170: \u001b[1m\u001b[31merror:\u001b[m Name \u001b[m\u001b[1m\"count_char\"\u001b[m already defined on line 159 \u001b[m\u001b[33m[no-redef]\u001b[m\n\nimport collections\nimport collections.abc\nimport IPython\nimport IPython.core\nimport IPython.core.alias\nimport IPython.core.application\nimport IPython.core.async_helpers\nimport IPython.core.autocall\nimport IPython.core.builtin_trap\nimport IPython.core.compilerop\nimport IPython.core.completer\nimport IPython.core.completerlib\nimport IPython.core.crashhandler\nimport IPython.core.debugger\nimport IPython.core.debugger_backport\nimport IPython.core.display\nimport IPython.core.display_functions\nimport IPython.core.display_trap\nimport IPython.core.displayhook\nimport IPython.core.displaypub\nimport IPython.core.doctb\nimport IPython.core.error\nimport IPython.core.events\nimport IPython.core.extensions\nimport IPython.core.formatters\nimport IPython.core.getipython\nimport 
IPython.core.guarded_eval\nimport IPython.core.history\nimport IPython.core.hooks\nimport IPython.core.inputtransformer2\nimport IPython.core.interactiveshell\nimport IPython.core.latex_symbols\nimport IPython.core.logger\nimport IPython.core.macro\nimport IPython.core.magic\nimport IPython.core.magic_arguments\nimport IPython.core.magics\nimport IPython.core.magics.ast_mod\nimport IPython.core.magics.auto\nimport IPython.core.magics.basic\nimport IPython.core.magics.code\nimport IPython.core.magics.config\nimport IPython.core.magics.display\nimport IPython.core.magics.execution\nimport IPython.core.magics.extension\nimport IPython.core.magics.history\nimport IPython.core.magics.logging\nimport IPython.core.magics.namespace\nimport IPython.core.magics.osm\nimport IPython.core.magics.packaging\nimport IPython.core.magics.pylab\nimport IPython.core.magics.script\nimport IPython.core.oinspect\nimport IPython.core.page\nimport IPython.core.payload\nimport IPython.core.prefilter\nimport IPython.core.profiledir\nimport IPython.core.release\nimport IPython.core.shellapp\nimport IPython.core.splitinput\nimport IPython.core.tbtools\nimport IPython.core.tips\nimport IPython.core.ultratb\nimport IPython.core.usage\nimport IPython.display\nimport IPython.extensions\nimport IPython.extensions.storemagic\nimport IPython.external\nimport IPython.external.pickleshare\nimport IPython.lib\nimport IPython.lib.clipboard\nimport IPython.lib.display\nimport IPython.lib.pretty\nimport IPython.paths\nimport IPython.terminal\nimport IPython.terminal.debugger\nimport IPython.terminal.embed\nimport IPython.terminal.interactiveshell\nimport IPython.terminal.ipapp\nimport IPython.terminal.magics\nimport IPython.terminal.prompts\nimport IPython.terminal.pt_inputhooks\nimport IPython.terminal.ptutils\nimport IPython.terminal.shortcuts\nimport IPython.terminal.shortcuts.auto_match\nimport IPython.terminal.shortcuts.auto_suggest\nimport IPython.terminal.shortcuts.filters\nimport 
IPython.testing\nimport IPython.testing.skipdoctest\nimport IPython.utils\nimport IPython.utils.PyColorize\nimport IPython.utils._process_common\nimport IPython.utils._process_posix\nimport IPython.utils._sysinfo\nimport IPython.utils.capture\nimport IPython.utils.contexts\nimport IPython.utils.data\nimport IPython.utils.decorators\nimport IPython.utils.dir2\nimport IPython.utils.docs\nimport IPython.utils.encoding\nimport IPython.utils.frame\nimport IPython.utils.generics\nimport IPython.utils.importstring\nimport IPython.utils.io\nimport IPython.utils.ipstruct\nimport IPython.utils.module_paths\nimport IPython.utils.openpy\nimport IPython.utils.path\nimport IPython.utils.process\nimport IPython.utils.py3compat\nimport IPython.utils.sentinel\nimport IPython.utils.strdispatch\nimport IPython.utils.sysinfo\nimport IPython.utils.syspathcontext\nimport IPython.utils.terminal\nimport IPython.utils.text\nimport IPython.utils.timing\nimport IPython.utils.tokenutil\nimport IPython.utils.wildcard\nimport collections\nimport collections.abc\nimport effectful\nimport effectful.handlers\nimport effectful.handlers.llm\nimport effectful.handlers.llm.completions\nimport effectful.handlers.llm.encoding\nimport effectful.handlers.llm.evaluation\nimport effectful.handlers.llm.template\nimport effectful.internals\nimport effectful.internals.runtime\nimport effectful.internals.unification\nimport effectful.ops\nimport effectful.ops.semantics\nimport effectful.ops.syntax\nimport effectful.ops.types\nimport pathlib\nIn: collections.abc.MutableSequence[str]\nOut: dict\n_: str\n__: str\n___: str\n__vsc_ipynb_file__: str\n_dh: collections.abc.MutableSequence[pathlib.PosixPath]\n_i: str\n_i1: str\n_i2: str\n_i3: str\n_i4: str\n_i5: str\n_i6: str\n_i7: str\n_ih: collections.abc.MutableSequence[str]\n_ii: str\n_iii: str\n_oh: dict\nchar: str\ncount_a: collections.abc.Callable[[str], int]\ncount_char: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], 
int]]\ncount_char_with_doctest: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\nexit: IPython.core.autocall.ZMQExitAutocall\nhaiku: collections.abc.Callable[[str], str]\nhaiku_no_cache: effectful.ops.types.Operation[[str], str]\nlimerick: effectful.ops.types.Operation[[str], str]\nprimes: effectful.ops.types.Operation[[int], int]\nprovider: effectful.ops.types.Interpretation\nprovider_cached: effectful.ops.types.Interpretation\nquit: IPython.core.autocall.ZMQExitAutocall\n\ndef count_char(s: str) -> int:\n \"\"\"\n Count the occurrences of the character 'a' in the given string.\n\n Args:\n s (str): The string in which to count occurrences of 'a'.\n\n Returns:\n int: The number of times 'a' appears in the string.\n\n Examples:\n >>> count_char('banana')\n 3\n >>> count_char('apple')\n 1\n >>> count_char('cherry')\n 0\n \"\"\"\n return s.count('a')\n_synthesized_check: collections.abc.Callable[[str], int] = count_char", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[31mResultDecodingError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 10\u001b[39m\n\u001b[32m 6\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m NotHandled\n\u001b[32m 9\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler(provider), handler(UnsafeEvalProvider()):\n\u001b[32m---> \u001b[39m\u001b[32m10\u001b[39m count_a = \u001b[43mcount_char_with_doctest\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43ma\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 11\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m count_a(\u001b[33m\"\u001b[39m\u001b[33mbanana\u001b[39m\u001b[33m\"\u001b[39m) == \u001b[32m3\u001b[39m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:499\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 
497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__default__(*args, **kwargs)\n\u001b[32m 498\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m499\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__apply__\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:494\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 492\u001b[39m self_handler = intp.get(\u001b[38;5;28mself\u001b[39m)\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m self_handler \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m494\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mself_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 495\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m args \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(args[\u001b[32m0\u001b[39m], Operation) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m args[\u001b[32m0\u001b[39m].__apply__:\n\u001b[32m 496\u001b[39m \u001b[38;5;66;03m# Prevent infinite recursion when calling self.apply directly\u001b[39;00m\n\u001b[32m 497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__default__(*args, **kwargs)\n", - "\u001b[36mFile 
\u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:70\u001b[39m, in \u001b[36m_set_prompt..bound_body\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 68\u001b[39m next_cont = get_interpretation().get(prompt, prompt.__default_rule__)\n\u001b[32m 69\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({prompt: handler({prompt: next_cont})(cont)}):\n\u001b[32m---> \u001b[39m\u001b[32m70\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbody\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:56\u001b[39m, in \u001b[36m_save_args.._cont_wrapper\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 53\u001b[39m \u001b[38;5;129m@functools\u001b[39m.wraps(fn)\n\u001b[32m 54\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_cont_wrapper\u001b[39m(*a: P.args, **k: P.kwargs) -> T:\n\u001b[32m 55\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({_get_args: \u001b[38;5;28;01mlambda\u001b[39;00m: (a, k)}):\n\u001b[32m---> \u001b[39m\u001b[32m56\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/completions.py:521\u001b[39m, in \u001b[36mLiteLLMProvider._call\u001b[39m\u001b[34m(self, template, *args, **kwargs)\u001b[39m\n\u001b[32m 519\u001b[39m result: T | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 520\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m message[\u001b[33m\"\u001b[39m\u001b[33mrole\u001b[39m\u001b[33m\"\u001b[39m] != 
\u001b[33m\"\u001b[39m\u001b[33massistant\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m tool_calls:\n\u001b[32m--> \u001b[39m\u001b[32m521\u001b[39m message, tool_calls, result = \u001b[43mcall_assistant\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 522\u001b[39m \u001b[43m \u001b[49m\u001b[43mtemplate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_model\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mconfig\u001b[49m\n\u001b[32m 523\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 524\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m tool_call \u001b[38;5;129;01min\u001b[39;00m tool_calls:\n\u001b[32m 525\u001b[39m message = call_tool(tool_call)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:499\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__default__(*args, **kwargs)\n\u001b[32m 498\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m499\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__apply__\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:497\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 494\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m self_handler(*args, **kwargs)\n\u001b[32m 495\u001b[39m 
\u001b[38;5;28;01melif\u001b[39;00m args \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(args[\u001b[32m0\u001b[39m], Operation) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m args[\u001b[32m0\u001b[39m].__apply__:\n\u001b[32m 496\u001b[39m \u001b[38;5;66;03m# Prevent infinite recursion when calling self.apply directly\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__default__\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 498\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 499\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__apply__(\u001b[38;5;28mself\u001b[39m, *args, **kwargs)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:548\u001b[39m, in \u001b[36m__apply__\u001b[39m\u001b[34m(op, *args, **kwargs)\u001b[39m\n\u001b[32m 519\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__apply__\u001b[39m[**A, B](op: Operation[A, B], *args: A.args, **kwargs: A.kwargs) -> B:\n\u001b[32m 520\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Apply ``op`` to ``args``, ``kwargs`` in interpretation ``intp``.\u001b[39;00m\n\u001b[32m 521\u001b[39m \n\u001b[32m 522\u001b[39m \u001b[33;03m Handling :func:`Operation.__apply__` changes the evaluation strategy of terms.\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 546\u001b[39m \n\u001b[32m 547\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m548\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mop\u001b[49m\u001b[43m.\u001b[49m\u001b[43m__default_rule__\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:350\u001b[39m, in \u001b[36mOperation.__default_rule__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 345\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"The default rule is used when the operation is not handled.\u001b[39;00m\n\u001b[32m 346\u001b[39m \n\u001b[32m 347\u001b[39m \u001b[33;03mIf no default rule is supplied, the free rule is used instead.\u001b[39;00m\n\u001b[32m 348\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 349\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m350\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__default__\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 351\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m NotHandled:\n\u001b[32m 352\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01meffectful\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mops\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01msyntax\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m defdata\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/completions.py:304\u001b[39m, in \u001b[36mcall_assistant\u001b[39m\u001b[34m(tools, response_format, model, **kwargs)\u001b[39m\n\u001b[32m 302\u001b[39m result = response_format.decode(raw_result.value) \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[32m 303\u001b[39m 
\u001b[38;5;28;01mexcept\u001b[39;00m (pydantic.ValidationError, \u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mSyntaxError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m--> \u001b[39m\u001b[32m304\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m ResultDecodingError(e, raw_message=raw_message) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01me\u001b[39;00m\n\u001b[32m 306\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m (raw_message, tool_calls, result)\n", - "\u001b[31mResultDecodingError\u001b[39m: Error decoding response: mypy type check failed:\n:170: \u001b[1m\u001b[31merror:\u001b[m Name \u001b[m\u001b[1m\"count_char\"\u001b[m already defined on line 159 \u001b[m\u001b[33m[no-redef]\u001b[m\n\nimport collections\nimport collections.abc\nimport IPython\nimport IPython.core\nimport IPython.core.alias\nimport IPython.core.application\nimport IPython.core.async_helpers\nimport IPython.core.autocall\nimport IPython.core.builtin_trap\nimport IPython.core.compilerop\nimport IPython.core.completer\nimport IPython.core.completerlib\nimport IPython.core.crashhandler\nimport IPython.core.debugger\nimport IPython.core.debugger_backport\nimport IPython.core.display\nimport IPython.core.display_functions\nimport IPython.core.display_trap\nimport IPython.core.displayhook\nimport IPython.core.displaypub\nimport IPython.core.doctb\nimport IPython.core.error\nimport IPython.core.events\nimport IPython.core.extensions\nimport IPython.core.formatters\nimport IPython.core.getipython\nimport IPython.core.guarded_eval\nimport IPython.core.history\nimport IPython.core.hooks\nimport IPython.core.inputtransformer2\nimport IPython.core.interactiveshell\nimport IPython.core.latex_symbols\nimport IPython.core.logger\nimport IPython.core.macro\nimport IPython.core.magic\nimport IPython.core.magic_arguments\nimport IPython.core.magics\nimport IPython.core.magics.ast_mod\nimport 
IPython.core.magics.auto\nimport IPython.core.magics.basic\nimport IPython.core.magics.code\nimport IPython.core.magics.config\nimport IPython.core.magics.display\nimport IPython.core.magics.execution\nimport IPython.core.magics.extension\nimport IPython.core.magics.history\nimport IPython.core.magics.logging\nimport IPython.core.magics.namespace\nimport IPython.core.magics.osm\nimport IPython.core.magics.packaging\nimport IPython.core.magics.pylab\nimport IPython.core.magics.script\nimport IPython.core.oinspect\nimport IPython.core.page\nimport IPython.core.payload\nimport IPython.core.prefilter\nimport IPython.core.profiledir\nimport IPython.core.release\nimport IPython.core.shellapp\nimport IPython.core.splitinput\nimport IPython.core.tbtools\nimport IPython.core.tips\nimport IPython.core.ultratb\nimport IPython.core.usage\nimport IPython.display\nimport IPython.extensions\nimport IPython.extensions.storemagic\nimport IPython.external\nimport IPython.external.pickleshare\nimport IPython.lib\nimport IPython.lib.clipboard\nimport IPython.lib.display\nimport IPython.lib.pretty\nimport IPython.paths\nimport IPython.terminal\nimport IPython.terminal.debugger\nimport IPython.terminal.embed\nimport IPython.terminal.interactiveshell\nimport IPython.terminal.ipapp\nimport IPython.terminal.magics\nimport IPython.terminal.prompts\nimport IPython.terminal.pt_inputhooks\nimport IPython.terminal.ptutils\nimport IPython.terminal.shortcuts\nimport IPython.terminal.shortcuts.auto_match\nimport IPython.terminal.shortcuts.auto_suggest\nimport IPython.terminal.shortcuts.filters\nimport IPython.testing\nimport IPython.testing.skipdoctest\nimport IPython.utils\nimport IPython.utils.PyColorize\nimport IPython.utils._process_common\nimport IPython.utils._process_posix\nimport IPython.utils._sysinfo\nimport IPython.utils.capture\nimport IPython.utils.contexts\nimport IPython.utils.data\nimport IPython.utils.decorators\nimport IPython.utils.dir2\nimport IPython.utils.docs\nimport 
IPython.utils.encoding\nimport IPython.utils.frame\nimport IPython.utils.generics\nimport IPython.utils.importstring\nimport IPython.utils.io\nimport IPython.utils.ipstruct\nimport IPython.utils.module_paths\nimport IPython.utils.openpy\nimport IPython.utils.path\nimport IPython.utils.process\nimport IPython.utils.py3compat\nimport IPython.utils.sentinel\nimport IPython.utils.strdispatch\nimport IPython.utils.sysinfo\nimport IPython.utils.syspathcontext\nimport IPython.utils.terminal\nimport IPython.utils.text\nimport IPython.utils.timing\nimport IPython.utils.tokenutil\nimport IPython.utils.wildcard\nimport collections\nimport collections.abc\nimport effectful\nimport effectful.handlers\nimport effectful.handlers.llm\nimport effectful.handlers.llm.completions\nimport effectful.handlers.llm.encoding\nimport effectful.handlers.llm.evaluation\nimport effectful.handlers.llm.template\nimport effectful.internals\nimport effectful.internals.runtime\nimport effectful.internals.unification\nimport effectful.ops\nimport effectful.ops.semantics\nimport effectful.ops.syntax\nimport effectful.ops.types\nimport pathlib\nIn: collections.abc.MutableSequence[str]\nOut: dict\n_: str\n__: str\n___: str\n__vsc_ipynb_file__: str\n_dh: collections.abc.MutableSequence[pathlib.PosixPath]\n_i: str\n_i1: str\n_i2: str\n_i3: str\n_i4: str\n_i5: str\n_i6: str\n_i7: str\n_ih: collections.abc.MutableSequence[str]\n_ii: str\n_iii: str\n_oh: dict\nchar: str\ncount_a: collections.abc.Callable[[str], int]\ncount_char: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\ncount_char_with_doctest: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\nexit: IPython.core.autocall.ZMQExitAutocall\nhaiku: collections.abc.Callable[[str], str]\nhaiku_no_cache: effectful.ops.types.Operation[[str], str]\nlimerick: effectful.ops.types.Operation[[str], str]\nprimes: effectful.ops.types.Operation[[int], int]\nprovider: 
effectful.ops.types.Interpretation\nprovider_cached: effectful.ops.types.Interpretation\nquit: IPython.core.autocall.ZMQExitAutocall\n\ndef count_char(s: str) -> int:\n \"\"\"\n Count the occurrences of the character 'a' in the given string.\n\n Args:\n s (str): The string in which to count occurrences of 'a'.\n\n Returns:\n int: The number of times 'a' appears in the string.\n\n Examples:\n >>> count_char('banana')\n 3\n >>> count_char('apple')\n 1\n >>> count_char('cherry')\n 0\n \"\"\"\n return s.count('a')\n_synthesized_check: collections.abc.Callable[[str], int] = count_char. Please provide a valid response and try again." - ] - } - ], - "source": [ - "@Template.define\n", - "def count_char_with_doctest(char: str) -> Callable[[str], int]:\n", - " \"\"\"Write a function named count_char that counts the occurrances of '{char}'.\n", - " Do not use any tools.\n", - " \"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "with handler(provider), handler(UnsafeEvalProvider()), handler(DoctestHandler()):\n", - " count_a = count_char_with_doctest(\"a\")\n", - " assert count_a(\"banana\") == 3" - ] - }, + "ename": "ImportError", + "evalue": "cannot import name 'DoctestHandler' from 'effectful.handlers.llm.evaluation' (/Users/nguyendat/Marc/effectful/effectful/handlers/llm/evaluation.py)", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mImportError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[18]\u001b[39m\u001b[32m, line 22\u001b[39m\n\u001b[32m 17\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01meffectful\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mhandlers\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Template, Tool\n\u001b[32m 18\u001b[39m 
\u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01meffectful\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mhandlers\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcompletions\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 19\u001b[39m LiteLLMProvider,\n\u001b[32m 20\u001b[39m RetryLLMHandler,\n\u001b[32m 21\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m22\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01meffectful\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mhandlers\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mevaluation\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m DoctestHandler, UnsafeEvalProvider\n\u001b[32m 23\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01meffectful\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mops\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01msemantics\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m NotHandled, handler\n\u001b[32m 25\u001b[39m provider = LiteLLMProvider()\n", + "\u001b[31mImportError\u001b[39m: cannot import name 'DoctestHandler' from 'effectful.handlers.llm.evaluation' (/Users/nguyendat/Marc/effectful/effectful/handlers/llm/evaluation.py)" + ] + } + ], + "source": [ + "import base64\n", + "import dataclasses\n", + "import functools\n", + "import inspect\n", + "import io\n", + "from collections.abc import Callable\n", + "from typing import Literal\n", + "\n", + "import litellm\n", + "import pydantic\n", + "from IPython.display import HTML, display\n", + "from litellm.caching.caching import Cache\n", + "from PIL import Image\n", + "from pydantic import field_validator\n", + "from pydantic_core import PydanticCustomError\n", + "\n", + "from effectful.handlers.llm import Template, 
Tool\n", + "from effectful.handlers.llm.completions import (\n", + " LiteLLMProvider,\n", + " RetryLLMHandler,\n", + ")\n", + "from effectful.handlers.llm.evaluation import DoctestHandler, UnsafeEvalProvider\n", + "from effectful.ops.semantics import NotHandled, handler\n", + "\n", + "provider = LiteLLMProvider()" + ] + }, + { + "cell_type": "markdown", + "id": "093243e0", + "metadata": {}, + "source": [ + "In the following sections, we walk through each of the mentioned components." + ] + }, + { + "cell_type": "markdown", + "id": "c1c639d3", + "metadata": {}, + "source": [ + "## Prompt Templates\n", + "\n", + "This template function writes (bad) poetry on a given theme. While difficult to implement in Python, an LLM can provide a reasonable implementation." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1e832675", + "metadata": {}, + "outputs": [], + "source": [ + "@Template.define\n", + "def limerick(theme: str) -> str:\n", + " \"\"\"Write a limerick on the theme of {theme}. Do not use any tools.\"\"\"\n", + " raise NotHandled" + ] + }, + { + "cell_type": "markdown", + "id": "f2ca6919", + "metadata": {}, + "source": [ + "If we call the template with a provider interpretation installed, we get reasonable behavior. The LLM is nondeterministic by default, so calling the template twice with the same arguments gives us different results.\n", + "\n", + "Templates are regular callables, so can be converted to operations with `defop` if we want to override the LLM implementation in some cases." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "634f6533", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "991ee445", - "metadata": {}, - "source": [ - "## Tool Calling\n", - "\n", - "`Operation`s defined in the lexical scope of a `Template` are automatically available for the LLM to call as tools. 
The description of these operations is inferred from their type annotations and docstrings.\n", - "\n", - "Tool calls are mediated by a helper operation `tool_call`. Handling this operation allows tool use to be tracked or logged." - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "In the sea where the silver fish play,\n", + "A salmon once swam by the bay.\n", + "With a splash and a swirl,\n", + "Past the shells it would twirl,\n", + "And vanish at the end of the day.\n", + "----------------------------------------\n", + "In the ocean, where fish freely roam,\n", + "A small school called a coral reef home.\n", + "With fins all aglow,\n", + "They dart to and fro,\n", + "Dancing waves as sea breezes comb.\n" + ] + } + ], + "source": [ + "with handler(provider):\n", + " print(limerick(\"fish\"))\n", + " print(\"-\" * 40)\n", + " print(limerick(\"fish\"))" + ] + }, + { + "cell_type": "markdown", + "id": "2e59acbc", + "metadata": {}, + "source": [ + "If we want deterministic behavior, we can cache the template call. 
We can either cache it with the default `@functools.cache` or use LiteLLM's built-in cache by setting a cache backend and passing `caching=True` to the provider:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "706ce53b", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "66711301", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Based on the weather conditions:\n", - "\n", - "- **Chicago** is currently cold.\n", - "- **New York** is currently wet.\n", - "- **Barcelona** is currently sunny.\n", - "\n", - "I suggest **Barcelona** as the city with good weather.\n" - ] - } - ], - "source": [ - "@Tool.define\n", - "def cities() -> list[str]:\n", - " \"\"\"Return a list of cities that can be passed to `weather`.\"\"\"\n", - " return [\"Chicago\", \"New York\", \"Barcelona\"]\n", - "\n", - "\n", - "@Tool.define\n", - "def weather(city: str) -> str:\n", - " \"\"\"Given a city name, return a description of the weather in that city.\"\"\"\n", - " status = {\"Chicago\": \"cold\", \"New York\": \"wet\", \"Barcelona\": \"sunny\"}\n", - " return status.get(city, \"unknown\")\n", - "\n", - "\n", - "@Template.define # cities and weather auto-captured from lexical scope\n", - "def vacation() -> str:\n", - " \"\"\"Use the provided tools to suggest a city that has good weather. 
Use only the `cities` and `weather` tools provided.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "with handler(provider):\n", - " print(vacation())" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Silent water glides,\n", + "Scales shimmering through currents—\n", + "Nature's art in waves.\n", + "----------------------------------------\n", + "Silent water glides,\n", + "Scales shimmering through currents—\n", + "Nature's art in waves.\n", + "\n" + ] }, { - "cell_type": "markdown", - "id": "59584a54", - "metadata": {}, - "source": [ - "## Image Inputs\n", - "\n", - "You can pass `PIL.Image.Image` values directly to templates." - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/nguyendat/Marc/effectful/.venv/lib/python3.12/site-packages/pydantic/main.py:528: UserWarning: Pydantic serializer warnings:\n", + " PydanticSerializationUnexpectedValue(Expected 10 fields but got 6: Expected `Message` - serialized value may not be as expected [field_name='message', input_value=Message(content='{\"value\"...: None}, annotations=[]), input_type=Message])\n", + " PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [field_name='choices', input_value=Choices(finish_reason='st...ider_specific_fields={}), input_type=Choices])\n", + " return self.__pydantic_serializer__.to_json(\n" + ] }, { - "cell_type": "code", - "execution_count": null, - "id": "89992702", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\"Example" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "A simple smiley face with a yellow background, featuring two black dots for eyes and a curved line for a mouth, typically used to convey happiness or friendliness.\n" - ] - } - ], - "source": [ - "image_base64 = (\n", - " 
\"iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAhElEQVR4nO2W4QqA\"\n", - " \"MAiEVXr/VzYWDGoMdk7Cgrt/sUs/DqZTd3EplFU2JwATYAJMoOlAB4bq89s95+Mg\"\n", - " \"+gyAchsKAYplBBBA43hFhfxnUixDjdEUUL8hpr7R0KLdt9qElzcyiu8As+Kr8zQA\"\n", - " \"mgLavAl+kIzFZyCRxtsAmWb/voZvqRzgBE1sIDuVFX4eAAAAAElFTkSuQmCC\"\n", - ")\n", - "image = Image.open(io.BytesIO(base64.b64decode(image_base64)))\n", - "\n", - "\n", - "@Template.define\n", - "def describe_image(image: Image.Image) -> str:\n", - " \"\"\"Return a short description of the following image.\n", - " {image}\n", - " \"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "with handler(provider):\n", - " display(\n", - " HTML(\n", - " f'\"Example'\n", - " )\n", - " )\n", - " print(describe_image(image))" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Silent waters gleam, \n", + "Fish dart in a moonlit stream, \n", + "Nature's quiet dream.\n", + "----------------------------------------\n", + "Silent waters dance, \n", + "Scales shimmer in fleeting light, \n", + "Fish glide through soft dreams.\n", + "\n", + "Fish swim through blue waves, \n", + "Their scales gleam like silver jewels, \n", + "Silent in the deep.\n", + "----------------------------------------\n", + "Silent waters flow,\n", + "Fish dart beneath gentle waves—\n", + "Nature's dance below.\n" + ] + } + ], + "source": [ + "@functools.cache\n", + "@Template.define\n", + "def haiku(theme: str) -> str:\n", + " \"\"\"Write a haiku on the theme of {theme}. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "@Template.define\n", + "def haiku_no_cache(theme: str) -> str:\n", + " \"\"\"Write a haiku on the theme of {theme}. 
Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "print()\n", + "with handler(provider):\n", + " print(haiku(\"fish\"))\n", + " print(\"-\" * 40)\n", + " print(haiku(\"fish\"))\n", + "\n", + "print()\n", + "# Enable LiteLLM caching by setting a cache backend and enabling caching.\n", + "litellm.cache = Cache()\n", + "provider_cached = LiteLLMProvider(caching=True)\n", + "try:\n", + " with handler(provider_cached):\n", + " print(haiku_no_cache(\"fish2\"))\n", + " print(\"-\" * 40)\n", + " print(haiku_no_cache(\"fish2\"))\n", + "finally:\n", + " litellm.cache = None\n", + "\n", + "print()\n", + "with handler(provider):\n", + " print(haiku_no_cache(\"fish3\"))\n", + " print(\"-\" * 40)\n", + " print(haiku_no_cache(\"fish3\"))" + ] + }, + { + "cell_type": "markdown", + "id": "13adb300", + "metadata": {}, + "source": [ + "## Converting LLM Results to Python Objects\n", + "\n", + "Type conversion is handled by `decode`. By default, primitive types are converted. `DecodeError` is raised if a response cannot be converted." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2c766859", + "metadata": {}, + "outputs": [], + "source": [ + "@Template.define\n", + "def primes(first_digit: int) -> int:\n", + " \"\"\"Give a prime number with {first_digit} as the first digit. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "with handler(provider):\n", + " assert type(primes(6)) is int" + ] + }, + { + "cell_type": "markdown", + "id": "36d78a71", + "metadata": {}, + "source": [ + "More complex types can be converted by providing handlers for `decode`. Callable synthesis is supported via `Encodable` and the evaluation providers in `effectful.handlers.llm.evaluation` (`UnsafeEvalProvider` or `RestrictedEvalProvider`), which enable parsing/compiling/executing synthesized code." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c83bbdc0", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "3d221feb", - "metadata": {}, - "source": [ - "## Structured Output Generation\n", - "\n", - "Constrained generation is used for any type that is convertible to a Pydantic model." - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "def count_char_a(text: str) -> int:\n", + " \"\"\"\n", + " Counts the occurrences of 'a' in the given string.\n", + "\n", + " Args:\n", + " text (str): The string to search within.\n", + "\n", + " Returns:\n", + " int: The count of 'a' characters in the string.\n", + "\n", + " Examples:\n", + " >>> count_char_a('banana')\n", + " 3\n", + " >>> count_char_a('apple')\n", + " 1\n", + " >>> count_char_a('cherry')\n", + " 0\n", + " \"\"\"\n", + " count = 0\n", + " for char in text:\n", + " if char == 'a':\n", + " count += 1\n", + " return count\n", + "\n" + ] + } + ], + "source": [ + "@Template.define\n", + "def count_char(char: str) -> Callable[[str], int]:\n", + " \"\"\"Write a function named count_char which takes a string and counts the occurrances of '{char}'. 
Do not use any tools.\n", + "\n", + " Examples:\n", + " >>> count_char(\"banana\")\n", + " 3\n", + " >>> count_char(\"cherry\")\n", + " 0\n", + " \"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "# Use UnsafeEvalProvider for simple examples; RestrictedEvalProvider may need extra globals.\n", + "with handler(provider), handler(UnsafeEvalProvider()), handler(DoctestHandler()):\n", + " count_a = count_char(\"a\")\n", + " assert callable(count_a)\n", + " assert count_a(\"banana\") == 3\n", + " assert count_a(\"cherry\") == 0\n", + " # Print the source code of the generated function\n", + " print(inspect.getsource(count_a))" + ] + }, + { + "cell_type": "markdown", + "id": "0b6a7b48", + "metadata": {}, + "source": [ + "### Doctest Feedback\n", + "\n", + "Write doctests in the template docstring for callable synthesis. The docstring is formatted with the call arguments, and the doctests are executed during decoding." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "793b12a5", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "17668ac8", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> You are onstage at a comedy club. You tell the following joke:\n", - "Knock knock.\n", - "Who's there?\n", - "Lizard.\n", - "Lizard who?\n", - "Lizard who? Lizard you wonder, there's a gecko at your door!\n", - "> The crowd laughs politely.\n" - ] - } - ], - "source": [ - "@dataclasses.dataclass\n", - "class KnockKnockJoke:\n", - " whos_there: str\n", - " punchline: str\n", - "\n", - "\n", - "@Template.define\n", - "def write_joke(theme: str) -> KnockKnockJoke:\n", - " \"\"\"Write a knock-knock joke on the theme of {theme}. Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "@Template.define\n", - "def rate_joke(joke: KnockKnockJoke) -> bool:\n", - " \"\"\"Decide if {joke} is funny or not. 
Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "def do_comedy():\n", - " joke = write_joke(\"lizards\")\n", - " print(\"> You are onstage at a comedy club. You tell the following joke:\")\n", - " print(\n", - " f\"Knock knock.\\nWho's there?\\n{joke.whos_there}.\\n{joke.whos_there} who?\\n{joke.punchline}\"\n", - " )\n", - " if rate_joke(joke):\n", - " print(\"> The crowd laughs politely.\")\n", - " else:\n", - " print(\"> The crowd stares in stony silence.\")\n", - "\n", - "\n", - "with handler(provider):\n", - " do_comedy()" - ] - }, + "ename": "ResultDecodingError", + "evalue": "Error decoding response: mypy type check failed:\n:170: \u001b[1m\u001b[31merror:\u001b[m Name \u001b[m\u001b[1m\"count_char\"\u001b[m already defined on line 159 \u001b[m\u001b[33m[no-redef]\u001b[m\n\nimport collections\nimport collections.abc\nimport IPython\nimport IPython.core\nimport IPython.core.alias\nimport IPython.core.application\nimport IPython.core.async_helpers\nimport IPython.core.autocall\nimport IPython.core.builtin_trap\nimport IPython.core.compilerop\nimport IPython.core.completer\nimport IPython.core.completerlib\nimport IPython.core.crashhandler\nimport IPython.core.debugger\nimport IPython.core.debugger_backport\nimport IPython.core.display\nimport IPython.core.display_functions\nimport IPython.core.display_trap\nimport IPython.core.displayhook\nimport IPython.core.displaypub\nimport IPython.core.doctb\nimport IPython.core.error\nimport IPython.core.events\nimport IPython.core.extensions\nimport IPython.core.formatters\nimport IPython.core.getipython\nimport IPython.core.guarded_eval\nimport IPython.core.history\nimport IPython.core.hooks\nimport IPython.core.inputtransformer2\nimport IPython.core.interactiveshell\nimport IPython.core.latex_symbols\nimport IPython.core.logger\nimport IPython.core.macro\nimport IPython.core.magic\nimport IPython.core.magic_arguments\nimport IPython.core.magics\nimport IPython.core.magics.ast_mod\nimport 
IPython.core.magics.auto\nimport IPython.core.magics.basic\nimport IPython.core.magics.code\nimport IPython.core.magics.config\nimport IPython.core.magics.display\nimport IPython.core.magics.execution\nimport IPython.core.magics.extension\nimport IPython.core.magics.history\nimport IPython.core.magics.logging\nimport IPython.core.magics.namespace\nimport IPython.core.magics.osm\nimport IPython.core.magics.packaging\nimport IPython.core.magics.pylab\nimport IPython.core.magics.script\nimport IPython.core.oinspect\nimport IPython.core.page\nimport IPython.core.payload\nimport IPython.core.prefilter\nimport IPython.core.profiledir\nimport IPython.core.release\nimport IPython.core.shellapp\nimport IPython.core.splitinput\nimport IPython.core.tbtools\nimport IPython.core.tips\nimport IPython.core.ultratb\nimport IPython.core.usage\nimport IPython.display\nimport IPython.extensions\nimport IPython.extensions.storemagic\nimport IPython.external\nimport IPython.external.pickleshare\nimport IPython.lib\nimport IPython.lib.clipboard\nimport IPython.lib.display\nimport IPython.lib.pretty\nimport IPython.paths\nimport IPython.terminal\nimport IPython.terminal.debugger\nimport IPython.terminal.embed\nimport IPython.terminal.interactiveshell\nimport IPython.terminal.ipapp\nimport IPython.terminal.magics\nimport IPython.terminal.prompts\nimport IPython.terminal.pt_inputhooks\nimport IPython.terminal.ptutils\nimport IPython.terminal.shortcuts\nimport IPython.terminal.shortcuts.auto_match\nimport IPython.terminal.shortcuts.auto_suggest\nimport IPython.terminal.shortcuts.filters\nimport IPython.testing\nimport IPython.testing.skipdoctest\nimport IPython.utils\nimport IPython.utils.PyColorize\nimport IPython.utils._process_common\nimport IPython.utils._process_posix\nimport IPython.utils._sysinfo\nimport IPython.utils.capture\nimport IPython.utils.contexts\nimport IPython.utils.data\nimport IPython.utils.decorators\nimport IPython.utils.dir2\nimport IPython.utils.docs\nimport 
IPython.utils.encoding\nimport IPython.utils.frame\nimport IPython.utils.generics\nimport IPython.utils.importstring\nimport IPython.utils.io\nimport IPython.utils.ipstruct\nimport IPython.utils.module_paths\nimport IPython.utils.openpy\nimport IPython.utils.path\nimport IPython.utils.process\nimport IPython.utils.py3compat\nimport IPython.utils.sentinel\nimport IPython.utils.strdispatch\nimport IPython.utils.sysinfo\nimport IPython.utils.syspathcontext\nimport IPython.utils.terminal\nimport IPython.utils.text\nimport IPython.utils.timing\nimport IPython.utils.tokenutil\nimport IPython.utils.wildcard\nimport collections\nimport collections.abc\nimport effectful\nimport effectful.handlers\nimport effectful.handlers.llm\nimport effectful.handlers.llm.completions\nimport effectful.handlers.llm.encoding\nimport effectful.handlers.llm.evaluation\nimport effectful.handlers.llm.template\nimport effectful.internals\nimport effectful.internals.runtime\nimport effectful.internals.unification\nimport effectful.ops\nimport effectful.ops.semantics\nimport effectful.ops.syntax\nimport effectful.ops.types\nimport pathlib\nIn: collections.abc.MutableSequence[str]\nOut: dict\n_: str\n__: str\n___: str\n__vsc_ipynb_file__: str\n_dh: collections.abc.MutableSequence[pathlib.PosixPath]\n_i: str\n_i1: str\n_i2: str\n_i3: str\n_i4: str\n_i5: str\n_i6: str\n_i7: str\n_ih: collections.abc.MutableSequence[str]\n_ii: str\n_iii: str\n_oh: dict\nchar: str\ncount_a: collections.abc.Callable[[str], int]\ncount_char: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\ncount_char_with_doctest: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\nexit: IPython.core.autocall.ZMQExitAutocall\nhaiku: collections.abc.Callable[[str], str]\nhaiku_no_cache: effectful.ops.types.Operation[[str], str]\nlimerick: effectful.ops.types.Operation[[str], str]\nprimes: effectful.ops.types.Operation[[int], int]\nprovider: 
effectful.ops.types.Interpretation\nprovider_cached: effectful.ops.types.Interpretation\nquit: IPython.core.autocall.ZMQExitAutocall\n\ndef count_char(s: str) -> int:\n \"\"\"\n Count the occurrences of the character 'a' in the given string.\n\n Args:\n s (str): The string in which to count occurrences of 'a'.\n\n Returns:\n int: The number of times 'a' appears in the string.\n\n Examples:\n >>> count_char('banana')\n 3\n >>> count_char('apple')\n 1\n >>> count_char('cherry')\n 0\n \"\"\"\n return s.count('a')\n_synthesized_check: collections.abc.Callable[[str], int] = count_char. Please provide a valid response and try again.", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mTypeError\u001b[39m Traceback (most recent call last)", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/completions.py:302\u001b[39m, in \u001b[36mcall_assistant\u001b[39m\u001b[34m(tools, response_format, model, **kwargs)\u001b[39m\n\u001b[32m 301\u001b[39m raw_result = response_model.model_validate_json(serialized_result)\n\u001b[32m--> \u001b[39m\u001b[32m302\u001b[39m result = \u001b[43mresponse_format\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mraw_result\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[32m 303\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (pydantic.ValidationError, \u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mSyntaxError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/encoding.py:452\u001b[39m, in \u001b[36mCallableEncodable.decode\u001b[39m\u001b[34m(self, encoded_value)\u001b[39m\n\u001b[32m 451\u001b[39m \u001b[38;5;66;03m# Type-check with mypy; pass original 
module_code so mypy sees exact source\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m452\u001b[39m \u001b[43mevaluation\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtype_check\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 453\u001b[39m \u001b[43m \u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mctx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexpected_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexpected_return\u001b[49m\n\u001b[32m 454\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 456\u001b[39m \u001b[38;5;66;03m# Compile and execute\u001b[39;00m\n\u001b[32m 457\u001b[39m \u001b[38;5;66;03m# https://docs.python.org/3/library/functions.html#exec\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:494\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m self_handler \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m494\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mself_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 495\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m args \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(args[\u001b[32m0\u001b[39m], Operation) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m args[\u001b[32m0\u001b[39m].__apply__:\n\u001b[32m 496\u001b[39m \u001b[38;5;66;03m# Prevent infinite recursion 
when calling self.apply directly\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:70\u001b[39m, in \u001b[36m_set_prompt..bound_body\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 69\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({prompt: handler({prompt: next_cont})(cont)}):\n\u001b[32m---> \u001b[39m\u001b[32m70\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbody\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:56\u001b[39m, in \u001b[36m_save_args.._cont_wrapper\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 55\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({_get_args: \u001b[38;5;28;01mlambda\u001b[39;00m: (a, k)}):\n\u001b[32m---> \u001b[39m\u001b[32m56\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/evaluation.py:628\u001b[39m, in \u001b[36mUnsafeEvalProvider.type_check\u001b[39m\u001b[34m(self, module, ctx, expected_params, expected_return)\u001b[39m\n\u001b[32m 620\u001b[39m \u001b[38;5;129m@implements\u001b[39m(type_check)\n\u001b[32m 621\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mtype_check\u001b[39m(\n\u001b[32m 622\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 626\u001b[39m expected_return: \u001b[38;5;28mtype\u001b[39m,\n\u001b[32m 627\u001b[39m ) -> \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m628\u001b[39m 
\u001b[43mmypy_type_check\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mctx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexpected_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexpected_return\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/evaluation.py:586\u001b[39m, in \u001b[36mmypy_type_check\u001b[39m\u001b[34m(module, ctx, expected_params, expected_return)\u001b[39m\n\u001b[32m 585\u001b[39m report = (stdout \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m) + (stderr \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m586\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mmypy type check failed:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mreport\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00msource\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 587\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "\u001b[31mTypeError\u001b[39m: mypy type check failed:\n:170: \u001b[1m\u001b[31merror:\u001b[m Name \u001b[m\u001b[1m\"count_char\"\u001b[m already defined on line 159 \u001b[m\u001b[33m[no-redef]\u001b[m\n\nimport collections\nimport collections.abc\nimport IPython\nimport IPython.core\nimport IPython.core.alias\nimport IPython.core.application\nimport IPython.core.async_helpers\nimport IPython.core.autocall\nimport IPython.core.builtin_trap\nimport IPython.core.compilerop\nimport IPython.core.completer\nimport IPython.core.completerlib\nimport IPython.core.crashhandler\nimport IPython.core.debugger\nimport IPython.core.debugger_backport\nimport IPython.core.display\nimport 
IPython.core.display_functions\nimport IPython.core.display_trap\nimport IPython.core.displayhook\nimport IPython.core.displaypub\nimport IPython.core.doctb\nimport IPython.core.error\nimport IPython.core.events\nimport IPython.core.extensions\nimport IPython.core.formatters\nimport IPython.core.getipython\nimport IPython.core.guarded_eval\nimport IPython.core.history\nimport IPython.core.hooks\nimport IPython.core.inputtransformer2\nimport IPython.core.interactiveshell\nimport IPython.core.latex_symbols\nimport IPython.core.logger\nimport IPython.core.macro\nimport IPython.core.magic\nimport IPython.core.magic_arguments\nimport IPython.core.magics\nimport IPython.core.magics.ast_mod\nimport IPython.core.magics.auto\nimport IPython.core.magics.basic\nimport IPython.core.magics.code\nimport IPython.core.magics.config\nimport IPython.core.magics.display\nimport IPython.core.magics.execution\nimport IPython.core.magics.extension\nimport IPython.core.magics.history\nimport IPython.core.magics.logging\nimport IPython.core.magics.namespace\nimport IPython.core.magics.osm\nimport IPython.core.magics.packaging\nimport IPython.core.magics.pylab\nimport IPython.core.magics.script\nimport IPython.core.oinspect\nimport IPython.core.page\nimport IPython.core.payload\nimport IPython.core.prefilter\nimport IPython.core.profiledir\nimport IPython.core.release\nimport IPython.core.shellapp\nimport IPython.core.splitinput\nimport IPython.core.tbtools\nimport IPython.core.tips\nimport IPython.core.ultratb\nimport IPython.core.usage\nimport IPython.display\nimport IPython.extensions\nimport IPython.extensions.storemagic\nimport IPython.external\nimport IPython.external.pickleshare\nimport IPython.lib\nimport IPython.lib.clipboard\nimport IPython.lib.display\nimport IPython.lib.pretty\nimport IPython.paths\nimport IPython.terminal\nimport IPython.terminal.debugger\nimport IPython.terminal.embed\nimport IPython.terminal.interactiveshell\nimport IPython.terminal.ipapp\nimport 
IPython.terminal.magics\nimport IPython.terminal.prompts\nimport IPython.terminal.pt_inputhooks\nimport IPython.terminal.ptutils\nimport IPython.terminal.shortcuts\nimport IPython.terminal.shortcuts.auto_match\nimport IPython.terminal.shortcuts.auto_suggest\nimport IPython.terminal.shortcuts.filters\nimport IPython.testing\nimport IPython.testing.skipdoctest\nimport IPython.utils\nimport IPython.utils.PyColorize\nimport IPython.utils._process_common\nimport IPython.utils._process_posix\nimport IPython.utils._sysinfo\nimport IPython.utils.capture\nimport IPython.utils.contexts\nimport IPython.utils.data\nimport IPython.utils.decorators\nimport IPython.utils.dir2\nimport IPython.utils.docs\nimport IPython.utils.encoding\nimport IPython.utils.frame\nimport IPython.utils.generics\nimport IPython.utils.importstring\nimport IPython.utils.io\nimport IPython.utils.ipstruct\nimport IPython.utils.module_paths\nimport IPython.utils.openpy\nimport IPython.utils.path\nimport IPython.utils.process\nimport IPython.utils.py3compat\nimport IPython.utils.sentinel\nimport IPython.utils.strdispatch\nimport IPython.utils.sysinfo\nimport IPython.utils.syspathcontext\nimport IPython.utils.terminal\nimport IPython.utils.text\nimport IPython.utils.timing\nimport IPython.utils.tokenutil\nimport IPython.utils.wildcard\nimport collections\nimport collections.abc\nimport effectful\nimport effectful.handlers\nimport effectful.handlers.llm\nimport effectful.handlers.llm.completions\nimport effectful.handlers.llm.encoding\nimport effectful.handlers.llm.evaluation\nimport effectful.handlers.llm.template\nimport effectful.internals\nimport effectful.internals.runtime\nimport effectful.internals.unification\nimport effectful.ops\nimport effectful.ops.semantics\nimport effectful.ops.syntax\nimport effectful.ops.types\nimport pathlib\nIn: collections.abc.MutableSequence[str]\nOut: dict\n_: str\n__: str\n___: str\n__vsc_ipynb_file__: str\n_dh: collections.abc.MutableSequence[pathlib.PosixPath]\n_i: 
str\n_i1: str\n_i2: str\n_i3: str\n_i4: str\n_i5: str\n_i6: str\n_i7: str\n_ih: collections.abc.MutableSequence[str]\n_ii: str\n_iii: str\n_oh: dict\nchar: str\ncount_a: collections.abc.Callable[[str], int]\ncount_char: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\ncount_char_with_doctest: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\nexit: IPython.core.autocall.ZMQExitAutocall\nhaiku: collections.abc.Callable[[str], str]\nhaiku_no_cache: effectful.ops.types.Operation[[str], str]\nlimerick: effectful.ops.types.Operation[[str], str]\nprimes: effectful.ops.types.Operation[[int], int]\nprovider: effectful.ops.types.Interpretation\nprovider_cached: effectful.ops.types.Interpretation\nquit: IPython.core.autocall.ZMQExitAutocall\n\ndef count_char(s: str) -> int:\n \"\"\"\n Count the occurrences of the character 'a' in the given string.\n\n Args:\n s (str): The string in which to count occurrences of 'a'.\n\n Returns:\n int: The number of times 'a' appears in the string.\n\n Examples:\n >>> count_char('banana')\n 3\n >>> count_char('apple')\n 1\n >>> count_char('cherry')\n 0\n \"\"\"\n return s.count('a')\n_synthesized_check: collections.abc.Callable[[str], int] = count_char", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[31mResultDecodingError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 10\u001b[39m\n\u001b[32m 6\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m NotHandled\n\u001b[32m 9\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler(provider), handler(UnsafeEvalProvider()):\n\u001b[32m---> \u001b[39m\u001b[32m10\u001b[39m count_a = \u001b[43mcount_char_with_doctest\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43ma\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 11\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m 
count_a(\u001b[33m\"\u001b[39m\u001b[33mbanana\u001b[39m\u001b[33m\"\u001b[39m) == \u001b[32m3\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:499\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__default__(*args, **kwargs)\n\u001b[32m 498\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m499\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__apply__\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:494\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 492\u001b[39m self_handler = intp.get(\u001b[38;5;28mself\u001b[39m)\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m self_handler \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m494\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mself_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 495\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m args \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(args[\u001b[32m0\u001b[39m], Operation) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m 
args[\u001b[32m0\u001b[39m].__apply__:\n\u001b[32m 496\u001b[39m \u001b[38;5;66;03m# Prevent infinite recursion when calling self.apply directly\u001b[39;00m\n\u001b[32m 497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__default__(*args, **kwargs)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:70\u001b[39m, in \u001b[36m_set_prompt..bound_body\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 68\u001b[39m next_cont = get_interpretation().get(prompt, prompt.__default_rule__)\n\u001b[32m 69\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({prompt: handler({prompt: next_cont})(cont)}):\n\u001b[32m---> \u001b[39m\u001b[32m70\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbody\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:56\u001b[39m, in \u001b[36m_save_args.._cont_wrapper\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 53\u001b[39m \u001b[38;5;129m@functools\u001b[39m.wraps(fn)\n\u001b[32m 54\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_cont_wrapper\u001b[39m(*a: P.args, **k: P.kwargs) -> T:\n\u001b[32m 55\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({_get_args: \u001b[38;5;28;01mlambda\u001b[39;00m: (a, k)}):\n\u001b[32m---> \u001b[39m\u001b[32m56\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/completions.py:521\u001b[39m, in \u001b[36mLiteLLMProvider._call\u001b[39m\u001b[34m(self, template, *args, 
**kwargs)\u001b[39m\n\u001b[32m 519\u001b[39m result: T | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 520\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m message[\u001b[33m\"\u001b[39m\u001b[33mrole\u001b[39m\u001b[33m\"\u001b[39m] != \u001b[33m\"\u001b[39m\u001b[33massistant\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m tool_calls:\n\u001b[32m--> \u001b[39m\u001b[32m521\u001b[39m message, tool_calls, result = \u001b[43mcall_assistant\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 522\u001b[39m \u001b[43m \u001b[49m\u001b[43mtemplate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_model\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mconfig\u001b[49m\n\u001b[32m 523\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 524\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m tool_call \u001b[38;5;129;01min\u001b[39;00m tool_calls:\n\u001b[32m 525\u001b[39m message = call_tool(tool_call)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:499\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__default__(*args, **kwargs)\n\u001b[32m 498\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m499\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__apply__\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile 
\u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:497\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 494\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m self_handler(*args, **kwargs)\n\u001b[32m 495\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m args \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(args[\u001b[32m0\u001b[39m], Operation) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m args[\u001b[32m0\u001b[39m].__apply__:\n\u001b[32m 496\u001b[39m \u001b[38;5;66;03m# Prevent infinite recursion when calling self.apply directly\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__default__\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 498\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 499\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__apply__(\u001b[38;5;28mself\u001b[39m, *args, **kwargs)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:548\u001b[39m, in \u001b[36m__apply__\u001b[39m\u001b[34m(op, *args, **kwargs)\u001b[39m\n\u001b[32m 519\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__apply__\u001b[39m[**A, B](op: Operation[A, B], *args: A.args, **kwargs: A.kwargs) -> B:\n\u001b[32m 520\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Apply ``op`` to ``args``, ``kwargs`` in interpretation ``intp``.\u001b[39;00m\n\u001b[32m 521\u001b[39m \n\u001b[32m 522\u001b[39m \u001b[33;03m Handling :func:`Operation.__apply__` changes the evaluation strategy of terms.\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 546\u001b[39m 
\n\u001b[32m 547\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m548\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mop\u001b[49m\u001b[43m.\u001b[49m\u001b[43m__default_rule__\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:350\u001b[39m, in \u001b[36mOperation.__default_rule__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 345\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"The default rule is used when the operation is not handled.\u001b[39;00m\n\u001b[32m 346\u001b[39m \n\u001b[32m 347\u001b[39m \u001b[33;03mIf no default rule is supplied, the free rule is used instead.\u001b[39;00m\n\u001b[32m 348\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 349\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m350\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__default__\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 351\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m NotHandled:\n\u001b[32m 352\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01meffectful\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mops\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01msyntax\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m defdata\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/completions.py:304\u001b[39m, in \u001b[36mcall_assistant\u001b[39m\u001b[34m(tools, response_format, model, 
**kwargs)\u001b[39m\n\u001b[32m 302\u001b[39m result = response_format.decode(raw_result.value) \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[32m 303\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (pydantic.ValidationError, \u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mSyntaxError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m--> \u001b[39m\u001b[32m304\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m ResultDecodingError(e, raw_message=raw_message) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01me\u001b[39;00m\n\u001b[32m 306\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m (raw_message, tool_calls, result)\n", + "\u001b[31mResultDecodingError\u001b[39m: Error decoding response: mypy type check failed:\n:170: \u001b[1m\u001b[31merror:\u001b[m Name \u001b[m\u001b[1m\"count_char\"\u001b[m already defined on line 159 \u001b[m\u001b[33m[no-redef]\u001b[m\n\nimport collections\nimport collections.abc\nimport IPython\nimport IPython.core\nimport IPython.core.alias\nimport IPython.core.application\nimport IPython.core.async_helpers\nimport IPython.core.autocall\nimport IPython.core.builtin_trap\nimport IPython.core.compilerop\nimport IPython.core.completer\nimport IPython.core.completerlib\nimport IPython.core.crashhandler\nimport IPython.core.debugger\nimport IPython.core.debugger_backport\nimport IPython.core.display\nimport IPython.core.display_functions\nimport IPython.core.display_trap\nimport IPython.core.displayhook\nimport IPython.core.displaypub\nimport IPython.core.doctb\nimport IPython.core.error\nimport IPython.core.events\nimport IPython.core.extensions\nimport IPython.core.formatters\nimport IPython.core.getipython\nimport IPython.core.guarded_eval\nimport IPython.core.history\nimport IPython.core.hooks\nimport IPython.core.inputtransformer2\nimport IPython.core.interactiveshell\nimport IPython.core.latex_symbols\nimport IPython.core.logger\nimport 
IPython.core.macro\nimport IPython.core.magic\nimport IPython.core.magic_arguments\nimport IPython.core.magics\nimport IPython.core.magics.ast_mod\nimport IPython.core.magics.auto\nimport IPython.core.magics.basic\nimport IPython.core.magics.code\nimport IPython.core.magics.config\nimport IPython.core.magics.display\nimport IPython.core.magics.execution\nimport IPython.core.magics.extension\nimport IPython.core.magics.history\nimport IPython.core.magics.logging\nimport IPython.core.magics.namespace\nimport IPython.core.magics.osm\nimport IPython.core.magics.packaging\nimport IPython.core.magics.pylab\nimport IPython.core.magics.script\nimport IPython.core.oinspect\nimport IPython.core.page\nimport IPython.core.payload\nimport IPython.core.prefilter\nimport IPython.core.profiledir\nimport IPython.core.release\nimport IPython.core.shellapp\nimport IPython.core.splitinput\nimport IPython.core.tbtools\nimport IPython.core.tips\nimport IPython.core.ultratb\nimport IPython.core.usage\nimport IPython.display\nimport IPython.extensions\nimport IPython.extensions.storemagic\nimport IPython.external\nimport IPython.external.pickleshare\nimport IPython.lib\nimport IPython.lib.clipboard\nimport IPython.lib.display\nimport IPython.lib.pretty\nimport IPython.paths\nimport IPython.terminal\nimport IPython.terminal.debugger\nimport IPython.terminal.embed\nimport IPython.terminal.interactiveshell\nimport IPython.terminal.ipapp\nimport IPython.terminal.magics\nimport IPython.terminal.prompts\nimport IPython.terminal.pt_inputhooks\nimport IPython.terminal.ptutils\nimport IPython.terminal.shortcuts\nimport IPython.terminal.shortcuts.auto_match\nimport IPython.terminal.shortcuts.auto_suggest\nimport IPython.terminal.shortcuts.filters\nimport IPython.testing\nimport IPython.testing.skipdoctest\nimport IPython.utils\nimport IPython.utils.PyColorize\nimport IPython.utils._process_common\nimport IPython.utils._process_posix\nimport IPython.utils._sysinfo\nimport 
IPython.utils.capture\nimport IPython.utils.contexts\nimport IPython.utils.data\nimport IPython.utils.decorators\nimport IPython.utils.dir2\nimport IPython.utils.docs\nimport IPython.utils.encoding\nimport IPython.utils.frame\nimport IPython.utils.generics\nimport IPython.utils.importstring\nimport IPython.utils.io\nimport IPython.utils.ipstruct\nimport IPython.utils.module_paths\nimport IPython.utils.openpy\nimport IPython.utils.path\nimport IPython.utils.process\nimport IPython.utils.py3compat\nimport IPython.utils.sentinel\nimport IPython.utils.strdispatch\nimport IPython.utils.sysinfo\nimport IPython.utils.syspathcontext\nimport IPython.utils.terminal\nimport IPython.utils.text\nimport IPython.utils.timing\nimport IPython.utils.tokenutil\nimport IPython.utils.wildcard\nimport collections\nimport collections.abc\nimport effectful\nimport effectful.handlers\nimport effectful.handlers.llm\nimport effectful.handlers.llm.completions\nimport effectful.handlers.llm.encoding\nimport effectful.handlers.llm.evaluation\nimport effectful.handlers.llm.template\nimport effectful.internals\nimport effectful.internals.runtime\nimport effectful.internals.unification\nimport effectful.ops\nimport effectful.ops.semantics\nimport effectful.ops.syntax\nimport effectful.ops.types\nimport pathlib\nIn: collections.abc.MutableSequence[str]\nOut: dict\n_: str\n__: str\n___: str\n__vsc_ipynb_file__: str\n_dh: collections.abc.MutableSequence[pathlib.PosixPath]\n_i: str\n_i1: str\n_i2: str\n_i3: str\n_i4: str\n_i5: str\n_i6: str\n_i7: str\n_ih: collections.abc.MutableSequence[str]\n_ii: str\n_iii: str\n_oh: dict\nchar: str\ncount_a: collections.abc.Callable[[str], int]\ncount_char: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\ncount_char_with_doctest: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\nexit: IPython.core.autocall.ZMQExitAutocall\nhaiku: collections.abc.Callable[[str], str]\nhaiku_no_cache: 
effectful.ops.types.Operation[[str], str]\nlimerick: effectful.ops.types.Operation[[str], str]\nprimes: effectful.ops.types.Operation[[int], int]\nprovider: effectful.ops.types.Interpretation\nprovider_cached: effectful.ops.types.Interpretation\nquit: IPython.core.autocall.ZMQExitAutocall\n\ndef count_char(s: str) -> int:\n \"\"\"\n Count the occurrences of the character 'a' in the given string.\n\n Args:\n s (str): The string in which to count occurrences of 'a'.\n\n Returns:\n int: The number of times 'a' appears in the string.\n\n Examples:\n >>> count_char('banana')\n 3\n >>> count_char('apple')\n 1\n >>> count_char('cherry')\n 0\n \"\"\"\n return s.count('a')\n_synthesized_check: collections.abc.Callable[[str], int] = count_char. Please provide a valid response and try again." + ] + } + ], + "source": [ + "@Template.define\n", + "def count_char_with_doctest(char: str) -> Callable[[str], int]:\n", + " \"\"\"Write a function named count_char that counts the occurrances of '{char}'.\n", + " Do not use any tools.\n", + " \"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "with handler(provider), handler(UnsafeEvalProvider()), handler(DoctestHandler()):\n", + " count_a = count_char_with_doctest(\"a\")\n", + " assert count_a(\"banana\") == 3" + ] + }, + { + "cell_type": "markdown", + "id": "991ee445", + "metadata": {}, + "source": [ + "## Tool Calling\n", + "\n", + "`Operation`s defined in the lexical scope of a `Template` are automatically available for the LLM to call as tools. The description of these operations is inferred from their type annotations and docstrings.\n", + "\n", + "Tool calls are mediated by a helper operation `tool_call`. Handling this operation allows tool use to be tracked or logged." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66711301", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "c0003944", - "metadata": {}, - "source": [ - "## Template Composition\n", - "\n", - "Templates defined in the lexical scope are also captured, enabling template composition. One template can use the result of another template in a pipeline:\n" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Based on the weather conditions:\n", + "\n", + "- **Chicago** is currently cold.\n", + "- **New York** is currently wet.\n", + "- **Barcelona** is currently sunny.\n", + "\n", + "I suggest **Barcelona** as the city with good weather.\n" + ] + } + ], + "source": [ + "@Tool.define\n", + "def cities() -> list[str]:\n", + " \"\"\"Return a list of cities that can be passed to `weather`.\"\"\"\n", + " return [\"Chicago\", \"New York\", \"Barcelona\"]\n", + "\n", + "\n", + "@Tool.define\n", + "def weather(city: str) -> str:\n", + " \"\"\"Given a city name, return a description of the weather in that city.\"\"\"\n", + " status = {\"Chicago\": \"cold\", \"New York\": \"wet\", \"Barcelona\": \"sunny\"}\n", + " return status.get(city, \"unknown\")\n", + "\n", + "\n", + "@Template.define # cities and weather auto-captured from lexical scope\n", + "def vacation() -> str:\n", + " \"\"\"Use the provided tools to suggest a city that has good weather. Use only the `cities` and `weather` tools provided.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "with handler(provider):\n", + " print(vacation())" + ] + }, + { + "cell_type": "markdown", + "id": "59584a54", + "metadata": {}, + "source": [ + "## Image Inputs\n", + "\n", + "You can pass `PIL.Image.Image` values directly to templates." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89992702", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "78a4bf44", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Sub-templates available to write_story: dict_keys(['describe_image', 'draw_simple_icon', 'limerick', 'haiku_no_cache', 'primes', 'count_char', 'cities', 'weather', 'vacation', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny'])\n", - "=== Story with moral ===\n", - "\n", - "\n", - "In the case of Whiskers, it was his understanding of this balance that brought him safely home, with both stories and lessons to cherish and share.\n", - "\n", - "=== Funny story ===\n", - "\n", - "\n", - "The End.\n" - ] - } + "data": { + "text/html": [ + "\"Example" ], - "source": [ - "# Sub-templates for different story styles\n", - "@Template.define\n", - "def story_with_moral(topic: str) -> str:\n", - " \"\"\"Write a short story about {topic} and end with a moral lesson. Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "@Template.define\n", - "def story_funny(topic: str) -> str:\n", - " \"\"\"Write a funny, humorous story about {topic}. Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "# Main orchestrator template - has access to sub-templates\n", - "@Template.define\n", - "def write_story(topic: str, style: str) -> str:\n", - " \"\"\"Write a story about {topic} in the style: {style}.\n", - " Available styles: 'moral' for a story with a lesson, 'funny' for humor. 
Use story_funny for humor, story_with_moral for a story with a lesson.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "# Verify sub-templates are captured in write_story's lexical context\n", - "assert story_with_moral in write_story.tools.values()\n", - "assert story_funny in write_story.tools.values()\n", - "print(\"Sub-templates available to write_story:\", write_story.tools.keys())\n", - "\n", - "with handler(provider):\n", - " print(\"=== Story with moral ===\")\n", - " print(write_story(\"a curious cat\", \"moral\"))\n", - " print()\n", - " print(\"=== Funny story ===\")\n", - " print(write_story(\"a curious cat\", \"funny\"))" + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "id": "bd25826d", - "metadata": {}, - "source": [ - "## Retrying LLM Requests\n", - "LLM calls can sometimes fail due to transient errors or produce invalid outputs. The `RetryLLMHandler` automatically retries failed template calls and can also surface tool/runtime errors as tool messages:\n", - "\n", - "- `num_retries`: Maximum number of retry attempts (default: 3)\n", - "- `include_traceback`: When `True`, include traceback details in the error feedback (default: False)\n", - "- `catch_tool_errors`: Exception type(s) to catch during tool execution (default: `Exception`)\n" - ] - }, - { - "cell_type": "markdown", - "id": "bafc0a96", - "metadata": {}, - "source": [ - "Example usage: having an unstable service that seldomly fail." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4334d07a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error: Service unavailable! Attempt 1/3. Please retry.\n", - "Result: The data fetched from the unstable service is: `[1, 2, 3]`. 
Retries: 3\n" - ] - } - ], - "source": [ - "call_count = 0\n", - "REQUIRED_RETRIES = 3\n", - "\n", - "\n", - "@Tool.define\n", - "def unstable_service() -> str:\n", - " \"\"\"Fetch data from an unstable external service. May require retries.\"\"\"\n", - " global call_count\n", - " call_count += 1\n", - " if call_count < REQUIRED_RETRIES:\n", - " raise ConnectionError(\n", - " f\"Service unavailable! Attempt {call_count}/{REQUIRED_RETRIES}. Please retry.\"\n", - " )\n", - " return \"{ 'status': 'ok', 'data': [1, 2, 3] }\"\n", - "\n", - "\n", - "@Template.define # unstable_service auto-captured from lexical scope\n", - "def fetch_data() -> str:\n", - " \"\"\"Use the unstable_service tool to fetch data.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "with handler(provider):\n", - " try:\n", - " result = fetch_data()\n", - " except Exception as e:\n", - " print(f\"Error: {e}\")\n", - "\n", - "with handler(provider), handler(RetryLLMHandler(num_retries=3)):\n", - " result = fetch_data()\n", - " print(f\"Result: {result}\", \"Retries:\", call_count)" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "A simple smiley face with a yellow background, featuring two black dots for eyes and a curved line for a mouth, typically used to convey happiness or friendliness.\n" + ] + } + ], + "source": [ + "image_base64 = (\n", + " \"iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAhElEQVR4nO2W4QqA\"\n", + " \"MAiEVXr/VzYWDGoMdk7Cgrt/sUs/DqZTd3EplFU2JwATYAJMoOlAB4bq89s95+Mg\"\n", + " \"+gyAchsKAYplBBBA43hFhfxnUixDjdEUUL8hpr7R0KLdt9qElzcyiu8As+Kr8zQA\"\n", + " \"mgLavAl+kIzFZyCRxtsAmWb/voZvqRzgBE1sIDuVFX4eAAAAAElFTkSuQmCC\"\n", + ")\n", + "image = Image.open(io.BytesIO(base64.b64decode(image_base64)))\n", + "\n", + "\n", + "@Template.define\n", + "def describe_image(image: Image.Image) -> str:\n", + " \"\"\"Return a short description of the following image.\n", + " {image}\n", + " \"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "with 
handler(provider):\n", + " display(\n", + " HTML(\n", + " f'\"Example'\n", + " )\n", + " )\n", + " print(describe_image(image))" + ] + }, + { + "cell_type": "markdown", + "id": "3d221feb", + "metadata": {}, + "source": [ + "## Structured Output Generation\n", + "\n", + "Constrained generation is used for any type that is convertible to a Pydantic model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17668ac8", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "4ac00e01", - "metadata": {}, - "source": [ - "## Retrying with Validation Errors\n", - "As noted above, the `RetryHandler` can also be used to retry on runtime/validation error:" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "> You are onstage at a comedy club. You tell the following joke:\n", + "Knock knock.\n", + "Who's there?\n", + "Lizard.\n", + "Lizard who?\n", + "Lizard who? Lizard you wonder, there's a gecko at your door!\n", + "> The crowd laughs politely.\n" + ] + } + ], + "source": [ + "@dataclasses.dataclass\n", + "class KnockKnockJoke:\n", + " whos_there: str\n", + " punchline: str\n", + "\n", + "\n", + "@Template.define\n", + "def write_joke(theme: str) -> KnockKnockJoke:\n", + " \"\"\"Write a knock-knock joke on the theme of {theme}. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "@Template.define\n", + "def rate_joke(joke: KnockKnockJoke) -> bool:\n", + " \"\"\"Decide if {joke} is funny or not. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "def do_comedy():\n", + " joke = write_joke(\"lizards\")\n", + " print(\"> You are onstage at a comedy club. 
You tell the following joke:\")\n", + " print(\n", + " f\"Knock knock.\\nWho's there?\\n{joke.whos_there}.\\n{joke.whos_there} who?\\n{joke.punchline}\"\n", + " )\n", + " if rate_joke(joke):\n", + " print(\"> The crowd laughs politely.\")\n", + " else:\n", + " print(\"> The crowd stares in stony silence.\")\n", + "\n", + "\n", + "with handler(provider):\n", + " do_comedy()" + ] + }, + { + "cell_type": "markdown", + "id": "c0003944", + "metadata": {}, + "source": [ + "## Template Composition\n", + "\n", + "Templates defined in the lexical scope are also captured, enabling template composition. One template can use the result of another template in a pipeline:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78a4bf44", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "39b2b225", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error: Error decoding response: 1 validation error for Response\n", - "value.score\n", - " score must be 1–5, got 9 [type=invalid_score, input_value=9, input_type=int]. Please provide a valid response and try again.\n", - "Score: 5/5\n", - "Explanation: Die Hard is widely acclaimed as one of the best action films of all time and earns a perfect score of 5 out of 5. Its success is attributed to a gripping storyline, memorable performances, particularly by Bruce Willis as John McClane, and its innovative approach to action sequences. 
Its mix of humor, suspense, and holiday-themed backdrop makes it a perennial favorite, cementing its status as a cultural icon.\n" - ] - } - ], - "source": [ - "@pydantic.dataclasses.dataclass\n", - "class Rating:\n", - " score: int\n", - " explanation: str\n", - "\n", - " @field_validator(\"score\")\n", - " @classmethod\n", - " def check_score(cls, v):\n", - " if v < 1 or v > 5:\n", - " raise PydanticCustomError(\n", - " \"invalid_score\",\n", - " \"score must be 1–5, got {v}\",\n", - " {\"v\": v},\n", - " )\n", - " return v\n", - "\n", - " @field_validator(\"explanation\")\n", - " @classmethod\n", - " def check_explanation_contains_score(cls, v, info):\n", - " score = info.data.get(\"score\", None)\n", - " if score is not None and str(score) not in v:\n", - " raise PydanticCustomError(\n", - " \"invalid_explanation\",\n", - " \"explanation must mention the score {score}, got '{explanation}'\",\n", - " {\"score\": score, \"explanation\": v},\n", - " )\n", - " return v\n", - "\n", - "\n", - "@Template.define\n", - "def give_rating_for_movie(movie_name: str) -> Rating:\n", - " \"\"\"Give a rating for {movie_name}. The explanation MUST include the numeric score. 
Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "with handler(provider):\n", - " try:\n", - " rating = give_rating_for_movie(\"Die Hard\")\n", - " except Exception as e:\n", - " print(f\"Error: {e}\")\n", - "\n", - "with handler(provider), handler(RetryLLMHandler(num_retries=3)):\n", - " rating = give_rating_for_movie(\"Die Hard\")\n", - " print(f\"Score: {rating.score}/5\")\n", - " print(f\"Explanation: {rating.explanation}\")" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Sub-templates available to write_story: dict_keys(['describe_image', 'draw_simple_icon', 'limerick', 'haiku_no_cache', 'primes', 'count_char', 'cities', 'weather', 'vacation', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny'])\n", + "=== Story with moral ===\n", + "\n", + "\n", + "In the case of Whiskers, it was his understanding of this balance that brought him safely home, with both stories and lessons to cherish and share.\n", + "\n", + "=== Funny story ===\n", + "\n", + "\n", + "The End.\n" + ] + } + ], + "source": [ + "# Sub-templates for different story styles\n", + "@Template.define\n", + "def story_with_moral(topic: str) -> str:\n", + " \"\"\"Write a short story about {topic} and end with a moral lesson. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "@Template.define\n", + "def story_funny(topic: str) -> str:\n", + " \"\"\"Write a funny, humorous story about {topic}. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "# Main orchestrator template - has access to sub-templates\n", + "@Template.define\n", + "def write_story(topic: str, style: str) -> str:\n", + " \"\"\"Write a story about {topic} in the style: {style}.\n", + " Available styles: 'moral' for a story with a lesson, 'funny' for humor. 
Use story_funny for humor, story_with_moral for a story with a lesson.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "# Verify sub-templates are captured in write_story's lexical context\n", + "assert story_with_moral in write_story.tools.values()\n", + "assert story_funny in write_story.tools.values()\n", + "print(\"Sub-templates available to write_story:\", write_story.tools.keys())\n", + "\n", + "with handler(provider):\n", + " print(\"=== Story with moral ===\")\n", + " print(write_story(\"a curious cat\", \"moral\"))\n", + " print()\n", + " print(\"=== Funny story ===\")\n", + " print(write_story(\"a curious cat\", \"funny\"))" + ] + }, + { + "cell_type": "markdown", + "id": "bd25826d", + "metadata": {}, + "source": [ + "## Retrying LLM Requests\n", + "LLM calls can sometimes fail due to transient errors or produce invalid outputs. The `RetryLLMHandler` automatically retries failed template calls and can also surface tool/runtime errors as tool messages:\n", + "\n", + "- `num_retries`: Maximum number of retry attempts (default: 3)\n", + "- `include_traceback`: When `True`, include traceback details in the error feedback (default: False)\n", + "- `catch_tool_errors`: Exception type(s) to catch during tool execution (default: `Exception`)\n" + ] + }, + { + "cell_type": "markdown", + "id": "bafc0a96", + "metadata": {}, + "source": [ + "Example usage: an unstable service that occasionally fails." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4334d07a", + "metadata": {}, + "outputs": [ { + "name": "stdout", + "output_type": "stream", + "text": [ + "Error: Service unavailable! Attempt 1/3. Please retry.\n", + "Result: The data fetched from the unstable service is: `[1, 2, 3]`. 
Retries: 3\n" + ] + } + ], + "source": [ + "call_count = 0\n", + "REQUIRED_RETRIES = 3\n", + "\n", + "\n", + "@Tool.define\n", + "def unstable_service() -> str:\n", + " \"\"\"Fetch data from an unstable external service. May require retries.\"\"\"\n", + " global call_count\n", + " call_count += 1\n", + " if call_count < REQUIRED_RETRIES:\n", + " raise ConnectionError(\n", + " f\"Service unavailable! Attempt {call_count}/{REQUIRED_RETRIES}. Please retry.\"\n", + " )\n", + " return \"{ 'status': 'ok', 'data': [1, 2, 3] }\"\n", + "\n", + "\n", + "@Template.define # unstable_service auto-captured from lexical scope\n", + "def fetch_data() -> str:\n", + " \"\"\"Use the unstable_service tool to fetch data.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "with handler(provider):\n", + " try:\n", + " result = fetch_data()\n", + " except Exception as e:\n", + " print(f\"Error: {e}\")\n", + "\n", + "with handler(provider), handler(RetryLLMHandler(num_retries=3)):\n", + " result = fetch_data()\n", + " print(f\"Result: {result}\", \"Retries:\", call_count)" + ] + }, + { + "cell_type": "markdown", + "id": "4ac00e01", + "metadata": {}, + "source": [ + "## Retrying with Validation Errors\n", + "As noted above, the `RetryLLMHandler` can also be used to retry on runtime/validation errors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b2b225", + "metadata": {}, + "outputs": [ { + "name": "stdout", + "output_type": 
str) -> str:\n", - " # Start with an introduction and establish the story theme\n", - " chapter_1 = write_chapter(1, f\"Introduction to {topic}\")\n", - " \n", - " # Develop the plot with a challenge or situation related to the topic\n", - " chapter_2 = write_chapter(2, f\"The Challenge of {topic}\")\n", - " \n", - " # Introduce a turning point or decision-making moment involving the topic\n", - " chapter_3 = write_chapter(3, f\"Decisions and Consequences of {topic}\")\n", - " \n", - " # Conclusion wrapping up the story and highlighting the moral\n", - " chapter_4 = write_chapter(4, f\"Moral and Lessons of {topic}\")\n", - " \n", - " # Combine all chapters into one coherent story\n", - " full_story = \"\\n\\n\".join([chapter_1, chapter_2, chapter_3, chapter_4])\n", - " \n", - " return full_story\n", - "**Title: The Journey of One**\n", - "\n", - "In a land far beyond imagination, where numbers were not just symbols but beings with feelings and desires, there was a little number known as One. Though often underestimated, One had a dream larger than any universe: to find its true purpose.\n", - "\n", - "One was simple, yet unique. It started each day by enjoying the sunrise, counting the seconds in silent appreciation of the continuum of time. Yet, in the grand tapestry of numbers, One felt ordinary and unnoticed, especially among the grandiosity of larger numbers like Millions and Billions, which often boasted about their size and importance.\n", - "\n", - "One day, O came across Zero, a soft-spoken and kind companion, often seen lingering in the shadows of others. \"Why do you look so glum, One?\" asked Zero as they both watched the twinkling stars above.\n", - "\n", - "\"I feel small in a world full of giants. What significance do I hold when everyone seems to multiply and magnify everything far beyond my own capacity?\"\n", - "\n", - "Zero smiled softly. \"You must explore, dear One. For you might be small, but with the right touch, you can change everything. 
You are the beginning of dreams, the spark that starts a continuum.\"\n", - "\n", - "Taking this advice to heart, One embarked on a journey to discover its true power and potential. As it wandered through the Land of Mathematics, it met Addition, the kind-hearted magician, who taught One how it could transform nothing into something, just by joining in a dance.\n", - "\n", - "With Multiplication, One learned coordination and rhythm, expanding its influence exponentially with a simple step forward. There were days spent in the Company of Fractions, shrinking itself to explore the depth of intricacy plus seeing life from a new perspective.\n", - "\n", - "Finally, it found itself near the great figure of Unity, where all numbers whether large or small, participated in harmony. Here, One discovered its greatest potential—to bring completeness. When used wisely, One could complete a perfect circle or spell disaster if miscalculated.\n", - "\n", - "In its quest, One realized its strength was simplicity itself. As small as it was, it was the foundation upon which countless worlds depended. Without One, there was nothing to start; no Number Line, no Life Progression.\n", - "\n", - "And so, One returned to its place in the universe, no longer ordinary but extraordinary in its ability to bring beginnings.\n", - "\n", - "Thus, the moral of the story: No matter how small or insignificant you feel, remember that you have the power to change everything. You are the first step in your journey and those of others. Embrace your role and start with conviction, for you are One. \n", - "\n", - "And sometimes, that's all you need to be remarkable.\n", - "\n", - "Once upon a time in the quaint village of Digiton, nestled in the Valley of Numerals, lived the number 2. In this village, each number had their unique talents and ways to contribute to the community. 
Number 2 was known for its ability to find wonderful pairings and create harmony.\n", - "\n", - "It was a bright, sunny morning when 2 decided it was time to plan the grand Numerals Gala, an event celebrated by all numbers from 1 to 9. This year's theme was \"Unity in Pairs,\" and 2 took the responsibility seriously.\n", - "\n", - "With a checklist in hand, 2 began to organize the event. First, 2 visited its oldest friend, the number 1. \"Would you be one half of a winning pair, dear friend?\" 2 asked. \"Of course,\" replied 1, \"together we make the perfect pair of Unity, everyone knows!\"\n", - "\n", - "Next, 2 approached the number 3. Though sometimes perceived as a little off-kilter, 3 was eager to join and suggested pairing with 4 to symbolize growth and progression: 3 plus 4 always added up to 7—a lucky number for all.\n", - "\n", - "Eager to ensure everyone was included, 2 made a special stop at number 5's cheerful blue cottage. \"5, would you create a bridge with me?\" 2 proposed. \"Together we form \"7\", the lucky charm—how can I resist?\" giggled 5.\n", - "\n", - "Day by day, the excitement in Digiton grew. Numbers periodically gathered in the square to rehearse their speeches and musical acts. Finally, the day of the Gala arrived, and pairs paraded on stage, highlighting unity through their performances. The pairing of 6 and 7 showcased a dance of luck and prosperity, while 8 and 9 painted visions of a dreamy future.\n", - "\n", - "As everyone settled down for the final speech, number 2 took the stage, its heart full of joy. \"Dear friends,\" 2 began, \"thank you for showing us the beauty of partnership. Alone, each of us is a number, but together, we build the world. 
Let us remember that two is a bond that shows love, loyalty, and peace.\"\n", - "\n", - "With a warm round of applause, the Gala concluded, but in their hearts, every number knew that it was 2's thoughtful pairing that showed them the profound harmony within.\n", - "\n", - "And so, 2's legacy in Digiton was etched as a gentle, powerful reminder that the most meaningful journeys are those taken with another by your side. Such was the wisdom of number 2.\n", - "\n", - "Once upon a time, nestled in the quiet and serene landscape of Numerland, there was a unique and charismatic number named \"Three.\" Unlike the other numbers, Three was proudly quirky and adventurous. Sporting three shining points, he dazzled with a triangular shape that made him quite distinctive among his peers.\n", - "\n", - "Three lived in the bustling community of Tallytown, a place where numbers came together to form equations, solve problems, and have numerical debates. But Three often felt that Tallytown was too caught up in linear thinking. He liked to think outside the box—or pyramid, in his case.\n", - "\n", - "One sunny day, Three decided to embark on an adventure across the wide fields of positivity. His first stop was Addition Avenue, a lively street where numbers piled atop each other, eagerly building bridges to larger sums. While there, Three met other numbers like Six and Nine, who greeted him warmly.\n", - "\n", - "“Why travel, Three?” asked Six.\n", - "\n", - "\"I'm seeking something more,\" Three replied. \"I feel like there's a whole world of meritorious multiplicities and radiant reciprocals waiting for me!\"\n", - "\n", - "With a friendly nod, Three continued on his journey. He navigated through Subtraction Square, where he learned to appreciate simplicity. 
As he passed through, Two’s counsel resonated: “Sometimes less is more, Three.”\n", - "\n", - "Eventually, Three found himself at the multipliers' meadow, a wide expanse where numbers did cartwheels, creating exponential wonders. It was here he met Zero, who diffidently warned, \"Multiply with me, and I'll vanish you into nothingness!\"\n", - "\n", - "Three chuckled at the paradox and moved on. He rolled over to Division Dale, where he admired the symmetry of parts and ratios. Three realized he was not just a number but a part of something truly wondrous.\n", - "\n", - "Finally, gazing at the starry skyscape of Infinity Lane, Three discovered his true potential—he was a constant, reliable factor that held significance beyond simple numerical value. Each point of his triangular form seemed to twinkle with this newfound wisdom.\n", - "\n", - "As he made his way back home to Tallytown, Three felt renewed, armed with appreciation for his uniqueness and the harmony between all numbers. He returned not just as Three, but as the representation of balance, creativity, and the beautiful geometric world from which he drew his strength.\n", - "\n", - "And so, in the land of Numerland, Three lived happily, not just a simple integer, but a remarkable journey in and of itself—a point of convergence in a universe of endless possibilities.\n", - "\n", - "**The Tale of Four Friends**\n", - "\n", - "Once upon a time, in the cozy town of Little Numbers, there dwelt a modest fellow known simply as \"4.\" Though he appeared ordinary, 4 was actually quite special. He had three devoted friends: 1, 2, and 3. Together, they formed a dynamic quartet of remarkable adventures.\n", - "\n", - "One bright spring morning, they embarked on a journey to solve the mystery of the Lost Sequence. 
It was said that the sequence held the secret to solving any mathematical problem, and possessing it would mean endless possibilities.\n", - "\n", - "4, ever confident in his stability, led the group with enthusiasm. \"We can decipher any riddle with our unity,\" he declared, his square-shaped stature conveying authority.\n", - "\n", - "Their first challenge arrived at the Great Divide Canyon, a vast gap that seemed insurmountable. \"Fear not!\" said 2, offering help with her talent for pairing. She balanced 1 on her left and 3 on her right. Effortlessly, they formed a bridge sturdy enough for 4 to cross, leading them all safely to the other side.\n", - "\n", - "The team soon reached the Valley of Equations, where intricate puzzles befuddled passersby. With 4's knack for balance and proportion, they made short work of the conundrums. 1's simplicity, combined with 3's creative approach, solved complex equations, while 2's knack for harmonizing detected patterns invisible to others.\n", - "\n", - "As they journeyed deeper, they encountered the enigma known as the Paradox Terrain. Here, problems that seemed unsolvable loomed ominously. \"Let us remember,\" 4 reminded them, \"that solutions are often nearer than they appear.\"\n", - "\n", - "With a fresh perspective, 3 noticed a pattern: each unsolvable problem required going back to basic principles. By retracing steps, simplifying assumptions, and adding unique insights, they cracked the paradox.\n", - "\n", - "At last, the quartet arrived at the Chamber of the Lost Sequence, where wisdom awaited them. The mystical sequence unveiled itself, revealing the elegance of mathematical harmony, in which each number played a crucial role.\n", - "\n", - "Embracing the sequence, the friends returned to Little Numbers, wiser and more united than ever. 
Thus, in the camaraderie of 4 and his friends, the town learned a timeless lesson: the greatest strength comes not from singular achievement, but from the harmony of collective unity.\n", - "\n", - "And so, they lived happily and mathematically ever after.\n", - "\n" - ] - } - ], - "source": [ - "# Sub-templates for different story styles\n", - "@Template.define\n", - "def write_chapter(chapter_number: int, chapter_name: str) -> str:\n", - " \"\"\"Write a short story about {chapter_number}. Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "@Template.define\n", - "def judge_chapter(story_so_far: str, chapter_number: int) -> bool:\n", - " \"\"\"Decide if the new chapter is coherence with the story so far. Do not use any tools.\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "# Main orchestrator template - has access to sub-templates\n", - "@Template.define\n", - "def write_multi_chapter_story(style: Literal[\"moral\", \"funny\"]) -> Callable[[str], str]:\n", - " \"\"\"Generate a function that writes a story in style: {style} about the given topic.\n", - "\n", - " The program can use helper functions defined elsewhere (DO NOT REDEFINE THEM):\n", - " - write_chapter(chapter_number: int, chapter_name: str) -> str\n", - " - judge_chapter(story_so_far: str, chapter_number: int) -> bool\"\"\"\n", - " raise NotHandled\n", - "\n", - "\n", - "# Verify sub-templates are captured in write_story's lexical context\n", - "print(\"Sub-templates available to write_story:\", write_multi_chapter_story.tools.keys())\n", - "\n", - "with (\n", - " handler(RetryLLMHandler(num_retries=3)),\n", - " handler(provider),\n", - " handler(UnsafeEvalProvider()),\n", - "):\n", - " print(\"=== Story with moral ===\")\n", - " function_that_writes_story = write_multi_chapter_story(\"moral\")\n", - " print(inspect.getsource(function_that_writes_story))\n", - " print(function_that_writes_story(\"a curious cat\"))\n", - " print()" - ] + "name": "stdout", + "output_type": 
"stream", + "text": [ + "Error: Error decoding response: 1 validation error for Response\n", + "value.score\n", + " score must be 1–5, got 9 [type=invalid_score, input_value=9, input_type=int]. Please provide a valid response and try again.\n", + "Score: 5/5\n", + "Explanation: Die Hard is widely acclaimed as one of the best action films of all time and earns a perfect score of 5 out of 5. Its success is attributed to a gripping storyline, memorable performances, particularly by Bruce Willis as John McClane, and its innovative approach to action sequences. Its mix of humor, suspense, and holiday-themed backdrop makes it a perennial favorite, cementing its status as a cultural icon.\n" + ] } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" + ], + "source": [ + "@pydantic.dataclasses.dataclass\n", + "class Rating:\n", + " score: int\n", + " explanation: str\n", + "\n", + " @field_validator(\"score\")\n", + " @classmethod\n", + " def check_score(cls, v):\n", + " if v < 1 or v > 5:\n", + " raise PydanticCustomError(\n", + " \"invalid_score\",\n", + " \"score must be 1–5, got {v}\",\n", + " {\"v\": v},\n", + " )\n", + " return v\n", + "\n", + " @field_validator(\"explanation\")\n", + " @classmethod\n", + " def check_explanation_contains_score(cls, v, info):\n", + " score = info.data.get(\"score\", None)\n", + " if score is not None and str(score) not in v:\n", + " raise PydanticCustomError(\n", + " \"invalid_explanation\",\n", + " \"explanation must mention the score {score}, got '{explanation}'\",\n", + " {\"score\": score, \"explanation\": v},\n", + " )\n", + " return v\n", + "\n", + "\n", + "@Template.define\n", + "def give_rating_for_movie(movie_name: 
str) -> Rating:\n", + " \"\"\"Give a rating for {movie_name}. The explanation MUST include the numeric score. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "with handler(provider):\n", + " try:\n", + " rating = give_rating_for_movie(\"Die Hard\")\n", + " except Exception as e:\n", + " print(f\"Error: {e}\")\n", + "\n", + "with handler(provider), handler(RetryLLMHandler(num_retries=3)):\n", + " rating = give_rating_for_movie(\"Die Hard\")\n", + " print(f\"Score: {rating.score}/5\")\n", + " print(f\"Explanation: {rating.explanation}\")" + ] + }, + { + "cell_type": "markdown", + "id": "aec0632c", + "metadata": {}, + "source": [ + "## Generating higher-order functions\n", + "Finally, we can generate higher-order functions that can call templates as well:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d02bc67", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sub-templates available to write_story: dict_keys(['describe_image', 'draw_simple_icon', 'limerick', 'haiku_no_cache', 'primes', 'count_char', 'cities', 'weather', 'vacation', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny', 'write_story', 'unstable_service', 'fetch_data', 'give_rating_for_movie', 'write_chapter', 'judge_chapter'])\n", + "=== Story with moral ===\n", + "def create_moral_story(topic: str) -> str:\n", + " # Start with an introduction and establish the story theme\n", + " chapter_1 = write_chapter(1, f\"Introduction to {topic}\")\n", + " \n", + " # Develop the plot with a challenge or situation related to the topic\n", + " chapter_2 = write_chapter(2, f\"The Challenge of {topic}\")\n", + " \n", + " # Introduce a turning point or decision-making moment involving the topic\n", + " chapter_3 = write_chapter(3, f\"Decisions and Consequences of {topic}\")\n", + " \n", + " # Conclusion wrapping up the story and highlighting the moral\n", + " chapter_4 = write_chapter(4, f\"Moral and Lessons 
of {topic}\")\n", + " \n", + " # Combine all chapters into one coherent story\n", + " full_story = \"\\n\\n\".join([chapter_1, chapter_2, chapter_3, chapter_4])\n", + " \n", + " return full_story\n", + "**Title: The Journey of One**\n", + "\n", + "In a land far beyond imagination, where numbers were not just symbols but beings with feelings and desires, there was a little number known as One. Though often underestimated, One had a dream larger than any universe: to find its true purpose.\n", + "\n", + "One was simple, yet unique. It started each day by enjoying the sunrise, counting the seconds in silent appreciation of the continuum of time. Yet, in the grand tapestry of numbers, One felt ordinary and unnoticed, especially among the grandiosity of larger numbers like Millions and Billions, which often boasted about their size and importance.\n", + "\n", + "One day, O came across Zero, a soft-spoken and kind companion, often seen lingering in the shadows of others. \"Why do you look so glum, One?\" asked Zero as they both watched the twinkling stars above.\n", + "\n", + "\"I feel small in a world full of giants. What significance do I hold when everyone seems to multiply and magnify everything far beyond my own capacity?\"\n", + "\n", + "Zero smiled softly. \"You must explore, dear One. For you might be small, but with the right touch, you can change everything. You are the beginning of dreams, the spark that starts a continuum.\"\n", + "\n", + "Taking this advice to heart, One embarked on a journey to discover its true power and potential. As it wandered through the Land of Mathematics, it met Addition, the kind-hearted magician, who taught One how it could transform nothing into something, just by joining in a dance.\n", + "\n", + "With Multiplication, One learned coordination and rhythm, expanding its influence exponentially with a simple step forward. 
There were days spent in the Company of Fractions, shrinking itself to explore the depth of intricacy plus seeing life from a new perspective.\n", + "\n", + "Finally, it found itself near the great figure of Unity, where all numbers whether large or small, participated in harmony. Here, One discovered its greatest potential—to bring completeness. When used wisely, One could complete a perfect circle or spell disaster if miscalculated.\n", + "\n", + "In its quest, One realized its strength was simplicity itself. As small as it was, it was the foundation upon which countless worlds depended. Without One, there was nothing to start; no Number Line, no Life Progression.\n", + "\n", + "And so, One returned to its place in the universe, no longer ordinary but extraordinary in its ability to bring beginnings.\n", + "\n", + "Thus, the moral of the story: No matter how small or insignificant you feel, remember that you have the power to change everything. You are the first step in your journey and those of others. Embrace your role and start with conviction, for you are One. \n", + "\n", + "And sometimes, that's all you need to be remarkable.\n", + "\n", + "Once upon a time in the quaint village of Digiton, nestled in the Valley of Numerals, lived the number 2. In this village, each number had their unique talents and ways to contribute to the community. Number 2 was known for its ability to find wonderful pairings and create harmony.\n", + "\n", + "It was a bright, sunny morning when 2 decided it was time to plan the grand Numerals Gala, an event celebrated by all numbers from 1 to 9. This year's theme was \"Unity in Pairs,\" and 2 took the responsibility seriously.\n", + "\n", + "With a checklist in hand, 2 began to organize the event. First, 2 visited its oldest friend, the number 1. \"Would you be one half of a winning pair, dear friend?\" 2 asked. 
\"Of course,\" replied 1, \"together we make the perfect pair of Unity, everyone knows!\"\n", + "\n", + "Next, 2 approached the number 3. Though sometimes perceived as a little off-kilter, 3 was eager to join and suggested pairing with 4 to symbolize growth and progression: 3 plus 4 always added up to 7—a lucky number for all.\n", + "\n", + "Eager to ensure everyone was included, 2 made a special stop at number 5's cheerful blue cottage. \"5, would you create a bridge with me?\" 2 proposed. \"Together we form \"7\", the lucky charm—how can I resist?\" giggled 5.\n", + "\n", + "Day by day, the excitement in Digiton grew. Numbers periodically gathered in the square to rehearse their speeches and musical acts. Finally, the day of the Gala arrived, and pairs paraded on stage, highlighting unity through their performances. The pairing of 6 and 7 showcased a dance of luck and prosperity, while 8 and 9 painted visions of a dreamy future.\n", + "\n", + "As everyone settled down for the final speech, number 2 took the stage, its heart full of joy. \"Dear friends,\" 2 began, \"thank you for showing us the beauty of partnership. Alone, each of us is a number, but together, we build the world. Let us remember that two is a bond that shows love, loyalty, and peace.\"\n", + "\n", + "With a warm round of applause, the Gala concluded, but in their hearts, every number knew that it was 2's thoughtful pairing that showed them the profound harmony within.\n", + "\n", + "And so, 2's legacy in Digiton was etched as a gentle, powerful reminder that the most meaningful journeys are those taken with another by your side. Such was the wisdom of number 2.\n", + "\n", + "Once upon a time, nestled in the quiet and serene landscape of Numerland, there was a unique and charismatic number named \"Three.\" Unlike the other numbers, Three was proudly quirky and adventurous. 
Sporting three shining points, he dazzled with a triangular shape that made him quite distinctive among his peers.\n", + "\n", + "Three lived in the bustling community of Tallytown, a place where numbers came together to form equations, solve problems, and have numerical debates. But Three often felt that Tallytown was too caught up in linear thinking. He liked to think outside the box—or pyramid, in his case.\n", + "\n", + "One sunny day, Three decided to embark on an adventure across the wide fields of positivity. His first stop was Addition Avenue, a lively street where numbers piled atop each other, eagerly building bridges to larger sums. While there, Three met other numbers like Six and Nine, who greeted him warmly.\n", + "\n", + "“Why travel, Three?” asked Six.\n", + "\n", + "\"I'm seeking something more,\" Three replied. \"I feel like there's a whole world of meritorious multiplicities and radiant reciprocals waiting for me!\"\n", + "\n", + "With a friendly nod, Three continued on his journey. He navigated through Subtraction Square, where he learned to appreciate simplicity. As he passed through, Two’s counsel resonated: “Sometimes less is more, Three.”\n", + "\n", + "Eventually, Three found himself at the multipliers' meadow, a wide expanse where numbers did cartwheels, creating exponential wonders. It was here he met Zero, who diffidently warned, \"Multiply with me, and I'll vanish you into nothingness!\"\n", + "\n", + "Three chuckled at the paradox and moved on. He rolled over to Division Dale, where he admired the symmetry of parts and ratios. Three realized he was not just a number but a part of something truly wondrous.\n", + "\n", + "Finally, gazing at the starry skyscape of Infinity Lane, Three discovered his true potential—he was a constant, reliable factor that held significance beyond simple numerical value. 
Each point of his triangular form seemed to twinkle with this newfound wisdom.\n", + "\n", + "As he made his way back home to Tallytown, Three felt renewed, armed with appreciation for his uniqueness and the harmony between all numbers. He returned not just as Three, but as the representation of balance, creativity, and the beautiful geometric world from which he drew his strength.\n", + "\n", + "And so, in the land of Numerland, Three lived happily, not just a simple integer, but a remarkable journey in and of itself—a point of convergence in a universe of endless possibilities.\n", + "\n", + "**The Tale of Four Friends**\n", + "\n", + "Once upon a time, in the cozy town of Little Numbers, there dwelt a modest fellow known simply as \"4.\" Though he appeared ordinary, 4 was actually quite special. He had three devoted friends: 1, 2, and 3. Together, they formed a dynamic quartet of remarkable adventures.\n", + "\n", + "One bright spring morning, they embarked on a journey to solve the mystery of the Lost Sequence. It was said that the sequence held the secret to solving any mathematical problem, and possessing it would mean endless possibilities.\n", + "\n", + "4, ever confident in his stability, led the group with enthusiasm. \"We can decipher any riddle with our unity,\" he declared, his square-shaped stature conveying authority.\n", + "\n", + "Their first challenge arrived at the Great Divide Canyon, a vast gap that seemed insurmountable. \"Fear not!\" said 2, offering help with her talent for pairing. She balanced 1 on her left and 3 on her right. Effortlessly, they formed a bridge sturdy enough for 4 to cross, leading them all safely to the other side.\n", + "\n", + "The team soon reached the Valley of Equations, where intricate puzzles befuddled passersby. With 4's knack for balance and proportion, they made short work of the conundrums. 
1's simplicity, combined with 3's creative approach, solved complex equations, while 2's knack for harmonizing detected patterns invisible to others.\n", + "\n", + "As they journeyed deeper, they encountered the enigma known as the Paradox Terrain. Here, problems that seemed unsolvable loomed ominously. \"Let us remember,\" 4 reminded them, \"that solutions are often nearer than they appear.\"\n", + "\n", + "With a fresh perspective, 3 noticed a pattern: each unsolvable problem required going back to basic principles. By retracing steps, simplifying assumptions, and adding unique insights, they cracked the paradox.\n", + "\n", + "At last, the quartet arrived at the Chamber of the Lost Sequence, where wisdom awaited them. The mystical sequence unveiled itself, revealing the elegance of mathematical harmony, in which each number played a crucial role.\n", + "\n", + "Embracing the sequence, the friends returned to Little Numbers, wiser and more united than ever. Thus, in the camaraderie of 4 and his friends, the town learned a timeless lesson: the greatest strength comes not from singular achievement, but from the harmony of collective unity.\n", + "\n", + "And so, they lived happily and mathematically ever after.\n", + "\n" + ] } + ], + "source": [ + "# Sub-templates for different story styles\n", + "@Template.define\n", + "def write_chapter(chapter_number: int, chapter_name: str) -> str:\n", + " \"\"\"Write a short story about {chapter_number}. Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "@Template.define\n", + "def judge_chapter(story_so_far: str, chapter_number: int) -> bool:\n", + " \"\"\"Decide if the new chapter is coherence with the story so far. 
Do not use any tools.\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "# Main orchestrator template - has access to sub-templates\n", + "@Template.define\n", + "def write_multi_chapter_story(style: Literal[\"moral\", \"funny\"]) -> Callable[[str], str]:\n", + " \"\"\"Generate a function that writes a story in style: {style} about the given topic.\n", + "\n", + " The program can use helper functions defined elsewhere (DO NOT REDEFINE THEM):\n", + " - write_chapter(chapter_number: int, chapter_name: str) -> str\n", + " - judge_chapter(story_so_far: str, chapter_number: int) -> bool\"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "# Verify sub-templates are captured in write_story's lexical context\n", + "print(\"Sub-templates available to write_story:\", write_multi_chapter_story.tools.keys())\n", + "\n", + "with (\n", + " handler(RetryLLMHandler(num_retries=3)),\n", + " handler(provider),\n", + " handler(UnsafeEvalProvider()),\n", + "):\n", + " print(\"=== Story with moral ===\")\n", + " function_that_writes_story = write_multi_chapter_story(\"moral\")\n", + " print(inspect.getsource(function_that_writes_story))\n", + " print(function_that_writes_story(\"a curious cat\"))\n", + " print()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 5 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/effectful/handlers/llm/evaluation.py b/effectful/handlers/llm/evaluation.py index 90fc4e52..01c2f241 100644 --- a/effectful/handlers/llm/evaluation.py +++ b/effectful/handlers/llm/evaluation.py @@ -96,21 +96,6 @@ def exec( ) -@defop -def doctest_check(obj: object, ctx: typing.Mapping[str, Any]) -> None: - 
""" - Run doctests for an object under the given context. - - obj: The object whose doctests should be tested. - ctx: The namespace used to run doctest examples. - - Returns None, raises TypeError on doctest failure. - """ - raise NotImplementedError( - "An eval provider must be installed in order to run doctests." - ) - - @defop def test(obj: object, ctx: typing.Mapping[str, Any]) -> None: """ @@ -705,10 +690,6 @@ def type_check( ) -> None: mypy_type_check(module, ctx, expected_params, expected_return) - @implements(doctest_check) - def doctest_check(self, obj: object, ctx: typing.Mapping[str, Any]) -> None: - _run_doctests(obj, ctx) - @implements(parse) def parse(self, source: str, filename: str) -> ast.Module: # Cache source under `filename` so inspect.getsource() can retrieve it later. @@ -769,10 +750,6 @@ def type_check( ) -> None: mypy_type_check(module, ctx, expected_params, expected_return) - @implements(doctest_check) - def doctest_check(self, obj: object, ctx: typing.Mapping[str, Any]) -> None: - _run_doctests(obj, ctx) - @implements(parse) def parse(self, source: str, filename: str) -> ast.Module: # Keep inspect.getsource() working for dynamically-defined objects. 
From 24c6dfb36e295eec022d26340637a0df9984a383 Mon Sep 17 00:00:00 2001 From: datvo06 Date: Fri, 6 Feb 2026 18:03:31 -0500 Subject: [PATCH 03/18] Adding test --- tests/test_handlers_llm_doctest.py | 92 ++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 tests/test_handlers_llm_doctest.py diff --git a/tests/test_handlers_llm_doctest.py b/tests/test_handlers_llm_doctest.py new file mode 100644 index 00000000..cdaea55a --- /dev/null +++ b/tests/test_handlers_llm_doctest.py @@ -0,0 +1,92 @@ +import os +from collections.abc import Callable + +import pytest + +from effectful.handlers.llm import Template +from effectful.handlers.llm.completions import LiteLLMProvider, ResultDecodingError +from effectful.handlers.llm.encoding import Encodable, SynthesizedFunction +from effectful.handlers.llm.evaluation import DoctestHandler, UnsafeEvalProvider +from effectful.ops.semantics import NotHandled, handler + +HAS_OPENAI_KEY = "OPENAI_API_KEY" in os.environ and os.environ["OPENAI_API_KEY"] +requires_openai = pytest.mark.skipif( + not HAS_OPENAI_KEY, reason="OPENAI_API_KEY environment variable not set" +) + + +@Template.define +def synthesize_counter_with_doctest(char: str) -> Callable[[str], int]: + """Generate a Python function named count_char that counts occurrences of the character '{char}' + in a given input string. + + The function should be case-sensitive. + + Examples: + >>> count_char("banana") + 4 + """ + raise NotHandled + + +@Template.define +def synthesize_inner_with_doctest(char: str) -> Callable[[str], int]: + """Generate a Python function named count_char that counts occurrences of the character '{char}' + in a given input string. + + The function should be case-sensitive. + + Examples: + >>> count_char("orange") + 3 + """ + raise NotHandled + + +@Template.define +def synthesize_outer(char: str) -> Callable[[str], int]: + """Use the synthesize_inner_with_doctest tool to produce the function and return it. 
+ Do not implement the function yourself. + """ + raise NotHandled + + +class TestDoctestExecution: + """Tests for doctest execution during callable synthesis.""" + + def test_decode_runs_doctest(self): + encodable = Encodable.define(Callable[[str], int], {}) + func_source = SynthesizedFunction( + module_code="def count_char(input_string: str) -> int:\n" + " return input_string.count('a')" + ) + doctest_handler = DoctestHandler() + doctest_handler._doctest_stack.append(">>> count_char('banana')\n4\n") + with ( + handler(UnsafeEvalProvider()), + handler(doctest_handler), + ): + with pytest.raises(TypeError, match="doctest failed"): + encodable.decode(func_source) + + @requires_openai + def test_template_doctest_runs(self): + provider = LiteLLMProvider(model="gpt-4o-mini") + with ( + handler(provider), + handler(UnsafeEvalProvider()), + handler(DoctestHandler()), + ): + with pytest.raises(ResultDecodingError, match="doctest failed"): + synthesize_counter_with_doctest("a") + + @requires_openai + def test_nested_synthesis_doctest_runs(self): + provider = LiteLLMProvider(model="gpt-4o-mini") + with ( + handler(provider), + handler(UnsafeEvalProvider()), + handler(DoctestHandler()), + ): + with pytest.raises(ResultDecodingError, match="doctest failed"): + synthesize_outer("o") From 7e09561fd1bd7e39f155d020125876e748b0431a Mon Sep 17 00:00:00 2001 From: datvo06 Date: Mon, 9 Feb 2026 10:37:37 -0500 Subject: [PATCH 04/18] Remove unnecessary DocTestHandler --- effectful/handlers/llm/evaluation.py | 8 ++++---- tests/test_handlers_llm_encoding.py | 26 -------------------------- tests/test_handlers_llm_provider.py | 6 ------ 3 files changed, 4 insertions(+), 36 deletions(-) diff --git a/effectful/handlers/llm/evaluation.py b/effectful/handlers/llm/evaluation.py index 9892def6..087ba92d 100644 --- a/effectful/handlers/llm/evaluation.py +++ b/effectful/handlers/llm/evaluation.py @@ -1,8 +1,8 @@ import ast import builtins import collections.abc -import doctest import copy +import 
doctest import inspect import keyword import linecache @@ -107,10 +107,10 @@ def test(obj: object, ctx: typing.Mapping[str, Any]) -> None: obj: The synthesized module object. ctx: The namespace used to run doctest examples. + + No-op by default; install a DoctestHandler to actually run doctests. """ - raise NotImplementedError( - "A doctest handler must be installed in order to run doctests." - ) + pass # Type checking implementation diff --git a/tests/test_handlers_llm_encoding.py b/tests/test_handlers_llm_encoding.py index 8daea4ed..a542ff98 100644 --- a/tests/test_handlers_llm_encoding.py +++ b/tests/test_handlers_llm_encoding.py @@ -11,7 +11,6 @@ from effectful.handlers.llm.encoding import Encodable, SynthesizedFunction from effectful.handlers.llm.evaluation import ( - DoctestHandler, RestrictedEvalProvider, UnsafeEvalProvider, ) @@ -753,7 +752,6 @@ def add(a: int, b: int) -> int: with ( handler(eval_provider), - handler(DoctestHandler()), ): decoded = encodable.decode(encoded) assert callable(decoded) @@ -771,7 +769,6 @@ def test_decode_with_ellipsis_params(self, eval_provider): ) with ( handler(eval_provider), - handler(DoctestHandler()), ): decoded = encodable.decode(func_source) assert callable(decoded) @@ -788,7 +785,6 @@ def test_decode_with_env(self, eval_provider): with ( handler(eval_provider), - handler(DoctestHandler()), ): decoded = encodable.decode(source) assert callable(decoded) @@ -831,7 +827,6 @@ def test_decode_no_function_at_end_raises(self, eval_provider): ): with ( handler(eval_provider), - handler(DoctestHandler()), ): encodable.decode(source) @@ -848,7 +843,6 @@ def bar() -> int: ) with ( handler(eval_provider), - handler(DoctestHandler()), ): decoded = encodable.decode(source) assert callable(decoded) @@ -873,7 +867,6 @@ def greet(self): ): with ( handler(eval_provider), - handler(DoctestHandler()), ): encodable.decode(source) @@ -885,7 +878,6 @@ def greet(name: str) -> str: encodable = Encodable.define(Callable[[str], str], {}) with ( 
handler(eval_provider), - handler(DoctestHandler()), ): encoded = encodable.encode(greet) decoded = encodable.decode(encoded) @@ -928,7 +920,6 @@ def test_decode_validates_last_statement(self, eval_provider): ): with ( handler(eval_provider), - handler(DoctestHandler()), ): encodable.decode(source) @@ -951,7 +942,6 @@ def test_typed_callable_validates_param_count(self, eval_provider): with pytest.raises(ValueError, match="expected function with 2 parameters"): with ( handler(eval_provider), - handler(DoctestHandler()), ): encodable.decode(source) @@ -967,7 +957,6 @@ def test_typed_callable_validates_return_type(self, eval_provider): with pytest.raises(TypeError, match="Incompatible types in assignment"): with ( handler(eval_provider), - handler(DoctestHandler()), ): encodable.decode(source) @@ -986,7 +975,6 @@ def test_typed_callable_requires_return_annotation(self, eval_provider): ): with ( handler(eval_provider), - handler(DoctestHandler()), ): encodable.decode(source) @@ -1001,7 +989,6 @@ def test_typed_callable_accepts_correct_signature(self, eval_provider): ) with ( handler(eval_provider), - handler(DoctestHandler()), ): result = encodable.decode(source) assert callable(result) @@ -1044,7 +1031,6 @@ def test_ellipsis_callable_skips_param_validation(self, eval_provider): ) with ( handler(eval_provider), - handler(DoctestHandler()), ): result = encodable.decode(source) assert callable(result) @@ -1087,7 +1073,6 @@ def test_validates_param_count_via_ast(self, eval_provider): with pytest.raises(ValueError, match="expected function with 2 parameters"): with ( handler(eval_provider), - handler(DoctestHandler()), ): encodable.decode(source) @@ -1104,7 +1089,6 @@ def test_validates_param_count_zero_params(self, eval_provider): with pytest.raises(ValueError, match="expected function with 0 parameters"): with ( handler(eval_provider), - handler(DoctestHandler()), ): encodable.decode(source) @@ -1119,7 +1103,6 @@ def test_validates_accepts_zero_params(self, 
eval_provider): ) with ( handler(eval_provider), - handler(DoctestHandler()), ): result = encodable.decode(source) assert callable(result) @@ -1146,7 +1129,6 @@ def test_ellipsis_callable_validates_return_type(self, eval_provider): with pytest.raises(TypeError, match="Incompatible types in assignment"): with ( handler(eval_provider), - handler(DoctestHandler()), ): encodable.decode(source) @@ -1160,7 +1142,6 @@ def test_callable_with_single_param(self, eval_provider): ) with ( handler(eval_provider), - handler(DoctestHandler()), ): result = encodable.decode(source) assert callable(result) @@ -1176,7 +1157,6 @@ def test_callable_with_many_params(self, eval_provider): ) with ( handler(eval_provider), - handler(DoctestHandler()), ): result = encodable.decode(source) assert callable(result) @@ -1192,7 +1172,6 @@ def test_callable_with_bool_return(self, eval_provider): ) with ( handler(eval_provider), - handler(DoctestHandler()), ): result = encodable.decode(source) assert callable(result) @@ -1233,7 +1212,6 @@ def test_restricted_blocks_private_attribute_access(self): with pytest.raises(Exception): # Could be NameError or AttributeError with ( handler(RestrictedEvalProvider()), - handler(DoctestHandler()), ): fn = encodable.decode(source) fn("test") @@ -1277,7 +1255,6 @@ def test_builtins_in_env_does_not_bypass_security(self): with pytest.raises(Exception): # Could be NameError, ValueError, or other with ( handler(RestrictedEvalProvider()), - handler(DoctestHandler()), ): fn = encodable_open.decode(source_open) # If decode succeeded (shouldn't), calling should still fail @@ -1293,7 +1270,6 @@ def test_builtins_in_env_does_not_bypass_security(self): with pytest.raises(Exception): with ( handler(RestrictedEvalProvider()), - handler(DoctestHandler()), ): fn = encodable_import.decode(source_import) fn() @@ -1307,7 +1283,6 @@ def test_builtins_in_env_does_not_bypass_security(self): ) with ( handler(RestrictedEvalProvider()), - handler(DoctestHandler()), ): fn = 
encodable_safe.decode(source_safe) assert fn(2, 3) == 5, "Safe code should still work" @@ -1321,7 +1296,6 @@ def test_builtins_in_env_does_not_bypass_security(self): with pytest.raises(Exception): with ( handler(RestrictedEvalProvider()), - handler(DoctestHandler()), ): fn = encodable_private.decode(source_private) fn("test") diff --git a/tests/test_handlers_llm_provider.py b/tests/test_handlers_llm_provider.py index 2028396a..60113078 100644 --- a/tests/test_handlers_llm_provider.py +++ b/tests/test_handlers_llm_provider.py @@ -1123,7 +1123,6 @@ def test_synthesize_adder_function(self, request): with ( handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), handler(UnsafeEvalProvider()), - handler(DoctestHandler()), handler(LimitLLMCallsHandler(max_calls=1)), ): add_func = synthesize_adder() @@ -1140,7 +1139,6 @@ def test_synthesize_string_processor(self, request): with ( handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), handler(UnsafeEvalProvider()), - handler(DoctestHandler()), handler(LimitLLMCallsHandler(max_calls=1)), ): process_func = synthesize_string_processor() @@ -1157,7 +1155,6 @@ def test_synthesize_counter_with_parameter(self, request): with ( handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), handler(UnsafeEvalProvider()), - handler(DoctestHandler()), handler(LimitLLMCallsHandler(max_calls=3)), ): count_a = synthesize_counter("a") @@ -1200,7 +1197,6 @@ def test_synthesized_function_roundtrip(self, request): with ( handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), handler(UnsafeEvalProvider()), - handler(DoctestHandler()), handler(LimitLLMCallsHandler(max_calls=1)), ): # Synthesize a function @@ -1225,7 +1221,6 @@ def test_synthesize_bool_return_type(self, request): with ( handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), handler(UnsafeEvalProvider()), - handler(DoctestHandler()), handler(LimitLLMCallsHandler(max_calls=1)), ): is_even = synthesize_is_even() @@ -1248,7 +1243,6 @@ def 
test_synthesize_three_params(self, request): with ( handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), handler(UnsafeEvalProvider()), - handler(DoctestHandler()), handler(LimitLLMCallsHandler(max_calls=1)), ): multiply_three = synthesize_three_param_func() From 72459dd254fdf4c4c64dacbcd34246b941c33cbd Mon Sep 17 00:00:00 2001 From: datvo06 Date: Mon, 9 Feb 2026 10:44:50 -0500 Subject: [PATCH 05/18] Lint --- docs/source/llm.ipynb | 181 ++++++++++++++---------------------------- 1 file changed, 60 insertions(+), 121 deletions(-) diff --git a/docs/source/llm.ipynb b/docs/source/llm.ipynb index d10a95be..77858caa 100644 --- a/docs/source/llm.ipynb +++ b/docs/source/llm.ipynb @@ -16,22 +16,10 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 1, "id": "5aaf649f", "metadata": {}, - "outputs": [ - { - "ename": "ImportError", - "evalue": "cannot import name 'DoctestHandler' from 'effectful.handlers.llm.evaluation' (/Users/nguyendat/Marc/effectful/effectful/handlers/llm/evaluation.py)", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mImportError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[18]\u001b[39m\u001b[32m, line 22\u001b[39m\n\u001b[32m 17\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01meffectful\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mhandlers\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Template, Tool\n\u001b[32m 18\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01meffectful\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mhandlers\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcompletions\u001b[39;00m\u001b[38;5;250m 
\u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 19\u001b[39m LiteLLMProvider,\n\u001b[32m 20\u001b[39m RetryLLMHandler,\n\u001b[32m 21\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m22\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01meffectful\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mhandlers\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mevaluation\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m DoctestHandler, UnsafeEvalProvider\n\u001b[32m 23\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01meffectful\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mops\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01msemantics\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m NotHandled, handler\n\u001b[32m 25\u001b[39m provider = LiteLLMProvider()\n", - "\u001b[31mImportError\u001b[39m: cannot import name 'DoctestHandler' from 'effectful.handlers.llm.evaluation' (/Users/nguyendat/Marc/effectful/effectful/handlers/llm/evaluation.py)" - ] - } - ], + "outputs": [], "source": [ "import base64\n", "import dataclasses\n", @@ -111,17 +99,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "In the sea where the silver fish play,\n", - "A salmon once swam by the bay.\n", - "With a splash and a swirl,\n", - "Past the shells it would twirl,\n", - "And vanish at the end of the day.\n", + "There once was a curious fish,\n", + "Who swam with a tail that could swish.\n", + "To the sea it would dart,\n", + "Like a work of fine art,\n", + "In the ocean it made quite a splish!\n", "----------------------------------------\n", - "In the ocean, where fish freely roam,\n", - "A small school called a coral reef home.\n", - "With fins all aglow,\n", - "They dart to and fro,\n", - "Dancing waves as sea breezes comb.\n" + "In the ocean where fishes freely roam,\n", + "A 
clownfish felt quite at home.\n", + "He said with a glint,\n", + "“I’m orange with a hint,\n", + "But I'm not just a brush in sea's foam!”\n" ] } ], @@ -151,18 +139,18 @@ "output_type": "stream", "text": [ "\n", - "Silent water glides,\n", - "Scales shimmering through currents—\n", - "Nature's art in waves.\n", + "Silent streams below,\n", + "Fish glide and dance through water—\n", + "Nature's quiet grace.\n", "----------------------------------------\n", - "Silent water glides,\n", - "Scales shimmering through currents—\n", - "Nature's art in waves.\n", + "Silent streams below,\n", + "Fish glide and dance through water—\n", + "Nature's quiet grace.\n", "\n" ] }, { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ "/Users/nguyendat/Marc/effectful/.venv/lib/python3.12/site-packages/pydantic/main.py:528: UserWarning: Pydantic serializer warnings:\n", @@ -175,21 +163,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "Silent waters gleam, \n", - "Fish dart in a moonlit stream, \n", - "Nature's quiet dream.\n", + "Silent streams flowing,\n", + "Silver scales shimmer below—\n", + "Fish in quiet dance.\n", "----------------------------------------\n", - "Silent waters dance, \n", - "Scales shimmer in fleeting light, \n", - "Fish glide through soft dreams.\n", + "Gentle fish gliding \n", + "Silent ripples in the stream \n", + "Nature's dance beneath.\n", "\n", - "Fish swim through blue waves, \n", - "Their scales gleam like silver jewels, \n", - "Silent in the deep.\n", + "Silent waters glide,\n", + "Scales shimmer beneath the light—\n", + "Fish dance in the deep.\n", "----------------------------------------\n", "Silent waters flow,\n", - "Fish dart beneath gentle waves—\n", - "Nature's dance below.\n" + "Silver glimmers dance below—\n", + "Fish weave through shadows.\n" ] } ], @@ -272,54 +260,17 @@ "execution_count": null, "id": "c83bbdc0", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"def count_char_a(text: str) -> int:\n", - " \"\"\"\n", - " Counts the occurrences of 'a' in the given string.\n", - "\n", - " Args:\n", - " text (str): The string to search within.\n", - "\n", - " Returns:\n", - " int: The count of 'a' characters in the string.\n", - "\n", - " Examples:\n", - " >>> count_char_a('banana')\n", - " 3\n", - " >>> count_char_a('apple')\n", - " 1\n", - " >>> count_char_a('cherry')\n", - " 0\n", - " \"\"\"\n", - " count = 0\n", - " for char in text:\n", - " if char == 'a':\n", - " count += 1\n", - " return count\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "@Template.define\n", "def count_char(char: str) -> Callable[[str], int]:\n", - " \"\"\"Write a function named count_char which takes a string and counts the occurrances of '{char}'. Do not use any tools.\n", - "\n", - " Examples:\n", - " >>> count_char(\"banana\")\n", - " 3\n", - " >>> count_char(\"cherry\")\n", - " 0\n", - " \"\"\"\n", + " \"\"\"Write a function named count_char which takes a string and counts the occurrances of '{char}'. Do not use any tools.\"\"\"\n", " raise NotHandled\n", "\n", "\n", "# Use UnsafeEvalProvider for simple examples; RestrictedEvalProvider may need extra globals.\n", - "with handler(provider), handler(UnsafeEvalProvider()), handler(DoctestHandler()):\n", + "# DoctestHandler is not required for synthesis -- it is optional.\n", + "with handler(provider), handler(UnsafeEvalProvider()):\n", " count_a = count_char(\"a\")\n", " assert callable(count_a)\n", " assert count_a(\"banana\") == 3\n", @@ -335,7 +286,9 @@ "source": [ "### Doctest Feedback\n", "\n", - "Write doctests in the template docstring for callable synthesis. The docstring is formatted with the call arguments, and the doctests are executed during decoding." 
+ "You can optionally install a `DoctestHandler` to run doctests from the template docstring during callable synthesis.\n", + "Without `DoctestHandler`, synthesis succeeds even if the docstring contains examples that don't match the synthesized function.\n", + "With `DoctestHandler`, the doctests are executed and a `ResultDecodingError` is raised on failure." ] }, { @@ -343,51 +296,37 @@ "execution_count": 7, "id": "793b12a5", "metadata": {}, - "outputs": [ - { - "ename": "ResultDecodingError", - "evalue": "Error decoding response: mypy type check failed:\n:170: \u001b[1m\u001b[31merror:\u001b[m Name \u001b[m\u001b[1m\"count_char\"\u001b[m already defined on line 159 \u001b[m\u001b[33m[no-redef]\u001b[m\n\nimport collections\nimport collections.abc\nimport IPython\nimport IPython.core\nimport IPython.core.alias\nimport IPython.core.application\nimport IPython.core.async_helpers\nimport IPython.core.autocall\nimport IPython.core.builtin_trap\nimport IPython.core.compilerop\nimport IPython.core.completer\nimport IPython.core.completerlib\nimport IPython.core.crashhandler\nimport IPython.core.debugger\nimport IPython.core.debugger_backport\nimport IPython.core.display\nimport IPython.core.display_functions\nimport IPython.core.display_trap\nimport IPython.core.displayhook\nimport IPython.core.displaypub\nimport IPython.core.doctb\nimport IPython.core.error\nimport IPython.core.events\nimport IPython.core.extensions\nimport IPython.core.formatters\nimport IPython.core.getipython\nimport IPython.core.guarded_eval\nimport IPython.core.history\nimport IPython.core.hooks\nimport IPython.core.inputtransformer2\nimport IPython.core.interactiveshell\nimport IPython.core.latex_symbols\nimport IPython.core.logger\nimport IPython.core.macro\nimport IPython.core.magic\nimport IPython.core.magic_arguments\nimport IPython.core.magics\nimport IPython.core.magics.ast_mod\nimport IPython.core.magics.auto\nimport IPython.core.magics.basic\nimport IPython.core.magics.code\nimport 
IPython.core.magics.config\nimport IPython.core.magics.display\nimport IPython.core.magics.execution\nimport IPython.core.magics.extension\nimport IPython.core.magics.history\nimport IPython.core.magics.logging\nimport IPython.core.magics.namespace\nimport IPython.core.magics.osm\nimport IPython.core.magics.packaging\nimport IPython.core.magics.pylab\nimport IPython.core.magics.script\nimport IPython.core.oinspect\nimport IPython.core.page\nimport IPython.core.payload\nimport IPython.core.prefilter\nimport IPython.core.profiledir\nimport IPython.core.release\nimport IPython.core.shellapp\nimport IPython.core.splitinput\nimport IPython.core.tbtools\nimport IPython.core.tips\nimport IPython.core.ultratb\nimport IPython.core.usage\nimport IPython.display\nimport IPython.extensions\nimport IPython.extensions.storemagic\nimport IPython.external\nimport IPython.external.pickleshare\nimport IPython.lib\nimport IPython.lib.clipboard\nimport IPython.lib.display\nimport IPython.lib.pretty\nimport IPython.paths\nimport IPython.terminal\nimport IPython.terminal.debugger\nimport IPython.terminal.embed\nimport IPython.terminal.interactiveshell\nimport IPython.terminal.ipapp\nimport IPython.terminal.magics\nimport IPython.terminal.prompts\nimport IPython.terminal.pt_inputhooks\nimport IPython.terminal.ptutils\nimport IPython.terminal.shortcuts\nimport IPython.terminal.shortcuts.auto_match\nimport IPython.terminal.shortcuts.auto_suggest\nimport IPython.terminal.shortcuts.filters\nimport IPython.testing\nimport IPython.testing.skipdoctest\nimport IPython.utils\nimport IPython.utils.PyColorize\nimport IPython.utils._process_common\nimport IPython.utils._process_posix\nimport IPython.utils._sysinfo\nimport IPython.utils.capture\nimport IPython.utils.contexts\nimport IPython.utils.data\nimport IPython.utils.decorators\nimport IPython.utils.dir2\nimport IPython.utils.docs\nimport IPython.utils.encoding\nimport IPython.utils.frame\nimport IPython.utils.generics\nimport 
IPython.utils.importstring\nimport IPython.utils.io\nimport IPython.utils.ipstruct\nimport IPython.utils.module_paths\nimport IPython.utils.openpy\nimport IPython.utils.path\nimport IPython.utils.process\nimport IPython.utils.py3compat\nimport IPython.utils.sentinel\nimport IPython.utils.strdispatch\nimport IPython.utils.sysinfo\nimport IPython.utils.syspathcontext\nimport IPython.utils.terminal\nimport IPython.utils.text\nimport IPython.utils.timing\nimport IPython.utils.tokenutil\nimport IPython.utils.wildcard\nimport collections\nimport collections.abc\nimport effectful\nimport effectful.handlers\nimport effectful.handlers.llm\nimport effectful.handlers.llm.completions\nimport effectful.handlers.llm.encoding\nimport effectful.handlers.llm.evaluation\nimport effectful.handlers.llm.template\nimport effectful.internals\nimport effectful.internals.runtime\nimport effectful.internals.unification\nimport effectful.ops\nimport effectful.ops.semantics\nimport effectful.ops.syntax\nimport effectful.ops.types\nimport pathlib\nIn: collections.abc.MutableSequence[str]\nOut: dict\n_: str\n__: str\n___: str\n__vsc_ipynb_file__: str\n_dh: collections.abc.MutableSequence[pathlib.PosixPath]\n_i: str\n_i1: str\n_i2: str\n_i3: str\n_i4: str\n_i5: str\n_i6: str\n_i7: str\n_ih: collections.abc.MutableSequence[str]\n_ii: str\n_iii: str\n_oh: dict\nchar: str\ncount_a: collections.abc.Callable[[str], int]\ncount_char: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\ncount_char_with_doctest: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\nexit: IPython.core.autocall.ZMQExitAutocall\nhaiku: collections.abc.Callable[[str], str]\nhaiku_no_cache: effectful.ops.types.Operation[[str], str]\nlimerick: effectful.ops.types.Operation[[str], str]\nprimes: effectful.ops.types.Operation[[int], int]\nprovider: effectful.ops.types.Interpretation\nprovider_cached: effectful.ops.types.Interpretation\nquit: 
IPython.core.autocall.ZMQExitAutocall\n\ndef count_char(s: str) -> int:\n \"\"\"\n Count the occurrences of the character 'a' in the given string.\n\n Args:\n s (str): The string in which to count occurrences of 'a'.\n\n Returns:\n int: The number of times 'a' appears in the string.\n\n Examples:\n >>> count_char('banana')\n 3\n >>> count_char('apple')\n 1\n >>> count_char('cherry')\n 0\n \"\"\"\n return s.count('a')\n_synthesized_check: collections.abc.Callable[[str], int] = count_char. Please provide a valid response and try again.", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mTypeError\u001b[39m Traceback (most recent call last)", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/completions.py:302\u001b[39m, in \u001b[36mcall_assistant\u001b[39m\u001b[34m(tools, response_format, model, **kwargs)\u001b[39m\n\u001b[32m 301\u001b[39m raw_result = response_model.model_validate_json(serialized_result)\n\u001b[32m--> \u001b[39m\u001b[32m302\u001b[39m result = \u001b[43mresponse_format\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mraw_result\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[32m 303\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (pydantic.ValidationError, \u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mSyntaxError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/encoding.py:452\u001b[39m, in \u001b[36mCallableEncodable.decode\u001b[39m\u001b[34m(self, encoded_value)\u001b[39m\n\u001b[32m 451\u001b[39m \u001b[38;5;66;03m# Type-check with mypy; pass original module_code so mypy sees exact source\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m452\u001b[39m 
\u001b[43mevaluation\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtype_check\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 453\u001b[39m \u001b[43m \u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mctx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexpected_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexpected_return\u001b[49m\n\u001b[32m 454\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 456\u001b[39m \u001b[38;5;66;03m# Compile and execute\u001b[39;00m\n\u001b[32m 457\u001b[39m \u001b[38;5;66;03m# https://docs.python.org/3/library/functions.html#exec\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:494\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m self_handler \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m494\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mself_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 495\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m args \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(args[\u001b[32m0\u001b[39m], Operation) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m args[\u001b[32m0\u001b[39m].__apply__:\n\u001b[32m 496\u001b[39m \u001b[38;5;66;03m# Prevent infinite recursion when calling self.apply directly\u001b[39;00m\n", - "\u001b[36mFile 
\u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:70\u001b[39m, in \u001b[36m_set_prompt..bound_body\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 69\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({prompt: handler({prompt: next_cont})(cont)}):\n\u001b[32m---> \u001b[39m\u001b[32m70\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbody\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:56\u001b[39m, in \u001b[36m_save_args.._cont_wrapper\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 55\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({_get_args: \u001b[38;5;28;01mlambda\u001b[39;00m: (a, k)}):\n\u001b[32m---> \u001b[39m\u001b[32m56\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/evaluation.py:628\u001b[39m, in \u001b[36mUnsafeEvalProvider.type_check\u001b[39m\u001b[34m(self, module, ctx, expected_params, expected_return)\u001b[39m\n\u001b[32m 620\u001b[39m \u001b[38;5;129m@implements\u001b[39m(type_check)\n\u001b[32m 621\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mtype_check\u001b[39m(\n\u001b[32m 622\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 626\u001b[39m expected_return: \u001b[38;5;28mtype\u001b[39m,\n\u001b[32m 627\u001b[39m ) -> \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m628\u001b[39m 
\u001b[43mmypy_type_check\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mctx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexpected_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexpected_return\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/evaluation.py:586\u001b[39m, in \u001b[36mmypy_type_check\u001b[39m\u001b[34m(module, ctx, expected_params, expected_return)\u001b[39m\n\u001b[32m 585\u001b[39m report = (stdout \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m) + (stderr \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m586\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mmypy type check failed:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mreport\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00msource\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 587\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", - "\u001b[31mTypeError\u001b[39m: mypy type check failed:\n:170: \u001b[1m\u001b[31merror:\u001b[m Name \u001b[m\u001b[1m\"count_char\"\u001b[m already defined on line 159 \u001b[m\u001b[33m[no-redef]\u001b[m\n\nimport collections\nimport collections.abc\nimport IPython\nimport IPython.core\nimport IPython.core.alias\nimport IPython.core.application\nimport IPython.core.async_helpers\nimport IPython.core.autocall\nimport IPython.core.builtin_trap\nimport IPython.core.compilerop\nimport IPython.core.completer\nimport IPython.core.completerlib\nimport IPython.core.crashhandler\nimport IPython.core.debugger\nimport IPython.core.debugger_backport\nimport IPython.core.display\nimport 
IPython.core.display_functions\nimport IPython.core.display_trap\nimport IPython.core.displayhook\nimport IPython.core.displaypub\nimport IPython.core.doctb\nimport IPython.core.error\nimport IPython.core.events\nimport IPython.core.extensions\nimport IPython.core.formatters\nimport IPython.core.getipython\nimport IPython.core.guarded_eval\nimport IPython.core.history\nimport IPython.core.hooks\nimport IPython.core.inputtransformer2\nimport IPython.core.interactiveshell\nimport IPython.core.latex_symbols\nimport IPython.core.logger\nimport IPython.core.macro\nimport IPython.core.magic\nimport IPython.core.magic_arguments\nimport IPython.core.magics\nimport IPython.core.magics.ast_mod\nimport IPython.core.magics.auto\nimport IPython.core.magics.basic\nimport IPython.core.magics.code\nimport IPython.core.magics.config\nimport IPython.core.magics.display\nimport IPython.core.magics.execution\nimport IPython.core.magics.extension\nimport IPython.core.magics.history\nimport IPython.core.magics.logging\nimport IPython.core.magics.namespace\nimport IPython.core.magics.osm\nimport IPython.core.magics.packaging\nimport IPython.core.magics.pylab\nimport IPython.core.magics.script\nimport IPython.core.oinspect\nimport IPython.core.page\nimport IPython.core.payload\nimport IPython.core.prefilter\nimport IPython.core.profiledir\nimport IPython.core.release\nimport IPython.core.shellapp\nimport IPython.core.splitinput\nimport IPython.core.tbtools\nimport IPython.core.tips\nimport IPython.core.ultratb\nimport IPython.core.usage\nimport IPython.display\nimport IPython.extensions\nimport IPython.extensions.storemagic\nimport IPython.external\nimport IPython.external.pickleshare\nimport IPython.lib\nimport IPython.lib.clipboard\nimport IPython.lib.display\nimport IPython.lib.pretty\nimport IPython.paths\nimport IPython.terminal\nimport IPython.terminal.debugger\nimport IPython.terminal.embed\nimport IPython.terminal.interactiveshell\nimport IPython.terminal.ipapp\nimport 
IPython.terminal.magics\nimport IPython.terminal.prompts\nimport IPython.terminal.pt_inputhooks\nimport IPython.terminal.ptutils\nimport IPython.terminal.shortcuts\nimport IPython.terminal.shortcuts.auto_match\nimport IPython.terminal.shortcuts.auto_suggest\nimport IPython.terminal.shortcuts.filters\nimport IPython.testing\nimport IPython.testing.skipdoctest\nimport IPython.utils\nimport IPython.utils.PyColorize\nimport IPython.utils._process_common\nimport IPython.utils._process_posix\nimport IPython.utils._sysinfo\nimport IPython.utils.capture\nimport IPython.utils.contexts\nimport IPython.utils.data\nimport IPython.utils.decorators\nimport IPython.utils.dir2\nimport IPython.utils.docs\nimport IPython.utils.encoding\nimport IPython.utils.frame\nimport IPython.utils.generics\nimport IPython.utils.importstring\nimport IPython.utils.io\nimport IPython.utils.ipstruct\nimport IPython.utils.module_paths\nimport IPython.utils.openpy\nimport IPython.utils.path\nimport IPython.utils.process\nimport IPython.utils.py3compat\nimport IPython.utils.sentinel\nimport IPython.utils.strdispatch\nimport IPython.utils.sysinfo\nimport IPython.utils.syspathcontext\nimport IPython.utils.terminal\nimport IPython.utils.text\nimport IPython.utils.timing\nimport IPython.utils.tokenutil\nimport IPython.utils.wildcard\nimport collections\nimport collections.abc\nimport effectful\nimport effectful.handlers\nimport effectful.handlers.llm\nimport effectful.handlers.llm.completions\nimport effectful.handlers.llm.encoding\nimport effectful.handlers.llm.evaluation\nimport effectful.handlers.llm.template\nimport effectful.internals\nimport effectful.internals.runtime\nimport effectful.internals.unification\nimport effectful.ops\nimport effectful.ops.semantics\nimport effectful.ops.syntax\nimport effectful.ops.types\nimport pathlib\nIn: collections.abc.MutableSequence[str]\nOut: dict\n_: str\n__: str\n___: str\n__vsc_ipynb_file__: str\n_dh: collections.abc.MutableSequence[pathlib.PosixPath]\n_i: 
str\n_i1: str\n_i2: str\n_i3: str\n_i4: str\n_i5: str\n_i6: str\n_i7: str\n_ih: collections.abc.MutableSequence[str]\n_ii: str\n_iii: str\n_oh: dict\nchar: str\ncount_a: collections.abc.Callable[[str], int]\ncount_char: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\ncount_char_with_doctest: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\nexit: IPython.core.autocall.ZMQExitAutocall\nhaiku: collections.abc.Callable[[str], str]\nhaiku_no_cache: effectful.ops.types.Operation[[str], str]\nlimerick: effectful.ops.types.Operation[[str], str]\nprimes: effectful.ops.types.Operation[[int], int]\nprovider: effectful.ops.types.Interpretation\nprovider_cached: effectful.ops.types.Interpretation\nquit: IPython.core.autocall.ZMQExitAutocall\n\ndef count_char(s: str) -> int:\n \"\"\"\n Count the occurrences of the character 'a' in the given string.\n\n Args:\n s (str): The string in which to count occurrences of 'a'.\n\n Returns:\n int: The number of times 'a' appears in the string.\n\n Examples:\n >>> count_char('banana')\n 3\n >>> count_char('apple')\n 1\n >>> count_char('cherry')\n 0\n \"\"\"\n return s.count('a')\n_synthesized_check: collections.abc.Callable[[str], int] = count_char", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[31mResultDecodingError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 10\u001b[39m\n\u001b[32m 6\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m NotHandled\n\u001b[32m 9\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler(provider), handler(UnsafeEvalProvider()):\n\u001b[32m---> \u001b[39m\u001b[32m10\u001b[39m count_a = \u001b[43mcount_char_with_doctest\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43ma\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 11\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m 
count_a(\u001b[33m\"\u001b[39m\u001b[33mbanana\u001b[39m\u001b[33m\"\u001b[39m) == \u001b[32m3\u001b[39m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:499\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__default__(*args, **kwargs)\n\u001b[32m 498\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m499\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__apply__\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:494\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 492\u001b[39m self_handler = intp.get(\u001b[38;5;28mself\u001b[39m)\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m self_handler \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m494\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mself_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 495\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m args \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(args[\u001b[32m0\u001b[39m], Operation) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m 
args[\u001b[32m0\u001b[39m].__apply__:\n\u001b[32m 496\u001b[39m \u001b[38;5;66;03m# Prevent infinite recursion when calling self.apply directly\u001b[39;00m\n\u001b[32m 497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__default__(*args, **kwargs)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:70\u001b[39m, in \u001b[36m_set_prompt..bound_body\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 68\u001b[39m next_cont = get_interpretation().get(prompt, prompt.__default_rule__)\n\u001b[32m 69\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({prompt: handler({prompt: next_cont})(cont)}):\n\u001b[32m---> \u001b[39m\u001b[32m70\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbody\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/internals/runtime.py:56\u001b[39m, in \u001b[36m_save_args.._cont_wrapper\u001b[39m\u001b[34m(*a, **k)\u001b[39m\n\u001b[32m 53\u001b[39m \u001b[38;5;129m@functools\u001b[39m.wraps(fn)\n\u001b[32m 54\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_cont_wrapper\u001b[39m(*a: P.args, **k: P.kwargs) -> T:\n\u001b[32m 55\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m handler({_get_args: \u001b[38;5;28;01mlambda\u001b[39;00m: (a, k)}):\n\u001b[32m---> \u001b[39m\u001b[32m56\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/completions.py:521\u001b[39m, in \u001b[36mLiteLLMProvider._call\u001b[39m\u001b[34m(self, template, *args, 
**kwargs)\u001b[39m\n\u001b[32m 519\u001b[39m result: T | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 520\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m message[\u001b[33m\"\u001b[39m\u001b[33mrole\u001b[39m\u001b[33m\"\u001b[39m] != \u001b[33m\"\u001b[39m\u001b[33massistant\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m tool_calls:\n\u001b[32m--> \u001b[39m\u001b[32m521\u001b[39m message, tool_calls, result = \u001b[43mcall_assistant\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 522\u001b[39m \u001b[43m \u001b[49m\u001b[43mtemplate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_model\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mconfig\u001b[49m\n\u001b[32m 523\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 524\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m tool_call \u001b[38;5;129;01min\u001b[39;00m tool_calls:\n\u001b[32m 525\u001b[39m message = call_tool(tool_call)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:499\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__default__(*args, **kwargs)\n\u001b[32m 498\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m499\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__apply__\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile 
\u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:497\u001b[39m, in \u001b[36mOperation.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 494\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m self_handler(*args, **kwargs)\n\u001b[32m 495\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m args \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(args[\u001b[32m0\u001b[39m], Operation) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m args[\u001b[32m0\u001b[39m].__apply__:\n\u001b[32m 496\u001b[39m \u001b[38;5;66;03m# Prevent infinite recursion when calling self.apply directly\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__default__\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 498\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 499\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__apply__(\u001b[38;5;28mself\u001b[39m, *args, **kwargs)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:548\u001b[39m, in \u001b[36m__apply__\u001b[39m\u001b[34m(op, *args, **kwargs)\u001b[39m\n\u001b[32m 519\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__apply__\u001b[39m[**A, B](op: Operation[A, B], *args: A.args, **kwargs: A.kwargs) -> B:\n\u001b[32m 520\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Apply ``op`` to ``args``, ``kwargs`` in interpretation ``intp``.\u001b[39;00m\n\u001b[32m 521\u001b[39m \n\u001b[32m 522\u001b[39m \u001b[33;03m Handling :func:`Operation.__apply__` changes the evaluation strategy of terms.\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 546\u001b[39m 
\n\u001b[32m 547\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m548\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mop\u001b[49m\u001b[43m.\u001b[49m\u001b[43m__default_rule__\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/ops/types.py:350\u001b[39m, in \u001b[36mOperation.__default_rule__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 345\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"The default rule is used when the operation is not handled.\u001b[39;00m\n\u001b[32m 346\u001b[39m \n\u001b[32m 347\u001b[39m \u001b[33;03mIf no default rule is supplied, the free rule is used instead.\u001b[39;00m\n\u001b[32m 348\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 349\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m350\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__default__\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 351\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m NotHandled:\n\u001b[32m 352\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01meffectful\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mops\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01msyntax\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m defdata\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Marc/effectful/effectful/handlers/llm/completions.py:304\u001b[39m, in \u001b[36mcall_assistant\u001b[39m\u001b[34m(tools, response_format, model, 
**kwargs)\u001b[39m\n\u001b[32m 302\u001b[39m result = response_format.decode(raw_result.value) \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[32m 303\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (pydantic.ValidationError, \u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mSyntaxError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m--> \u001b[39m\u001b[32m304\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m ResultDecodingError(e, raw_message=raw_message) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01me\u001b[39;00m\n\u001b[32m 306\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m (raw_message, tool_calls, result)\n", - "\u001b[31mResultDecodingError\u001b[39m: Error decoding response: mypy type check failed:\n:170: \u001b[1m\u001b[31merror:\u001b[m Name \u001b[m\u001b[1m\"count_char\"\u001b[m already defined on line 159 \u001b[m\u001b[33m[no-redef]\u001b[m\n\nimport collections\nimport collections.abc\nimport IPython\nimport IPython.core\nimport IPython.core.alias\nimport IPython.core.application\nimport IPython.core.async_helpers\nimport IPython.core.autocall\nimport IPython.core.builtin_trap\nimport IPython.core.compilerop\nimport IPython.core.completer\nimport IPython.core.completerlib\nimport IPython.core.crashhandler\nimport IPython.core.debugger\nimport IPython.core.debugger_backport\nimport IPython.core.display\nimport IPython.core.display_functions\nimport IPython.core.display_trap\nimport IPython.core.displayhook\nimport IPython.core.displaypub\nimport IPython.core.doctb\nimport IPython.core.error\nimport IPython.core.events\nimport IPython.core.extensions\nimport IPython.core.formatters\nimport IPython.core.getipython\nimport IPython.core.guarded_eval\nimport IPython.core.history\nimport IPython.core.hooks\nimport IPython.core.inputtransformer2\nimport IPython.core.interactiveshell\nimport IPython.core.latex_symbols\nimport IPython.core.logger\nimport 
IPython.core.macro\nimport IPython.core.magic\nimport IPython.core.magic_arguments\nimport IPython.core.magics\nimport IPython.core.magics.ast_mod\nimport IPython.core.magics.auto\nimport IPython.core.magics.basic\nimport IPython.core.magics.code\nimport IPython.core.magics.config\nimport IPython.core.magics.display\nimport IPython.core.magics.execution\nimport IPython.core.magics.extension\nimport IPython.core.magics.history\nimport IPython.core.magics.logging\nimport IPython.core.magics.namespace\nimport IPython.core.magics.osm\nimport IPython.core.magics.packaging\nimport IPython.core.magics.pylab\nimport IPython.core.magics.script\nimport IPython.core.oinspect\nimport IPython.core.page\nimport IPython.core.payload\nimport IPython.core.prefilter\nimport IPython.core.profiledir\nimport IPython.core.release\nimport IPython.core.shellapp\nimport IPython.core.splitinput\nimport IPython.core.tbtools\nimport IPython.core.tips\nimport IPython.core.ultratb\nimport IPython.core.usage\nimport IPython.display\nimport IPython.extensions\nimport IPython.extensions.storemagic\nimport IPython.external\nimport IPython.external.pickleshare\nimport IPython.lib\nimport IPython.lib.clipboard\nimport IPython.lib.display\nimport IPython.lib.pretty\nimport IPython.paths\nimport IPython.terminal\nimport IPython.terminal.debugger\nimport IPython.terminal.embed\nimport IPython.terminal.interactiveshell\nimport IPython.terminal.ipapp\nimport IPython.terminal.magics\nimport IPython.terminal.prompts\nimport IPython.terminal.pt_inputhooks\nimport IPython.terminal.ptutils\nimport IPython.terminal.shortcuts\nimport IPython.terminal.shortcuts.auto_match\nimport IPython.terminal.shortcuts.auto_suggest\nimport IPython.terminal.shortcuts.filters\nimport IPython.testing\nimport IPython.testing.skipdoctest\nimport IPython.utils\nimport IPython.utils.PyColorize\nimport IPython.utils._process_common\nimport IPython.utils._process_posix\nimport IPython.utils._sysinfo\nimport 
IPython.utils.capture\nimport IPython.utils.contexts\nimport IPython.utils.data\nimport IPython.utils.decorators\nimport IPython.utils.dir2\nimport IPython.utils.docs\nimport IPython.utils.encoding\nimport IPython.utils.frame\nimport IPython.utils.generics\nimport IPython.utils.importstring\nimport IPython.utils.io\nimport IPython.utils.ipstruct\nimport IPython.utils.module_paths\nimport IPython.utils.openpy\nimport IPython.utils.path\nimport IPython.utils.process\nimport IPython.utils.py3compat\nimport IPython.utils.sentinel\nimport IPython.utils.strdispatch\nimport IPython.utils.sysinfo\nimport IPython.utils.syspathcontext\nimport IPython.utils.terminal\nimport IPython.utils.text\nimport IPython.utils.timing\nimport IPython.utils.tokenutil\nimport IPython.utils.wildcard\nimport collections\nimport collections.abc\nimport effectful\nimport effectful.handlers\nimport effectful.handlers.llm\nimport effectful.handlers.llm.completions\nimport effectful.handlers.llm.encoding\nimport effectful.handlers.llm.evaluation\nimport effectful.handlers.llm.template\nimport effectful.internals\nimport effectful.internals.runtime\nimport effectful.internals.unification\nimport effectful.ops\nimport effectful.ops.semantics\nimport effectful.ops.syntax\nimport effectful.ops.types\nimport pathlib\nIn: collections.abc.MutableSequence[str]\nOut: dict\n_: str\n__: str\n___: str\n__vsc_ipynb_file__: str\n_dh: collections.abc.MutableSequence[pathlib.PosixPath]\n_i: str\n_i1: str\n_i2: str\n_i3: str\n_i4: str\n_i5: str\n_i6: str\n_i7: str\n_ih: collections.abc.MutableSequence[str]\n_ii: str\n_iii: str\n_oh: dict\nchar: str\ncount_a: collections.abc.Callable[[str], int]\ncount_char: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\ncount_char_with_doctest: effectful.ops.types.Operation[[str], collections.abc.Callable[[str], int]]\nexit: IPython.core.autocall.ZMQExitAutocall\nhaiku: collections.abc.Callable[[str], str]\nhaiku_no_cache: 
effectful.ops.types.Operation[[str], str]\nlimerick: effectful.ops.types.Operation[[str], str]\nprimes: effectful.ops.types.Operation[[int], int]\nprovider: effectful.ops.types.Interpretation\nprovider_cached: effectful.ops.types.Interpretation\nquit: IPython.core.autocall.ZMQExitAutocall\n\ndef count_char(s: str) -> int:\n \"\"\"\n Count the occurrences of the character 'a' in the given string.\n\n Args:\n s (str): The string in which to count occurrences of 'a'.\n\n Returns:\n int: The number of times 'a' appears in the string.\n\n Examples:\n >>> count_char('banana')\n 3\n >>> count_char('apple')\n 1\n >>> count_char('cherry')\n 0\n \"\"\"\n return s.count('a')\n_synthesized_check: collections.abc.Callable[[str], int] = count_char. Please provide a valid response and try again." - ] - } - ], + "outputs": [], "source": [ + "from effectful.handlers.llm.completions import ResultDecodingError\n", + "\n", + "\n", "@Template.define\n", - "def count_char_with_doctest(char: str) -> Callable[[str], int]:\n", + "def count_char_with_bad_doctest(char: str) -> Callable[[str], int]:\n", " \"\"\"Write a function named count_char that counts the occurrances of '{char}'.\n", " Do not use any tools.\n", + "\n", + " Examples:\n", + " >>> count_char(\"banana\")\n", + " 999\n", " \"\"\"\n", " raise NotHandled\n", "\n", "\n", - "with handler(provider), handler(UnsafeEvalProvider()), handler(DoctestHandler()):\n", - " count_a = count_char_with_doctest(\"a\")\n", - " assert count_a(\"banana\") == 3" + "# Without DoctestHandler: synthesis succeeds (doctest is NOT checked)\n", + "with handler(provider), handler(UnsafeEvalProvider()):\n", + " count_a = count_char_with_bad_doctest(\"a\")\n", + " assert callable(count_a)\n", + " print(\"Without DoctestHandler: synthesis succeeded (doctest not checked)\")\n", + " print(f\" count_a('banana') = {count_a('banana')}\")\n", + "\n", + "# With DoctestHandler: synthesis fails because the doctest expects 999 but gets 3\n", + "try:\n", + " with 
handler(provider), handler(UnsafeEvalProvider()), handler(DoctestHandler()):\n", + " count_a = count_char_with_bad_doctest(\"a\")\n", + "except ResultDecodingError as e:\n", + " print(\"With DoctestHandler: synthesis failed as expected\")\n", + " print(f\" Error: {type(e).__name__}\")" ] }, { From 12eb50d816e3de3e3ece00f4c622d778784e1a4a Mon Sep 17 00:00:00 2001 From: datvo06 Date: Mon, 9 Feb 2026 10:53:39 -0500 Subject: [PATCH 06/18] Remove duplicate tests --- tests/test_handlers_llm_provider.py | 54 +---------------------------- 1 file changed, 1 insertion(+), 53 deletions(-) diff --git a/tests/test_handlers_llm_provider.py b/tests/test_handlers_llm_provider.py index 60113078..89e8e649 100644 --- a/tests/test_handlers_llm_provider.py +++ b/tests/test_handlers_llm_provider.py @@ -9,7 +9,6 @@ import inspect import json import os -import typing from collections.abc import Callable from enum import StrEnum from pathlib import Path @@ -36,7 +35,7 @@ get_message_sequence, ) from effectful.handlers.llm.encoding import Encodable, SynthesizedFunction -from effectful.handlers.llm.evaluation import DoctestHandler, UnsafeEvalProvider +from effectful.handlers.llm.evaluation import UnsafeEvalProvider from effectful.ops.semantics import fwd, handler from effectful.ops.syntax import ObjectInterpretation, implements from effectful.ops.types import NotHandled @@ -1083,20 +1082,6 @@ def synthesize_counter(char: str) -> Callable[[str], int]: raise NotHandled -@Template.define -def synthesize_counter_with_doctest(char: str) -> Callable[[str], int]: - """Generate a Python function named count_char that counts occurrences of the character '{char}' - in a given input string. - - The function should be case-sensitive. - - Examples: - >>> count_char("banana") - 4 - """ - raise NotHandled - - @Template.define def synthesize_is_even() -> Callable[[int], bool]: """Generate a Python function that checks if a number is even. 
@@ -1165,31 +1150,6 @@ def test_synthesize_counter_with_parameter(self, request): assert count_a("aardvark") == 3 assert count_a("AAA") == 0 # case-sensitive - @requires_openai - def test_synthesized_doctest_runs(self, request): - """Test that doctests run for synthesized functions.""" - with ( - handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), - handler(UnsafeEvalProvider()), - handler(DoctestHandler()), - handler(LimitLLMCallsHandler(max_calls=1)), - ): - with pytest.raises(ResultDecodingError, match="doctest failed"): - synthesize_counter_with_doctest("a") - - @requires_openai - def test_callable_type_signature_in_schema(self, request): - """Test that the callable type signature is communicated to the LLM.""" - - # Verify that the enc type includes the signature in its docstring - encodable = Encodable.define(Callable[[int, int], int], {}) - assert encodable.enc.__doc__ is not None - assert "Callable[[int, int], int]" in encodable.enc.__doc__ - - encodable2 = Encodable.define(Callable[[str], str], {}) - assert encodable2.enc.__doc__ is not None - assert "Callable[[str], str]" in encodable2.enc.__doc__ - @requires_openai def test_synthesized_function_roundtrip(self, request): """Test that a synthesized function can be encoded and decoded.""" @@ -1257,18 +1217,6 @@ def test_synthesize_three_params(self, request): assert multiply_three(1, 1, 1) == 1 assert multiply_three(5, 0, 10) == 0 - def test_synthesized_program_with_annotated_decodes(self): - """Decoding a synthesized program that uses typing.Annotated in source works.""" - encodable = Encodable.define(Callable[[int], int], {"typing": typing}) - source = SynthesizedFunction( - module_code='def f(x: typing.Annotated[int, "positive"]) -> int:\n return x' - ) - with handler(UnsafeEvalProvider()): - result = encodable.decode(source) - assert callable(result) - assert result(10) == 10 - - class TestMessageSequence: """Tests for MessageSequence message sequence tracking.""" From 
2f9c6d1fcb07aab75a1c3665b072d7ec3e0a7e90 Mon Sep 17 00:00:00 2001 From: datvo06 Date: Mon, 9 Feb 2026 10:58:32 -0500 Subject: [PATCH 07/18] Restore old version of test_handler_llm_provdier --- tests/test_handlers_llm_provider.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/test_handlers_llm_provider.py b/tests/test_handlers_llm_provider.py index 89e8e649..1619ef9a 100644 --- a/tests/test_handlers_llm_provider.py +++ b/tests/test_handlers_llm_provider.py @@ -9,6 +9,7 @@ import inspect import json import os +import typing from collections.abc import Callable from enum import StrEnum from pathlib import Path @@ -1150,6 +1151,19 @@ def test_synthesize_counter_with_parameter(self, request): assert count_a("aardvark") == 3 assert count_a("AAA") == 0 # case-sensitive + @requires_openai + def test_callable_type_signature_in_schema(self, request): + """Test that the callable type signature is communicated to the LLM.""" + + # Verify that the enc type includes the signature in its docstring + encodable = Encodable.define(Callable[[int, int], int], {}) + assert encodable.enc.__doc__ is not None + assert "Callable[[int, int], int]" in encodable.enc.__doc__ + + encodable2 = Encodable.define(Callable[[str], str], {}) + assert encodable2.enc.__doc__ is not None + assert "Callable[[str], str]" in encodable2.enc.__doc__ + @requires_openai def test_synthesized_function_roundtrip(self, request): """Test that a synthesized function can be encoded and decoded.""" @@ -1217,6 +1231,18 @@ def test_synthesize_three_params(self, request): assert multiply_three(1, 1, 1) == 1 assert multiply_three(5, 0, 10) == 0 + def test_synthesized_program_with_annotated_decodes(self): + """Decoding a synthesized program that uses typing.Annotated in source works.""" + encodable = Encodable.define(Callable[[int], int], {"typing": typing}) + source = SynthesizedFunction( + module_code='def f(x: typing.Annotated[int, "positive"]) -> int:\n return x' + ) + with 
handler(UnsafeEvalProvider()): + result = encodable.decode(source) + assert callable(result) + assert result(10) == 10 + + class TestMessageSequence: """Tests for MessageSequence message sequence tracking.""" From ea8bb340269dca97c5407c8f7e65def3fe9023c5 Mon Sep 17 00:00:00 2001 From: datvo06 Date: Mon, 9 Feb 2026 11:03:31 -0500 Subject: [PATCH 08/18] Restore old version of test_handler_llm_handler_encoding --- tests/test_handlers_llm_encoding.py | 105 +++++++--------------------- 1 file changed, 26 insertions(+), 79 deletions(-) diff --git a/tests/test_handlers_llm_encoding.py b/tests/test_handlers_llm_encoding.py index a542ff98..f024e2b3 100644 --- a/tests/test_handlers_llm_encoding.py +++ b/tests/test_handlers_llm_encoding.py @@ -10,10 +10,7 @@ from RestrictedPython import RestrictingNodeTransformer from effectful.handlers.llm.encoding import Encodable, SynthesizedFunction -from effectful.handlers.llm.evaluation import ( - RestrictedEvalProvider, - UnsafeEvalProvider, -) +from effectful.handlers.llm.evaluation import RestrictedEvalProvider, UnsafeEvalProvider from effectful.ops.semantics import handler from effectful.ops.types import Operation, Term @@ -750,9 +747,7 @@ def add(a: int, b: int) -> int: assert "def add" in encoded.module_code assert "return a + b" in encoded.module_code - with ( - handler(eval_provider), - ): + with handler(eval_provider): decoded = encodable.decode(encoded) assert callable(decoded) assert decoded(2, 3) == 5 @@ -767,9 +762,7 @@ def test_decode_with_ellipsis_params(self, eval_provider): func_source = SynthesizedFunction( module_code="def double(x) -> int:\n return x * 2" ) - with ( - handler(eval_provider), - ): + with handler(eval_provider): decoded = encodable.decode(func_source) assert callable(decoded) assert decoded(5) == 10 @@ -783,9 +776,7 @@ def test_decode_with_env(self, eval_provider): return x * factor""" ) - with ( - handler(eval_provider), - ): + with handler(eval_provider): decoded = encodable.decode(source) assert 
callable(decoded) assert decoded(4) == 12 @@ -825,9 +816,7 @@ def test_decode_no_function_at_end_raises(self, eval_provider): with pytest.raises( ValueError, match="last statement to be a function definition" ): - with ( - handler(eval_provider), - ): + with handler(eval_provider): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -841,9 +830,7 @@ def test_decode_multiple_functions_uses_last(self, eval_provider): def bar() -> int: return 2""" ) - with ( - handler(eval_provider), - ): + with handler(eval_provider): decoded = encodable.decode(source) assert callable(decoded) assert decoded.__name__ == "bar" @@ -865,9 +852,7 @@ def greet(self): with pytest.raises( ValueError, match="last statement to be a function definition" ): - with ( - handler(eval_provider), - ): + with handler(eval_provider): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -876,9 +861,7 @@ def greet(name: str) -> str: return f"Hello, {name}!" encodable = Encodable.define(Callable[[str], str], {}) - with ( - handler(eval_provider), - ): + with handler(eval_provider): encoded = encodable.encode(greet) decoded = encodable.decode(encoded) @@ -918,9 +901,7 @@ def test_decode_validates_last_statement(self, eval_provider): with pytest.raises( ValueError, match="last statement to be a function definition" ): - with ( - handler(eval_provider), - ): + with handler(eval_provider): encodable.decode(source) def test_typed_callable_includes_signature_in_docstring(self): @@ -940,9 +921,7 @@ def test_typed_callable_validates_param_count(self, eval_provider): return a""" ) with pytest.raises(ValueError, match="expected function with 2 parameters"): - with ( - handler(eval_provider), - ): + with handler(eval_provider): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -955,9 +934,7 @@ def test_typed_callable_validates_return_type(self, eval_provider): return str(a + b)""" ) with pytest.raises(TypeError, 
match="Incompatible types in assignment"): - with ( - handler(eval_provider), - ): + with handler(eval_provider): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -973,9 +950,7 @@ def test_typed_callable_requires_return_annotation(self, eval_provider): ValueError, match="requires synthesized function to have a return type annotation", ): - with ( - handler(eval_provider), - ): + with handler(eval_provider): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -987,9 +962,7 @@ def test_typed_callable_accepts_correct_signature(self, eval_provider): module_code="""def add(a: int, b: int) -> int: return a + b""" ) - with ( - handler(eval_provider), - ): + with handler(eval_provider): result = encodable.decode(source) assert callable(result) assert result(2, 3) == 5 @@ -1029,9 +1002,7 @@ def test_ellipsis_callable_skips_param_validation(self, eval_provider): module_code="""def anything(a, b, c, d, e) -> int: return 42""" ) - with ( - handler(eval_provider), - ): + with handler(eval_provider): result = encodable.decode(source) assert callable(result) assert result(1, 2, 3, 4, 5) == 42 @@ -1071,9 +1042,7 @@ def test_validates_param_count_via_ast(self, eval_provider): return a + b + c""" ) with pytest.raises(ValueError, match="expected function with 2 parameters"): - with ( - handler(eval_provider), - ): + with handler(eval_provider): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -1087,9 +1056,7 @@ def test_validates_param_count_zero_params(self, eval_provider): return x""" ) with pytest.raises(ValueError, match="expected function with 0 parameters"): - with ( - handler(eval_provider), - ): + with handler(eval_provider): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -1101,9 +1068,7 @@ def test_validates_accepts_zero_params(self, eval_provider): module_code="""def get_value() -> int: return 42""" ) - with ( - 
handler(eval_provider), - ): + with handler(eval_provider): result = encodable.decode(source) assert callable(result) assert result() == 42 @@ -1127,9 +1092,7 @@ def test_ellipsis_callable_validates_return_type(self, eval_provider): return "wrong type\"""" ) with pytest.raises(TypeError, match="Incompatible types in assignment"): - with ( - handler(eval_provider), - ): + with handler(eval_provider): encodable.decode(source) @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) @@ -1140,9 +1103,7 @@ def test_callable_with_single_param(self, eval_provider): module_code="""def count_chars(s: str) -> int: return len(s)""" ) - with ( - handler(eval_provider), - ): + with handler(eval_provider): result = encodable.decode(source) assert callable(result) assert result("hello") == 5 @@ -1155,9 +1116,7 @@ def test_callable_with_many_params(self, eval_provider): module_code="""def sum_four(a: int, b: int, c: int, d: int) -> int: return a + b + c + d""" ) - with ( - handler(eval_provider), - ): + with handler(eval_provider): result = encodable.decode(source) assert callable(result) assert result(1, 2, 3, 4) == 10 @@ -1170,9 +1129,7 @@ def test_callable_with_bool_return(self, eval_provider): module_code="""def is_positive(x: int) -> bool: return x > 0""" ) - with ( - handler(eval_provider), - ): + with handler(eval_provider): result = encodable.decode(source) assert callable(result) assert result(5) is True @@ -1210,9 +1167,7 @@ def test_restricted_blocks_private_attribute_access(self): ) # Should raise due to restricted attribute access with pytest.raises(Exception): # Could be NameError or AttributeError - with ( - handler(RestrictedEvalProvider()), - ): + with handler(RestrictedEvalProvider()): fn = encodable.decode(source) fn("test") @@ -1253,9 +1208,7 @@ def test_builtins_in_env_does_not_bypass_security(self): return open(path).read()""" ) with pytest.raises(Exception): # Could be NameError, ValueError, or other - with ( - handler(RestrictedEvalProvider()), - ): + with 
handler(RestrictedEvalProvider()): fn = encodable_open.decode(source_open) # If decode succeeded (shouldn't), calling should still fail fn("/etc/passwd") @@ -1268,9 +1221,7 @@ def test_builtins_in_env_does_not_bypass_security(self): return os.name""" ) with pytest.raises(Exception): - with ( - handler(RestrictedEvalProvider()), - ): + with handler(RestrictedEvalProvider()): fn = encodable_import.decode(source_import) fn() @@ -1281,9 +1232,7 @@ def test_builtins_in_env_does_not_bypass_security(self): module_code="""def add(a: int, b: int) -> int: return a + b""" ) - with ( - handler(RestrictedEvalProvider()), - ): + with handler(RestrictedEvalProvider()): fn = encodable_safe.decode(source_safe) assert fn(2, 3) == 5, "Safe code should still work" @@ -1294,8 +1243,6 @@ def test_builtins_in_env_does_not_bypass_security(self): return s.__class__.__name__""" ) with pytest.raises(Exception): - with ( - handler(RestrictedEvalProvider()), - ): + with handler(RestrictedEvalProvider()): fn = encodable_private.decode(source_private) fn("test") From 541a273367c605b53f91c850a9fa3a6885c04907 Mon Sep 17 00:00:00 2001 From: datvo06 Date: Mon, 9 Feb 2026 12:28:33 -0500 Subject: [PATCH 09/18] Rerun --- docs/source/llm.ipynb | 259 +++++++++++++++++++----------------------- 1 file changed, 117 insertions(+), 142 deletions(-) diff --git a/docs/source/llm.ipynb b/docs/source/llm.ipynb index 77858caa..ae273141 100644 --- a/docs/source/llm.ipynb +++ b/docs/source/llm.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 29, "id": "5aaf649f", "metadata": {}, "outputs": [], @@ -68,7 +68,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 30, "id": "1e832675", "metadata": {}, "outputs": [], @@ -91,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 31, "id": "634f6533", "metadata": {}, "outputs": [ @@ -99,17 +99,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "There once was a curious 
fish,\n", - "Who swam with a tail that could swish.\n", - "To the sea it would dart,\n", - "Like a work of fine art,\n", - "In the ocean it made quite a splish!\n", + "In the ocean where brightly fish swim,\n", + "They dance with a graceful, sleek trim.\n", + "With scales that do shimmer,\n", + "They dart and they glimmer,\n", + "In the deep where the light is dim.\n", "----------------------------------------\n", - "In the ocean where fishes freely roam,\n", - "A clownfish felt quite at home.\n", - "He said with a glint,\n", - "“I’m orange with a hint,\n", - "But I'm not just a brush in sea's foam!”\n" + "In the sea swam a fish with delight,\n", + "Who glowed with a silvery light.\n", + "He danced in the waves,\n", + "Through coral-caved graves,\n", + "A master of day and of night.\n" ] } ], @@ -130,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 32, "id": "706ce53b", "metadata": {}, "outputs": [ @@ -139,13 +139,13 @@ "output_type": "stream", "text": [ "\n", - "Silent streams below,\n", - "Fish glide and dance through water—\n", - "Nature's quiet grace.\n", + "Beneath silver waves, \n", + "Fish dance in liquid sunlight, \n", + "Silent world abides.\n", "----------------------------------------\n", - "Silent streams below,\n", - "Fish glide and dance through water—\n", - "Nature's quiet grace.\n", + "Beneath silver waves, \n", + "Fish dance in liquid sunlight, \n", + "Silent world abides.\n", "\n" ] }, @@ -163,21 +163,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "Silent streams flowing,\n", - "Silver scales shimmer below—\n", - "Fish in quiet dance.\n", + "Whispers of rivers,\n", + "Scales shimmer like the sunrise,\n", + "Silent fins glide by.\n", "----------------------------------------\n", - "Gentle fish gliding \n", - "Silent ripples in the stream \n", - "Nature's dance beneath.\n", + "Gentle waves ripple, \n", + "Underwater world whispers, \n", + "Fish dance in silence.\n", "\n", - "Silent waters glide,\n", - 
"Scales shimmer beneath the light—\n", - "Fish dance in the deep.\n", + "Fish swim in clear streams, \n", + "Scales shimmer under sunlight, \n", + "Quietly they glide.\n", "----------------------------------------\n", - "Silent waters flow,\n", - "Silver glimmers dance below—\n", - "Fish weave through shadows.\n" + "In deep ocean blue,\n", + "Silent fins weave through currents,\n", + "Whispers of the deep.\n" ] } ], @@ -232,7 +232,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 33, "id": "2c766859", "metadata": {}, "outputs": [], @@ -257,10 +257,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "c83bbdc0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "def count_char(s: str) -> int:\n", + " return s.count('a')\n" + ] + } + ], "source": [ "@Template.define\n", "def count_char(char: str) -> Callable[[str], int]:\n", @@ -293,10 +302,19 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 35, "id": "793b12a5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Without DoctestHandler: synthesis succeeded (doctest not checked)\n", + " count_a('banana') = 3\n" + ] + } + ], "source": [ "from effectful.handlers.llm.completions import ResultDecodingError\n", "\n", @@ -343,7 +361,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "id": "66711301", "metadata": {}, "outputs": [ @@ -351,13 +369,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Based on the weather conditions:\n", + "Based on the weather information:\n", "\n", - "- **Chicago** is currently cold.\n", - "- **New York** is currently wet.\n", - "- **Barcelona** is currently sunny.\n", + "- **Chicago**: Cold\n", + "- **New York**: Wet\n", + "- **Barcelona**: Sunny\n", "\n", - "I suggest **Barcelona** as the city with good weather.\n" + "I suggest Barcelona as 
the city with good weather, as it is sunny there.\n" ] } ], @@ -397,7 +415,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "id": "89992702", "metadata": {}, "outputs": [ @@ -417,7 +435,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "A simple smiley face with a yellow background, featuring two black dots for eyes and a curved line for a mouth, typically used to convey happiness or friendliness.\n" + "A simple yellow smiley face with black eyes and a curved mouth, representing a happy expression.\n" ] } ], @@ -460,7 +478,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "id": "17668ac8", "metadata": {}, "outputs": [ @@ -473,7 +491,8 @@ "Who's there?\n", "Lizard.\n", "Lizard who?\n", - "Lizard who? Lizard you wonder, there's a gecko at your door!\n", + "Lizard who? \n", + "Lizard you been looking for a new pet? Because I'm ready to be your scaly buddy!\n", "> The crowd laughs politely.\n" ] } @@ -525,7 +544,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "id": "78a4bf44", "metadata": {}, "outputs": [ @@ -533,16 +552,18 @@ "name": "stdout", "output_type": "stream", "text": [ - "Sub-templates available to write_story: dict_keys(['describe_image', 'draw_simple_icon', 'limerick', 'haiku_no_cache', 'primes', 'count_char', 'cities', 'weather', 'vacation', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny'])\n", + "Sub-templates available to write_story: dict_keys(['limerick', 'haiku_no_cache', 'primes', 'count_char_with_bad_doctest', 'count_char', 'cities', 'weather', 'vacation', 'describe_image', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny', 'unstable_service', 'fetch_data', 'give_rating_for_movie', 'write_chapter', 'judge_chapter', 'write_multi_chapter_story'])\n", "=== Story with moral ===\n", "\n", "\n", - "In the case of Whiskers, it was his understanding of this balance that brought him safely home, with both stories and lessons 
to cherish and share.\n", + "When exploring new and unknown areas, it's important to be aware of your surroundings and potential dangers, so you can enjoy your adventure safely.\n", "\n", "=== Funny story ===\n", "\n", "\n", - "The End.\n" + "---\n", + "\n", + "Mr. Whiskers indeed ensured that the curious wanderlust of a cat kept the laughter and unexpected surprises alive in the quiet town of Whiskerville.\n" ] } ], @@ -604,7 +625,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "id": "4334d07a", "metadata": {}, "outputs": [ @@ -613,7 +634,11 @@ "output_type": "stream", "text": [ "Error: Service unavailable! Attempt 1/3. Please retry.\n", - "Result: The data fetched from the unstable service is: `[1, 2, 3]`. Retries: 3\n" + "Result: The unstable service call was successful on the second attempt, and the data fetched is:\n", + "\n", + "- 1\n", + "- 2\n", + "- 3 Retries: 3\n" ] } ], @@ -662,7 +687,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "id": "39b2b225", "metadata": {}, "outputs": [ @@ -674,7 +699,7 @@ "value.score\n", " score must be 1–5, got 9 [type=invalid_score, input_value=9, input_type=int]. Please provide a valid response and try again.\n", "Score: 5/5\n", - "Explanation: Die Hard is widely acclaimed as one of the best action films of all time and earns a perfect score of 5 out of 5. Its success is attributed to a gripping storyline, memorable performances, particularly by Bruce Willis as John McClane, and its innovative approach to action sequences. Its mix of humor, suspense, and holiday-themed backdrop makes it a perennial favorite, cementing its status as a cultural icon.\n" + "Explanation: Die Hard receives a score of 5 out of 5. It is widely acclaimed for its deft blend of intense action, engaging plot, and iconic performances, which have solidified its status as a classic in the action genre. 
Bruce Willis's portrayal of John McClane is both relatable and heroic, offering a character that resonates with audiences. The film's ability to maintain suspense and deliver memorable moments throughout, along with its cultural impact and influence on subsequent action movies, justifies the perfect score.\n" ] } ], @@ -737,7 +762,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "id": "9d02bc67", "metadata": {}, "outputs": [ @@ -745,112 +770,62 @@ "name": "stdout", "output_type": "stream", "text": [ - "Sub-templates available to write_story: dict_keys(['describe_image', 'draw_simple_icon', 'limerick', 'haiku_no_cache', 'primes', 'count_char', 'cities', 'weather', 'vacation', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny', 'write_story', 'unstable_service', 'fetch_data', 'give_rating_for_movie', 'write_chapter', 'judge_chapter'])\n", + "Sub-templates available to write_story: dict_keys(['limerick', 'haiku_no_cache', 'primes', 'count_char_with_bad_doctest', 'count_char', 'cities', 'weather', 'vacation', 'describe_image', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny', 'write_story', 'unstable_service', 'fetch_data', 'give_rating_for_movie', 'write_chapter', 'judge_chapter'])\n", "=== Story with moral ===\n", - "def create_moral_story(topic: str) -> str:\n", - " # Start with an introduction and establish the story theme\n", - " chapter_1 = write_chapter(1, f\"Introduction to {topic}\")\n", - " \n", - " # Develop the plot with a challenge or situation related to the topic\n", - " chapter_2 = write_chapter(2, f\"The Challenge of {topic}\")\n", - " \n", - " # Introduce a turning point or decision-making moment involving the topic\n", - " chapter_3 = write_chapter(3, f\"Decisions and Consequences of {topic}\")\n", + "def write_moral_story(topic: str) -> str:\n", + " story = \"\"\n", + " chapter_number = 1\n", + " chapter_name = f\"The Beginning of {topic}\"\n", " \n", - " # Conclusion wrapping up the story and 
highlighting the moral\n", - " chapter_4 = write_chapter(4, f\"Moral and Lessons of {topic}\")\n", + " while True:\n", + " chapter = write_chapter(chapter_number, chapter_name)\n", + " story += chapter\n", + " \n", + " if not judge_chapter(story, chapter_number):\n", + " break\n", + " \n", + " chapter_number += 1\n", + " chapter_name = f\"Chapter {chapter_number} of {topic}\"\n", " \n", - " # Combine all chapters into one coherent story\n", - " full_story = \"\\n\\n\".join([chapter_1, chapter_2, chapter_3, chapter_4])\n", - " \n", - " return full_story\n", - "**Title: The Journey of One**\n", - "\n", - "In a land far beyond imagination, where numbers were not just symbols but beings with feelings and desires, there was a little number known as One. Though often underestimated, One had a dream larger than any universe: to find its true purpose.\n", - "\n", - "One was simple, yet unique. It started each day by enjoying the sunrise, counting the seconds in silent appreciation of the continuum of time. Yet, in the grand tapestry of numbers, One felt ordinary and unnoticed, especially among the grandiosity of larger numbers like Millions and Billions, which often boasted about their size and importance.\n", - "\n", - "One day, O came across Zero, a soft-spoken and kind companion, often seen lingering in the shadows of others. \"Why do you look so glum, One?\" asked Zero as they both watched the twinkling stars above.\n", - "\n", - "\"I feel small in a world full of giants. What significance do I hold when everyone seems to multiply and magnify everything far beyond my own capacity?\"\n", - "\n", - "Zero smiled softly. \"You must explore, dear One. For you might be small, but with the right touch, you can change everything. You are the beginning of dreams, the spark that starts a continuum.\"\n", - "\n", - "Taking this advice to heart, One embarked on a journey to discover its true power and potential. 
As it wandered through the Land of Mathematics, it met Addition, the kind-hearted magician, who taught One how it could transform nothing into something, just by joining in a dance.\n", - "\n", - "With Multiplication, One learned coordination and rhythm, expanding its influence exponentially with a simple step forward. There were days spent in the Company of Fractions, shrinking itself to explore the depth of intricacy plus seeing life from a new perspective.\n", - "\n", - "Finally, it found itself near the great figure of Unity, where all numbers whether large or small, participated in harmony. Here, One discovered its greatest potential—to bring completeness. When used wisely, One could complete a perfect circle or spell disaster if miscalculated.\n", - "\n", - "In its quest, One realized its strength was simplicity itself. As small as it was, it was the foundation upon which countless worlds depended. Without One, there was nothing to start; no Number Line, no Life Progression.\n", - "\n", - "And so, One returned to its place in the universe, no longer ordinary but extraordinary in its ability to bring beginnings.\n", - "\n", - "Thus, the moral of the story: No matter how small or insignificant you feel, remember that you have the power to change everything. You are the first step in your journey and those of others. Embrace your role and start with conviction, for you are One. \n", - "\n", - "And sometimes, that's all you need to be remarkable.\n", - "\n", - "Once upon a time in the quaint village of Digiton, nestled in the Valley of Numerals, lived the number 2. In this village, each number had their unique talents and ways to contribute to the community. Number 2 was known for its ability to find wonderful pairings and create harmony.\n", - "\n", - "It was a bright, sunny morning when 2 decided it was time to plan the grand Numerals Gala, an event celebrated by all numbers from 1 to 9. 
This year's theme was \"Unity in Pairs,\" and 2 took the responsibility seriously.\n", - "\n", - "With a checklist in hand, 2 began to organize the event. First, 2 visited its oldest friend, the number 1. \"Would you be one half of a winning pair, dear friend?\" 2 asked. \"Of course,\" replied 1, \"together we make the perfect pair of Unity, everyone knows!\"\n", - "\n", - "Next, 2 approached the number 3. Though sometimes perceived as a little off-kilter, 3 was eager to join and suggested pairing with 4 to symbolize growth and progression: 3 plus 4 always added up to 7—a lucky number for all.\n", - "\n", - "Eager to ensure everyone was included, 2 made a special stop at number 5's cheerful blue cottage. \"5, would you create a bridge with me?\" 2 proposed. \"Together we form \"7\", the lucky charm—how can I resist?\" giggled 5.\n", - "\n", - "Day by day, the excitement in Digiton grew. Numbers periodically gathered in the square to rehearse their speeches and musical acts. Finally, the day of the Gala arrived, and pairs paraded on stage, highlighting unity through their performances. The pairing of 6 and 7 showcased a dance of luck and prosperity, while 8 and 9 painted visions of a dreamy future.\n", - "\n", - "As everyone settled down for the final speech, number 2 took the stage, its heart full of joy. \"Dear friends,\" 2 began, \"thank you for showing us the beauty of partnership. Alone, each of us is a number, but together, we build the world. Let us remember that two is a bond that shows love, loyalty, and peace.\"\n", - "\n", - "With a warm round of applause, the Gala concluded, but in their hearts, every number knew that it was 2's thoughtful pairing that showed them the profound harmony within.\n", - "\n", - "And so, 2's legacy in Digiton was etched as a gentle, powerful reminder that the most meaningful journeys are those taken with another by your side. 
Such was the wisdom of number 2.\n", - "\n", - "Once upon a time, nestled in the quiet and serene landscape of Numerland, there was a unique and charismatic number named \"Three.\" Unlike the other numbers, Three was proudly quirky and adventurous. Sporting three shining points, he dazzled with a triangular shape that made him quite distinctive among his peers.\n", - "\n", - "Three lived in the bustling community of Tallytown, a place where numbers came together to form equations, solve problems, and have numerical debates. But Three often felt that Tallytown was too caught up in linear thinking. He liked to think outside the box—or pyramid, in his case.\n", - "\n", - "One sunny day, Three decided to embark on an adventure across the wide fields of positivity. His first stop was Addition Avenue, a lively street where numbers piled atop each other, eagerly building bridges to larger sums. While there, Three met other numbers like Six and Nine, who greeted him warmly.\n", - "\n", - "“Why travel, Three?” asked Six.\n", - "\n", - "\"I'm seeking something more,\" Three replied. \"I feel like there's a whole world of meritorious multiplicities and radiant reciprocals waiting for me!\"\n", + " # Add a moral at the end of the story\n", + " story += f\"\\nMoral of the story: {topic} teaches us an important lesson about life.\"\n", + " return story\n", + "Once upon a time in an enchanted land, there was a peculiar valley where numbers roamed like mythical creatures. Among these numbers, the number 1 was often seen as the least significant, overshadowed by larger and more complex siblings like 7, 8, or 9. Despite his small size, Number 1 was ambitious and determined to prove his worth.\n", "\n", - "With a friendly nod, Three continued on his journey. He navigated through Subtraction Square, where he learned to appreciate simplicity. 
As he passed through, Two’s counsel resonated: “Sometimes less is more, Three.”\n", + "Every morning, while other numbers would boast about their additions and multiplications, Number 1 would practice tirelessly, joining forces with other numbers to create bigger values. His tenacity was a source of amusement for others, who thought he could never achieve much on his own.\n", "\n", - "Eventually, Three found himself at the multipliers' meadow, a wide expanse where numbers did cartwheels, creating exponential wonders. It was here he met Zero, who diffidently warned, \"Multiply with me, and I'll vanish you into nothingness!\"\n", + "One day, an elderly zero, known for her wisdom, approached Number 1. \"My dear,\" she said softly, \"I see a great future for you if only you believe in yourself. The world often forgets that even the greatest numbers are built from unity.\"\n", "\n", - "Three chuckled at the paradox and moved on. He rolled over to Division Dale, where he admired the symmetry of parts and ratios. Three realized he was not just a number but a part of something truly wondrous.\n", + "Encouraged by her words, Number 1 took on the daring quest to seek the Great Calculator, a mystical device rumored to grant numbers the power to be whatever they wished. Through forests of division and mountains of subtraction, he traveled, never giving up despite the odds.\n", "\n", - "Finally, gazing at the starry skyscape of Infinity Lane, Three discovered his true potential—he was a constant, reliable factor that held significance beyond simple numerical value. Each point of his triangular form seemed to twinkle with this newfound wisdom.\n", + "Upon reaching the calculator, Number 1 was challenged to prove his worth. 
With courage and the belief imparted by the wise zero, he declared, \"I wish to become the foundation upon which greatness is built!\"\n", "\n", - "As he made his way back home to Tallytown, Three felt renewed, armed with appreciation for his uniqueness and the harmony between all numbers. He returned not just as Three, but as the representation of balance, creativity, and the beautiful geometric world from which he drew his strength.\n", + "To everyone's astonishment, the Great Calculator whirred into life. It awarded him the power to unite numbers, demonstrating his undeniable importance. In the days that followed, Number 1 became the cornerstone of all numbers.\n", "\n", - "And so, in the land of Numerland, Three lived happily, not just a simple integer, but a remarkable journey in and of itself—a point of convergence in a universe of endless possibilities.\n", + "It was with his newfound status that 1 proved a simple yet profound truth: oneness—the unity of a single entity—paves the way for boundless possibilities. He no longer needed to compete in size or scale, for he had realized his true potential.\n", "\n", - "**The Tale of Four Friends**\n", + "And thus, the valley learned a valuable lesson: sometimes, the smallest voice can hold the greatest strength. The legacy of Number 1 taught others to look beyond size and recognize the power within.\n", "\n", - "Once upon a time, in the cozy town of Little Numbers, there dwelt a modest fellow known simply as \"4.\" Though he appeared ordinary, 4 was actually quite special. He had three devoted friends: 1, 2, and 3. Together, they formed a dynamic quartet of remarkable adventures.\n", + "The moral of the story is that value lies not in size or quantity, but in the unity and connections we create.**Title: The Tale of Two**\n", "\n", - "One bright spring morning, they embarked on a journey to solve the mystery of the Lost Sequence. 
It was said that the sequence held the secret to solving any mathematical problem, and possessing it would mean endless possibilities.\n", + "Once upon a time, in the vibrant land of Numerica, there lived a humble, yet essential number named 2. In Numerica, each number had its distinct role to play, and the harmony of the land depended on their cooperation.\n", "\n", - "4, ever confident in his stability, led the group with enthusiasm. \"We can decipher any riddle with our unity,\" he declared, his square-shaped stature conveying authority.\n", + "Number 2, though modest in appearance, was the backbone of relationships. It was natural, balanced, and often found itself in pairs, proudly proclaiming its capability to double anything it touched. Whether joining hands with 1 to form the couple 12 or standing proudly as 20, it carried its duty with unwavering grace.\n", "\n", - "Their first challenge arrived at the Great Divide Canyon, a vast gap that seemed insurmountable. \"Fear not!\" said 2, offering help with her talent for pairing. She balanced 1 on her left and 3 on her right. Effortlessly, they formed a bridge sturdy enough for 4 to cross, leading them all safely to the other side.\n", + "One sunny day, a great uproar engulfed the peaceful land. Zero, feeling worthless and overlooked, decided to disrupt the harmony among numbers. \"I'm tired of being seen as nothing,\" Zero declared. \"I shall stand in the middle of all, creating confusion!\"\n", "\n", - "The team soon reached the Valley of Equations, where intricate puzzles befuddled passersby. With 4's knack for balance and proportion, they made short work of the conundrums. 1's simplicity, combined with 3's creative approach, solved complex equations, while 2's knack for harmonizing detected patterns invisible to others.\n", + "Panic rippled through Numerica as Zero interjected itself into equations, causing mathematical mayhem. 
In a moment of crisis, the other numbers turned to 2, whose sense of balance was legendary. The wise elders of Numerica asked 2 to resolve the chaos.\n", "\n", - "As they journeyed deeper, they encountered the enigma known as the Paradox Terrain. Here, problems that seemed unsolvable loomed ominously. \"Let us remember,\" 4 reminded them, \"that solutions are often nearer than they appear.\"\n", + "Stepping forward, 2 approached Zero with understanding eyes. \"You have a place here, just as every number does,\" 2 consoled the unsettled Zero. \"You are the start and the end; infinite possibilities await when you team up with us.\"\n", "\n", - "With a fresh perspective, 3 noticed a pattern: each unsolvable problem required going back to basic principles. By retracing steps, simplifying assumptions, and adding unique insights, they cracked the paradox.\n", + "Zero, touched by 2's kindness, agreed to return to its place. Together, 2 and Zero created new possibilities: 20, 200, 2,000—a testament to harmony and cooperation. From that day, Zero embraced its potential with pride.\n", "\n", - "At last, the quartet arrived at the Chamber of the Lost Sequence, where wisdom awaited them. The mystical sequence unveiled itself, revealing the elegance of mathematical harmony, in which each number played a crucial role.\n", + "The land of Numerica returned to its serene order, and 2 continued to exemplify the essence of pairing and balance. Time worked its way through Numerica, and just like a pair of glasses brings clarity and focus, the number 2 continued to serve as a reminder of the power of partnership and unity.\n", "\n", - "Embracing the sequence, the friends returned to Little Numbers, wiser and more united than ever. 
Thus, in the camaraderie of 4 and his friends, the town learned a timeless lesson: the greatest strength comes not from singular achievement, but from the harmony of collective unity.\n", + "And thus, the land thrived ever more, singing praises of the humble number 2, whose quiet strength and harmony kept everything at peace.\n", "\n", - "And so, they lived happily and mathematically ever after.\n", + "**Moral:** Too often do we overlook the importance of partnerships and balance. It is through cooperation with others that true potential is realized.\n", + "Moral of the story: a curious cat teaches us an important lesson about life.\n", "\n" ] } From 533eb63660196c00d6c86854c2ee6867cab343f0 Mon Sep 17 00:00:00 2001 From: datvo06 Date: Mon, 9 Feb 2026 12:59:16 -0500 Subject: [PATCH 10/18] Fix test --- tests/conftest.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index e6ad3e07..aba4529f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,3 +18,19 @@ def pytest_runtest_call(item): pytest.xfail(str(e)) else: raise e + + +def pytest_collect_modifyitems(config, items): + """Remove auto-collected doctests from LLM template functions. + + Template docstrings contain ``>>>`` examples that serve as LLM prompts + for the DoctestHandler, not as standalone doctests for pytest to run. 
+ """ + items[:] = [ + item + for item in items + if not ( + type(item).__name__ == "DoctestItem" + and "test_handlers_llm_doctest" in item.nodeid + ) + ] From 8d39e121bdfaa89115c26e0775bd49452d6745eb Mon Sep 17 00:00:00 2001 From: datvo06 Date: Mon, 9 Feb 2026 13:13:06 -0500 Subject: [PATCH 11/18] minor --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index aba4529f..e80b4f8f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,7 +20,7 @@ def pytest_runtest_call(item): raise e -def pytest_collect_modifyitems(config, items): +def pytest_collection_modifyitems(config, items): """Remove auto-collected doctests from LLM template functions. Template docstrings contain ``>>>`` examples that serve as LLM prompts From fcd5eafc7c1e3840e061e9a2237321f42fbd2368 Mon Sep 17 00:00:00 2001 From: datvo06 Date: Mon, 9 Feb 2026 17:02:03 -0500 Subject: [PATCH 12/18] Draft implementation --- docs/source/llm.ipynb | 286 ++++++++++++++---------- effectful/handlers/llm/doctest.py | 314 +++++++++++++++++++++++++++ effectful/handlers/llm/evaluation.py | 81 ------- tests/test_handlers_llm_doctest.py | 139 +++++++++++- 4 files changed, 616 insertions(+), 204 deletions(-) create mode 100644 effectful/handlers/llm/doctest.py diff --git a/docs/source/llm.ipynb b/docs/source/llm.ipynb index ae273141..36e88eca 100644 --- a/docs/source/llm.ipynb +++ b/docs/source/llm.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 1, "id": "5aaf649f", "metadata": {}, "outputs": [], @@ -42,7 +42,8 @@ " LiteLLMProvider,\n", " RetryLLMHandler,\n", ")\n", - "from effectful.handlers.llm.evaluation import DoctestHandler, UnsafeEvalProvider\n", + "from effectful.handlers.llm.doctest import DoctestHandler\n", + "from effectful.handlers.llm.evaluation import UnsafeEvalProvider\n", "from effectful.ops.semantics import NotHandled, handler\n", "\n", "provider = LiteLLMProvider()" @@ -68,7 
+69,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 2, "id": "1e832675", "metadata": {}, "outputs": [], @@ -91,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 3, "id": "634f6533", "metadata": {}, "outputs": [ @@ -99,17 +100,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "In the ocean where brightly fish swim,\n", - "They dance with a graceful, sleek trim.\n", - "With scales that do shimmer,\n", - "They dart and they glimmer,\n", - "In the deep where the light is dim.\n", + "In a pond where the waters are still,\n", + "Lived a fish with a notable skill,\n", + "He'd leap in the air,\n", + "With grace that was rare,\n", + "And land with a splash for a thrill!\n", "----------------------------------------\n", - "In the sea swam a fish with delight,\n", - "Who glowed with a silvery light.\n", - "He danced in the waves,\n", - "Through coral-caved graves,\n", - "A master of day and of night.\n" + "In the sea where the big fishes play,\n", + "A small fish swam every which way.\n", + "With eyes wide and bright,\n", + "It jumped with delight,\n", + "Dodging hooks while it danced in the bay.\n" ] } ], @@ -130,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 4, "id": "706ce53b", "metadata": {}, "outputs": [ @@ -139,13 +140,13 @@ "output_type": "stream", "text": [ "\n", - "Beneath silver waves, \n", - "Fish dance in liquid sunlight, \n", - "Silent world abides.\n", + "In still waters, grace,\n", + "Silver scales in sunlit dance,\n", + "Fish swim, life unfolds.\n", "----------------------------------------\n", - "Beneath silver waves, \n", - "Fish dance in liquid sunlight, \n", - "Silent world abides.\n", + "In still waters, grace,\n", + "Silver scales in sunlit dance,\n", + "Fish swim, life unfolds.\n", "\n" ] }, @@ -163,21 +164,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "Whispers of rivers,\n", - "Scales shimmer like the sunrise,\n", - "Silent fins 
glide by.\n", + "Swimming in the deep, \n", + "Silent fins glide through the waves, \n", + "Secrets in the blue.\n", "----------------------------------------\n", - "Gentle waves ripple, \n", - "Underwater world whispers, \n", - "Fish dance in silence.\n", + "Silent waters gleam, \n", + "Fish weave tales beneath the waves, \n", + "Nature's fluid dance.\n", "\n", - "Fish swim in clear streams, \n", - "Scales shimmer under sunlight, \n", - "Quietly they glide.\n", + "Below ripples glide,\n", + "Silver scales in liquid dance,\n", + "Nature's grace in flow.\n", "----------------------------------------\n", - "In deep ocean blue,\n", - "Silent fins weave through currents,\n", - "Whispers of the deep.\n" + "Silent waters flow, \n", + "Where vibrant scales shimmer bright, \n", + "Fish dance in moon's glow.\n" ] } ], @@ -232,7 +233,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 5, "id": "2c766859", "metadata": {}, "outputs": [], @@ -257,7 +258,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 6, "id": "c83bbdc0", "metadata": {}, "outputs": [ @@ -293,16 +294,97 @@ "id": "0b6a7b48", "metadata": {}, "source": [ - "### Doctest Feedback\n", + "### Doctest Semantic Constraints\n", "\n", - "You can optionally install a `DoctestHandler` to run doctests from the template docstring during callable synthesis.\n", - "Without `DoctestHandler`, synthesis succeeds even if the docstring contains examples that don't match the synthesized function.\n", - "With `DoctestHandler`, the doctests are executed and a `ResultDecodingError` is raised on failure." + "The `DoctestHandler` uses `>>>` examples in template docstrings as **semantic constraints** rather than literal prompts. It handles two cases automatically based on the template's return type:\n", + "\n", + "- **Case 1 (tool-calling)**: When the template returns a non-`Callable` type (e.g. 
`str`, `int`), the handler runs a *calibration loop* once per template definition — calling the LLM with the doctest inputs, checking outputs, and caching the full conversation (including any corrections) as a few-shot prefix for future calls.\n", + "- **Case 2 (code generation)**: When the template returns a `Callable` type, the generated code must pass the doctests. A `ResultDecodingError` is raised on failure.\n", + "\n", + "Notes: *In both cases, `>>>` examples are **stripped from the prompt** so the LLM cannot simply memorise the expected outputs.*\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "b99a2603", + "metadata": {}, + "source": [ + "#### Case 1: Tool-Calling Calibration\n", + "\n", + "For templates that return a non-`Callable` type (e.g. `str`), `DoctestHandler` runs a calibration loop the first time the template is called. It invokes the LLM with each doctest input, checks whether the output matches, and appends corrective feedback if not. The entire conversation — successes *and* failures — is cached as a few-shot prefix that teaches the LLM the template's expected behaviour." 
] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 7, + "id": "b6524592", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Without DoctestHandler: Python is recognized for its readability as a high-level programming language.\n", + "With DoctestHandler: Python is a readable, high-level programming language.\n", + "\n", + "Calibration prefix cached: True\n", + "Prefix messages: 4\n" + ] + } + ], + "source": [ + "@Template.define\n", + "def summarize_doctest(text: str) -> str:\n", + " \"\"\"Summarize the following text into a single concise sentence: '{text}'\n", + "\n", + " >>> summarize_doctest(\"The quick brown fox jumps over the lazy dog near the river bank on a sunny afternoon.\")\n", + " 'A fox jumps over a lazy dog by a river on a sunny day.'\n", + " >>> summarize_doctest(\"What a beautiful day!\")\n", + " 'A beautiful day!'\n", + " \"\"\"\n", + " raise NotHandled\n", + "\n", + "\n", + "# Without DoctestHandler: the template works, but the LLM has no calibration prefix.\n", + "with handler(provider):\n", + " result_no_doctest = summarize_doctest(\n", + " \"Python is a high-level programming language known for its readability.\"\n", + " )\n", + " print(f\"Without DoctestHandler: {result_no_doctest}\")\n", + "\n", + "# With DoctestHandler: calibration runs once, building a few-shot prefix.\n", + "# Subsequent calls benefit from the prefix.\n", + "doctest_handler = DoctestHandler()\n", + "with handler(provider), handler(doctest_handler):\n", + " result_with_doctest = summarize_doctest(\n", + " \"Python is a high-level programming language known for its readability.\"\n", + " )\n", + " print(f\"With DoctestHandler: {result_with_doctest}\")\n", + "\n", + "# The calibration prefix is cached for the template.\n", + "print(\n", + " f\"\\nCalibration prefix cached: {summarize_doctest in doctest_handler._prefix_cache}\"\n", + ")\n", + "print(\n", + " f\"Prefix messages: 
{len(doctest_handler._prefix_cache.get(summarize_doctest, []))}\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "c8981316", + "metadata": {}, + "source": [ + "#### Case 2: Code Generation Validation\n", + "\n", + "When synthesising callable code, `DoctestHandler` validates that the generated function passes the doctests.\n", + "Without it, synthesis succeeds even if the docstring examples don't match the generated code." + ] + }, + { + "cell_type": "code", + "execution_count": 8, "id": "793b12a5", "metadata": {}, "outputs": [ @@ -311,7 +393,13 @@ "output_type": "stream", "text": [ "Without DoctestHandler: synthesis succeeded (doctest not checked)\n", - " count_a('banana') = 3\n" + " count_a('banana') = 3\n", + "**********************************************************************\n", + "1 items had failures:\n", + " 1 of 1 in __main__.__template_doctest__\n", + "***Test Failed*** 1 failures.\n", + "With DoctestHandler: synthesis failed as expected\n", + " Error: ResultDecodingError\n" ] } ], @@ -324,7 +412,6 @@ " \"\"\"Write a function named count_char that counts the occurrances of '{char}'.\n", " Do not use any tools.\n", "\n", - " Examples:\n", " >>> count_char(\"banana\")\n", " 999\n", " \"\"\"\n", @@ -361,7 +448,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 9, "id": "66711301", "metadata": {}, "outputs": [ @@ -369,13 +456,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Based on the weather information:\n", - "\n", - "- **Chicago**: Cold\n", - "- **New York**: Wet\n", - "- **Barcelona**: Sunny\n", - "\n", - "I suggest Barcelona as the city with good weather, as it is sunny there.\n" + "Among the cities listed, Barcelona currently has good weather, being described as \"sunny.\"\n" ] } ], @@ -415,7 +496,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 10, "id": "89992702", "metadata": {}, "outputs": [ @@ -435,7 +516,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "A 
simple yellow smiley face with black eyes and a curved mouth, representing a happy expression.\n" + "A simple yellow smiley face with black eyes and a smile.\n" ] } ], @@ -478,7 +559,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 11, "id": "17668ac8", "metadata": {}, "outputs": [ @@ -491,8 +572,7 @@ "Who's there?\n", "Lizard.\n", "Lizard who?\n", - "Lizard who? \n", - "Lizard you been looking for a new pet? Because I'm ready to be your scaly buddy!\n", + "Lizard who? Lizard you can't hear, but I'm still knocking!\n", "> The crowd laughs politely.\n" ] } @@ -544,7 +624,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 12, "id": "78a4bf44", "metadata": {}, "outputs": [ @@ -552,18 +632,18 @@ "name": "stdout", "output_type": "stream", "text": [ - "Sub-templates available to write_story: dict_keys(['limerick', 'haiku_no_cache', 'primes', 'count_char_with_bad_doctest', 'count_char', 'cities', 'weather', 'vacation', 'describe_image', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny', 'unstable_service', 'fetch_data', 'give_rating_for_movie', 'write_chapter', 'judge_chapter', 'write_multi_chapter_story'])\n", + "Sub-templates available to write_story: dict_keys(['limerick', 'haiku_no_cache', 'primes', 'count_char', 'summarize_doctest', 'count_char_with_bad_doctest', 'cities', 'weather', 'vacation', 'describe_image', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny'])\n", "=== Story with moral ===\n", "\n", "\n", - "When exploring new and unknown areas, it's important to be aware of your surroundings and potential dangers, so you can enjoy your adventure safely.\n", + "Whiskers learned this valuable lesson and became a wiser cat, embarking on adventures that were both thrilling and safe, living happily amidst his curious wonders.\n", "\n", "=== Funny story ===\n", "\n", "\n", "---\n", "\n", - "Mr. 
Whiskers indeed ensured that the curious wanderlust of a cat kept the laughter and unexpected surprises alive in the quiet town of Whiskerville.\n" + "Pickles' story teaches us that sometimes, embracing our curiosity can lead to the unexpected, and perhaps hilariously so! But, in embracing what makes us different, we might just bring joy to those around us, too.\n" ] } ], @@ -625,7 +705,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 13, "id": "4334d07a", "metadata": {}, "outputs": [ @@ -634,11 +714,9 @@ "output_type": "stream", "text": [ "Error: Service unavailable! Attempt 1/3. Please retry.\n", - "Result: The unstable service call was successful on the second attempt, and the data fetched is:\n", + "Result: The data fetched successfully is: `[1, 2, 3]`. \n", "\n", - "- 1\n", - "- 2\n", - "- 3 Retries: 3\n" + "If you need further assistance with this data or have any other queries, feel free to ask! Retries: 3\n" ] } ], @@ -687,7 +765,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 14, "id": "39b2b225", "metadata": {}, "outputs": [ @@ -699,7 +777,7 @@ "value.score\n", " score must be 1–5, got 9 [type=invalid_score, input_value=9, input_type=int]. Please provide a valid response and try again.\n", "Score: 5/5\n", - "Explanation: Die Hard receives a score of 5 out of 5. It is widely acclaimed for its deft blend of intense action, engaging plot, and iconic performances, which have solidified its status as a classic in the action genre. Bruce Willis's portrayal of John McClane is both relatable and heroic, offering a character that resonates with audiences. The film's ability to maintain suspense and deliver memorable moments throughout, along with its cultural impact and influence on subsequent action movies, justifies the perfect score.\n" + "Explanation: Die Hard is a classic action film that's widely regarded as one of the best in its genre. 
It features intense action sequences, memorable one-liners, and a charismatic performance by Bruce Willis. The film's clever plot and well-executed direction keep viewers on the edge of their seats. Given its enduring popularity and influence on the action genre, I would rate it a 5 out of 5.\n" ] } ], @@ -762,7 +840,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "9d02bc67", "metadata": {}, "outputs": [ @@ -770,63 +848,34 @@ "name": "stdout", "output_type": "stream", "text": [ - "Sub-templates available to write_story: dict_keys(['limerick', 'haiku_no_cache', 'primes', 'count_char_with_bad_doctest', 'count_char', 'cities', 'weather', 'vacation', 'describe_image', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny', 'write_story', 'unstable_service', 'fetch_data', 'give_rating_for_movie', 'write_chapter', 'judge_chapter'])\n", + "Sub-templates available to write_story: dict_keys(['limerick', 'haiku_no_cache', 'primes', 'count_char', 'summarize_doctest', 'count_char_with_bad_doctest', 'cities', 'weather', 'vacation', 'describe_image', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny', 'write_story', 'unstable_service', 'fetch_data', 'give_rating_for_movie', 'write_chapter', 'judge_chapter'])\n", "=== Story with moral ===\n", "def write_moral_story(topic: str) -> str:\n", - " story = \"\"\n", + " story_parts = []\n", " chapter_number = 1\n", - " chapter_name = f\"The Beginning of {topic}\"\n", + " chapter_names = [\"Beginning\", \"Middle\", \"Climax\", \"Conclusion\"]\n", " \n", - " while True:\n", - " chapter = write_chapter(chapter_number, chapter_name)\n", - " story += chapter\n", - " \n", - " if not judge_chapter(story, chapter_number):\n", - " break\n", - " \n", + " for chapter_name in chapter_names:\n", + " current_chapter = write_chapter(chapter_number, chapter_name)\n", + " story_parts.append(current_chapter)\n", + " if not judge_chapter(' '.join(story_parts), chapter_number):\n", + " raise 
ValueError(f\"Chapter {chapter_number} is not coherent with the story so far.\")\n", " chapter_number += 1\n", - " chapter_name = f\"Chapter {chapter_number} of {topic}\"\n", " \n", - " # Add a moral at the end of the story\n", - " story += f\"\\nMoral of the story: {topic} teaches us an important lesson about life.\"\n", - " return story\n", - "Once upon a time in an enchanted land, there was a peculiar valley where numbers roamed like mythical creatures. Among these numbers, the number 1 was often seen as the least significant, overshadowed by larger and more complex siblings like 7, 8, or 9. Despite his small size, Number 1 was ambitious and determined to prove his worth.\n", - "\n", - "Every morning, while other numbers would boast about their additions and multiplications, Number 1 would practice tirelessly, joining forces with other numbers to create bigger values. His tenacity was a source of amusement for others, who thought he could never achieve much on his own.\n", - "\n", - "One day, an elderly zero, known for her wisdom, approached Number 1. \"My dear,\" she said softly, \"I see a great future for you if only you believe in yourself. The world often forgets that even the greatest numbers are built from unity.\"\n", - "\n", - "Encouraged by her words, Number 1 took on the daring quest to seek the Great Calculator, a mystical device rumored to grant numbers the power to be whatever they wished. Through forests of division and mountains of subtraction, he traveled, never giving up despite the odds.\n", - "\n", - "Upon reaching the calculator, Number 1 was challenged to prove his worth. With courage and the belief imparted by the wise zero, he declared, \"I wish to become the foundation upon which greatness is built!\"\n", - "\n", - "To everyone's astonishment, the Great Calculator whirred into life. It awarded him the power to unite numbers, demonstrating his undeniable importance. 
In the days that followed, Number 1 became the cornerstone of all numbers.\n", - "\n", - "It was with his newfound status that 1 proved a simple yet profound truth: oneness—the unity of a single entity—paves the way for boundless possibilities. He no longer needed to compete in size or scale, for he had realized his true potential.\n", - "\n", - "And thus, the valley learned a valuable lesson: sometimes, the smallest voice can hold the greatest strength. The legacy of Number 1 taught others to look beyond size and recognize the power within.\n", - "\n", - "The moral of the story is that value lies not in size or quantity, but in the unity and connections we create.**Title: The Tale of Two**\n", - "\n", - "Once upon a time, in the vibrant land of Numerica, there lived a humble, yet essential number named 2. In Numerica, each number had its distinct role to play, and the harmony of the land depended on their cooperation.\n", - "\n", - "Number 2, though modest in appearance, was the backbone of relationships. It was natural, balanced, and often found itself in pairs, proudly proclaiming its capability to double anything it touched. Whether joining hands with 1 to form the couple 12 or standing proudly as 20, it carried its duty with unwavering grace.\n", - "\n", - "One sunny day, a great uproar engulfed the peaceful land. Zero, feeling worthless and overlooked, decided to disrupt the harmony among numbers. \"I'm tired of being seen as nothing,\" Zero declared. \"I shall stand in the middle of all, creating confusion!\"\n", - "\n", - "Panic rippled through Numerica as Zero interjected itself into equations, causing mathematical mayhem. In a moment of crisis, the other numbers turned to 2, whose sense of balance was legendary. The wise elders of Numerica asked 2 to resolve the chaos.\n", - "\n", - "Stepping forward, 2 approached Zero with understanding eyes. \"You have a place here, just as every number does,\" 2 consoled the unsettled Zero. 
\"You are the start and the end; infinite possibilities await when you team up with us.\"\n", - "\n", - "Zero, touched by 2's kindness, agreed to return to its place. Together, 2 and Zero created new possibilities: 20, 200, 2,000—a testament to harmony and cooperation. From that day, Zero embraced its potential with pride.\n", - "\n", - "The land of Numerica returned to its serene order, and 2 continued to exemplify the essence of pairing and balance. Time worked its way through Numerica, and just like a pair of glasses brings clarity and focus, the number 2 continued to serve as a reminder of the power of partnership and unity.\n", - "\n", - "And thus, the land thrived ever more, singing praises of the humble number 2, whose quiet strength and harmony kept everything at peace.\n", - "\n", - "**Moral:** Too often do we overlook the importance of partnerships and balance. It is through cooperation with others that true potential is realized.\n", - "Moral of the story: a curious cat teaches us an important lesson about life.\n", - "\n" + " full_story = ' '.join(story_parts)\n", + " return full_story\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Chapter 2 is not coherent with the story so far.", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mValueError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[17]\u001b[39m\u001b[32m, line 36\u001b[39m\n\u001b[32m 34\u001b[39m function_that_writes_story = write_multi_chapter_story(\u001b[33m\"\u001b[39m\u001b[33mmoral\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 35\u001b[39m \u001b[38;5;28mprint\u001b[39m(inspect.getsource(function_that_writes_story))\n\u001b[32m---> \u001b[39m\u001b[32m36\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[43mfunction_that_writes_story\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43ma curious 
cat\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m)\n\u001b[32m 37\u001b[39m \u001b[38;5;28mprint\u001b[39m()\n", + "\u001b[36mFile \u001b[39m\u001b[32m:12\u001b[39m, in \u001b[36mwrite_moral_story\u001b[39m\u001b[34m(topic)\u001b[39m\n\u001b[32m 10\u001b[39m story_parts.append(current_chapter)\n\u001b[32m 11\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m judge_chapter(\u001b[33m'\u001b[39m\u001b[33m \u001b[39m\u001b[33m'\u001b[39m.join(story_parts), chapter_number):\n\u001b[32m---> \u001b[39m\u001b[32m12\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mChapter \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mchapter_number\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m is not coherent with the story so far.\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 13\u001b[39m chapter_number += \u001b[32m1\u001b[39m\n\u001b[32m 15\u001b[39m full_story = \u001b[33m'\u001b[39m\u001b[33m \u001b[39m\u001b[33m'\u001b[39m.join(story_parts)\n", + "\u001b[31mValueError\u001b[39m: Chapter 2 is not coherent with the story so far." ] } ], @@ -848,6 +897,7 @@ "@Template.define\n", "def write_multi_chapter_story(style: Literal[\"moral\", \"funny\"]) -> Callable[[str], str]:\n", " \"\"\"Generate a function that writes a story in style: {style} about the given topic.\n", + " Try to return the story no matter what.\n", "\n", " The program can use helper functions defined elsewhere (DO NOT REDEFINE THEM):\n", " - write_chapter(chapter_number: int, chapter_name: str) -> str\n", diff --git a/effectful/handlers/llm/doctest.py b/effectful/handlers/llm/doctest.py new file mode 100644 index 00000000..e3f86332 --- /dev/null +++ b/effectful/handlers/llm/doctest.py @@ -0,0 +1,314 @@ +"""Doctest semantic constraints for Templates. + +Provides a :class:`DoctestHandler` that uses ``>>>`` examples in template +docstrings as semantic constraints rather than literal prompts. 
+ +**Case 1 (tool-calling)**: When the template returns a non-Callable type, a +calibration loop runs the doctest inputs through the LLM once per template +definition and caches the entire conversation (including any incorrect +attempts) as a few-shot prefix for future calls, emulating a learning +process. + +**Case 2 (code generation)**: When the template returns a ``Callable`` type, +the generated code is required to pass the doctests as post-hoc validation. + +In both cases, ``>>>`` examples are stripped from the prompt sent to the LLM +so it cannot memorise the expected outputs. +""" + +import ast +import collections +import collections.abc +import doctest +import inspect +import textwrap +import typing +from collections.abc import Mapping +from typing import Any + +from effectful.handlers.llm.completions import ( + Message, + _make_message, + append_message, + call_user, + get_message_sequence, +) +from effectful.handlers.llm.evaluation import test +from effectful.handlers.llm.template import Template +from effectful.ops.semantics import fwd, handler +from effectful.ops.syntax import ObjectInterpretation, implements + +# --------------------------------------------------------------------------- +# Utility +# --------------------------------------------------------------------------- + + +def extract_doctests(docstring: str) -> tuple[str, list[doctest.Example]]: + """Separate a docstring into text-without-examples and a list of examples. + + Uses :class:`doctest.DocTestParser` to identify ``>>>`` blocks, then + reconstructs the docstring with those blocks removed. + + Returns ``(stripped_text, examples)`` where *stripped_text* is the + docstring with all interactive examples removed. 
+ """ + parser = doctest.DocTestParser() + parts = parser.parse(docstring) + text_parts = [p for p in parts if isinstance(p, str)] + examples = [p for p in parts if isinstance(p, doctest.Example)] + return "".join(text_parts), examples + + +# --------------------------------------------------------------------------- +# Handler +# --------------------------------------------------------------------------- + + +def _is_callable_return(template: Template) -> bool: + """Return ``True`` if *template* synthesises a ``Callable``.""" + ret = template.__signature__.return_annotation + origin = typing.get_origin(ret) + if origin is not None: + # e.g. Callable[[str], int] -> origin is collections.abc.Callable + return origin is collections.abc.Callable + if isinstance(ret, type): + return issubclass(ret, collections.abc.Callable) # type: ignore[arg-type] + return False + + +class DoctestHandler(ObjectInterpretation): + """Use ``>>>`` examples in template docstrings as semantic constraints. + + Install with ``handler(DoctestHandler())`` alongside a provider and an + eval provider. See the module docstring for the two cases handled. + """ + + # Per-template extraction cache (stripped template + examples). + _extraction_cache: dict[Template, tuple[str, list[doctest.Example]]] + + # Case 1: calibration conversation prefix, cached per template. + _prefix_cache: dict[Template, list[Message]] + + # Case 2: per-call formatted doctest source for test() validation. + _doctest_stack: list[str] + + # Case 1: prefix messages to inject before the next call_user. + _pending_prefix: list[Message] | None + + # Re-entrancy guard for calibration. 
+ _calibrating: bool + + def __init__(self) -> None: + self._extraction_cache = {} + self._doctest_stack = [] + self._prefix_cache = {} + self._pending_prefix = None + self._calibrating = False + + # -- helpers ------------------------------------------------------------ + + def _get_doctests(self, template: Template) -> tuple[str, list[doctest.Example]]: + """Return cached ``(stripped_template, examples)`` for *template*.""" + try: + return self._extraction_cache[template] + except KeyError: + result = extract_doctests(template.__prompt_template__) + self._extraction_cache[template] = result + return result + + @implements(Template.__apply__) + def _handle_template[**P, T]( + self, + template: Template[P, T], + *args: P.args, + **kwargs: P.kwargs, + ) -> T: + _, examples = self._get_doctests(template) + + if not examples: + return fwd() + + if _is_callable_return(template): + # Case 2 – code generation: push formatted doctests for test(). + bound_args = inspect.signature(template).bind(*args, **kwargs) + bound_args.apply_defaults() + env = template.__context__.new_child(bound_args.arguments) + formatted = textwrap.dedent(template.__prompt_template__).format_map(env) + self._doctest_stack.append(formatted) + return fwd() + + # Case 1 – tool-calling: calibration + prefix. + if not self._calibrating and template not in self._prefix_cache: + self._calibrate(template, examples) + + if template in self._prefix_cache and self._prefix_cache[template]: + # Schedule prefix injection for _strip_prompt, which runs + # after call_system (so the system message is already first). + self._pending_prefix = self._prefix_cache[template] + try: + return fwd() + finally: + self._pending_prefix = None + + return fwd() + + # -- call_user (stateless stripping) ------------------------------------ + + @implements(call_user) + def _strip_prompt( + self, + template: str, + env: Mapping[str, Any], + ) -> Message: + """Strip ``>>>`` examples and inject any pending calibration prefix. 
+ + This runs after ``call_system`` has already appended the system + message, so injecting prefix messages here keeps the correct order: + system → prefix user/assistant turns → actual user message. + """ + # Inject cached calibration prefix (Case 1) into the message + # sequence before the actual user message. + if self._pending_prefix is not None: + for msg in self._pending_prefix: + append_message(msg) + self._pending_prefix = None + + stripped, _ = extract_doctests(template) + return fwd(stripped, env) + + # -- test (Case 2 validation) ------------------------------------------- + + @implements(test) + def _run_from_stack(self, obj: object, ctx: typing.Mapping[str, Any]) -> None: + if not self._doctest_stack: + return + doctest_source = self._doctest_stack.pop() + if not doctest_source.strip(): + return + + globs = dict(ctx) + parser = doctest.DocTestParser() + test_case = parser.get_doctest( + doctest_source, + globs, + name=( + f"{getattr(obj, '__name__', obj.__class__.__name__)}" + ".__template_doctest__" + ), + filename=None, + lineno=0, + ) + if not test_case.examples: + return + + output: list[str] = [] + runner = doctest.DocTestRunner(verbose=False) + runner.run(test_case, out=output.append) + results = runner.summarize(verbose=False) + if results.failed: + report = "".join(output).strip() + if not report: + report = ( + f"{results.failed} doctest(s) failed " + f"out of {results.attempted} attempted." + ) + raise TypeError(f"doctest failed:\n{report}") + + # -- Case 1 calibration ------------------------------------------------- + def _calibrate( + self, + template: Template, + examples: list[doctest.Example], + ) -> None: + """Run a calibration loop for tool-calling templates. + + For each doctest example that calls *template*, the template is + invoked with the example's arguments (prompt stripped of doctests). 
+ All conversation turns — including any incorrect attempts — are + accumulated into a prefix that is cached for future calls, so the + LLM can learn from the full experience. + """ + prefix_messages: list[Message] = [] + self._calibrating = True + + try: + for example in examples: + call_args, call_kwargs = _parse_template_call( + example, template.__name__ + ) + if call_args is None: + continue # not a call to this template + + # Run in an isolated message sequence. + cal_msgs: collections.OrderedDict[str, Message] = ( + collections.OrderedDict() + ) + with handler({get_message_sequence: lambda: cal_msgs}): + result = template(*call_args, **call_kwargs) + + # Check output; append corrective feedback if wrong. + checker = doctest.OutputChecker() + actual = repr(result) + "\n" + # example.options is dict[int, bool]; reduce to int flags. + optionflags = 0 + for flag, val in example.options.items(): + if val: + optionflags |= flag + if not checker.check_output(example.want, actual, optionflags): + append_message( + _make_message( + { + "role": "user", + "content": ( + f"That was incorrect. " + f"Expected {example.want.strip()!r} " + f"but got {repr(result)!r}." + ), + } + ) + ) + + # Keep user/assistant turns (skip system messages since + # call_system will re-add it during the actual call). + prefix_messages.extend( + m for m in cal_msgs.values() if m["role"] != "system" + ) + finally: + self._calibrating = False + + self._prefix_cache[template] = prefix_messages + + +def _parse_template_call( + example: doctest.Example, template_name: str +) -> tuple[list[Any] | None, dict[str, Any] | None]: + """Extract positional and keyword args from a doctest example. + + Returns ``(args, kwargs)`` if the example is a call to *template_name*, + or ``(None, None)`` otherwise. 
+ """ + source = example.source.strip() + try: + tree = ast.parse(source, mode="eval") + except SyntaxError: + return None, None + + expr = tree.body + if not isinstance(expr, ast.Call): + return None, None + if not isinstance(expr.func, ast.Name): + return None, None + if expr.func.id != template_name: + return None, None + + try: + pos_args = [ast.literal_eval(a) for a in expr.args] + kw_args = { + kw.arg: ast.literal_eval(kw.value) + for kw in expr.keywords + if kw.arg is not None + } + except (ValueError, TypeError): + return None, None + + return pos_args, kw_args diff --git a/effectful/handlers/llm/evaluation.py b/effectful/handlers/llm/evaluation.py index 087ba92d..f3f378d7 100644 --- a/effectful/handlers/llm/evaluation.py +++ b/effectful/handlers/llm/evaluation.py @@ -2,14 +2,12 @@ import builtins import collections.abc import copy -import doctest import inspect import keyword import linecache import random import string import sys -import textwrap import types import typing from collections.abc import Mapping @@ -26,9 +24,7 @@ safe_globals, ) -from effectful.handlers.llm.template import Template from effectful.internals.unification import nested_type -from effectful.ops.semantics import fwd from effectful.ops.syntax import ObjectInterpretation, defop, implements from effectful.ops.types import Operation @@ -688,83 +684,6 @@ def mypy_type_check( return None -def _run_doctests(obj: object, ctx: typing.Mapping[str, Any]) -> None: - name = getattr(obj, "__name__", obj.__class__.__name__) - globs = dict(ctx) - finder = doctest.DocTestFinder(exclude_empty=True) - if isinstance(obj, types.ModuleType): - tests = finder.find(obj, name=name, globs=globs, module=False) - else: - tests = finder.find(obj, name=name, globs=globs) - if not tests: - return - - output: list[str] = [] - runner = doctest.DocTestRunner(verbose=False) - for test in tests: - runner.run(test, out=output.append) - results = runner.summarize(verbose=False) - if results.failed: - report = 
"".join(output).strip() - if not report: - report = ( - f"{results.failed} doctest(s) failed " - f"out of {results.attempted} attempted." - ) - raise TypeError(f"doctest failed:\n{report}") - - -class DoctestHandler(ObjectInterpretation): - """Collect doctests from templates and run them on synthesis results.""" - - _doctest_stack: list[str] - - def __init__(self): - self._doctest_stack = [] - - @implements(Template.__apply__) - def _capture_doctest[**P, T]( - self, template: Template[P, T], *args: P.args, **kwargs: P.kwargs - ) -> T: - bound_args = inspect.signature(template).bind(*args, **kwargs) - bound_args.apply_defaults() - env = template.__context__.new_child(bound_args.arguments) - doctest_source = textwrap.dedent(template.__prompt_template__).format_map(env) - self._doctest_stack.append(doctest_source) - return fwd() - - @implements(test) - def _run_from_stack(self, obj: object, ctx: typing.Mapping[str, Any]) -> None: - if not self._doctest_stack: - return - doctest_source = self._doctest_stack.pop() - if not doctest_source.strip(): - return - globs = dict(ctx) - parser = doctest.DocTestParser() - test_case = parser.get_doctest( - doctest_source, - globs, - name=f"{getattr(obj, '__name__', obj.__class__.__name__)}.__template_doctest__", - filename=None, - lineno=0, - ) - if not test_case.examples: - return - output: list[str] = [] - runner = doctest.DocTestRunner(verbose=False) - runner.run(test_case, out=output.append) - results = runner.summarize(verbose=False) - if results.failed: - report = "".join(output).strip() - if not report: - report = ( - f"{results.failed} doctest(s) failed " - f"out of {results.attempted} attempted." 
- ) - raise TypeError(f"doctest failed:\n{report}") - - # Eval Providers diff --git a/tests/test_handlers_llm_doctest.py b/tests/test_handlers_llm_doctest.py index cdaea55a..09d4b5ee 100644 --- a/tests/test_handlers_llm_doctest.py +++ b/tests/test_handlers_llm_doctest.py @@ -4,9 +4,14 @@ import pytest from effectful.handlers.llm import Template -from effectful.handlers.llm.completions import LiteLLMProvider, ResultDecodingError +from effectful.handlers.llm.completions import ( + LiteLLMProvider, + ResultDecodingError, + call_user, +) +from effectful.handlers.llm.doctest import DoctestHandler, extract_doctests from effectful.handlers.llm.encoding import Encodable, SynthesizedFunction -from effectful.handlers.llm.evaluation import DoctestHandler, UnsafeEvalProvider +from effectful.handlers.llm.evaluation import UnsafeEvalProvider from effectful.ops.semantics import NotHandled, handler HAS_OPENAI_KEY = "OPENAI_API_KEY" in os.environ and os.environ["OPENAI_API_KEY"] @@ -22,7 +27,6 @@ def synthesize_counter_with_doctest(char: str) -> Callable[[str], int]: The function should be case-sensitive. - Examples: >>> count_char("banana") 4 """ @@ -36,7 +40,6 @@ def synthesize_inner_with_doctest(char: str) -> Callable[[str], int]: The function should be case-sensitive. 
- Examples: >>> count_char("orange") 3 """ @@ -51,8 +54,83 @@ def synthesize_outer(char: str) -> Callable[[str], int]: raise NotHandled +# --------------------------------------------------------------------------- +# Unit tests: extract_doctests +# --------------------------------------------------------------------------- + + +class TestExtractDoctests: + """Tests for the extract_doctests utility.""" + + def test_strips_examples(self): + docstring = ( + "Compute something.\n\n >>> foo(1)\n 2\n >>> foo(3)\n 4\n" + ) + stripped, examples = extract_doctests(docstring) + assert ">>>" not in stripped + assert len(examples) == 2 + assert examples[0].source.strip() == "foo(1)" + assert examples[0].want == "2\n" + assert examples[1].source.strip() == "foo(3)" + assert examples[1].want == "4\n" + + def test_no_examples(self): + docstring = "Just a description.\nNo examples here.\n" + stripped, examples = extract_doctests(docstring) + assert stripped == docstring + assert examples == [] + + def test_preserves_non_example_text(self): + docstring = "Title.\n\nSome details.\n\n >>> f(1)\n 42\n\nMore text.\n" + stripped, examples = extract_doctests(docstring) + assert "Title." in stripped + assert "Some details." in stripped + assert "More text." 
in stripped + assert ">>>" not in stripped + assert len(examples) == 1 + + +# --------------------------------------------------------------------------- +# Unit tests: Case 2 – prompt stripping +# --------------------------------------------------------------------------- + + +class TestCase2PromptStripping: + """Verify that call_user receives a stripped template (no >>> examples).""" + + def test_call_user_receives_stripped_template(self): + """The DoctestHandler should strip >>> from the template before fwd.""" + captured_templates: list[str] = [] + + def spy_call_user(template, env): + captured_templates.append(template) + # Return a dummy message + return { + "role": "user", + "content": template, + "id": "test-id", + } + + doctest_handler = DoctestHandler() + # DoctestHandler must be inner (most recent) so _strip_prompt runs + # first, then fwd() reaches the spy. + with handler({call_user: spy_call_user}), handler(doctest_handler): + # Directly invoke call_user with a template containing >>> + template_str = "Generate function.\n\n >>> foo(1)\n 42\n" + call_user(template_str, {}) + + assert len(captured_templates) == 1 + assert ">>>" not in captured_templates[0] + assert "Generate function." 
in captured_templates[0] + + +# --------------------------------------------------------------------------- +# Unit tests: Case 2 – doctest execution (existing tests, updated) +# --------------------------------------------------------------------------- + + class TestDoctestExecution: - """Tests for doctest execution during callable synthesis.""" + """Tests for doctest execution during callable synthesis (Case 2).""" def test_decode_runs_doctest(self): encodable = Encodable.define(Callable[[str], int], {}) @@ -90,3 +168,54 @@ def test_nested_synthesis_doctest_runs(self): ): with pytest.raises(ResultDecodingError, match="doctest failed"): synthesize_outer("o") + + +# --------------------------------------------------------------------------- +# Unit tests: Case 1 – calibration +# --------------------------------------------------------------------------- + + +@Template.define +def summarize(text: str) -> str: + """Summarize the following text into a single short sentence: '{text}' + + >>> summarize("The quick brown fox jumps over the lazy dog.") + 'A fox jumps over a dog.' 
+ """ + raise NotHandled + + +class TestCase1Calibration: + """Tests for Case 1 (tool-calling) calibration and prefix caching.""" + + def test_callable_detection(self): + """Templates returning Callable should be Case 2, others Case 1.""" + from effectful.handlers.llm.doctest import _is_callable_return + + assert _is_callable_return(synthesize_counter_with_doctest) is True + assert _is_callable_return(summarize) is False + + def test_extraction_cache_populated(self): + """_get_doctests should populate the extraction cache.""" + dh = DoctestHandler() + stripped, examples = dh._get_doctests(summarize) + assert ">>>" not in stripped + assert len(examples) == 1 + # Second call should return cached result + stripped2, examples2 = dh._get_doctests(summarize) + assert stripped2 is stripped + assert examples2 is examples + + @requires_openai + def test_case1_calibration_integration(self): + """End-to-end: calibration should cache a prefix for tool-calling.""" + provider = LiteLLMProvider(model="gpt-4o-mini") + dh = DoctestHandler() + with handler(provider), handler(dh): + # This should trigger calibration for the summarize template + result = summarize("The quick brown fox jumps over the lazy dog.") + + # After the call, summarize should have a cached prefix + assert summarize in dh._prefix_cache + assert isinstance(result, str) + assert len(result) > 0 From dd2e3d6fcfda793c268d22a529d5d7077bbb32ce Mon Sep 17 00:00:00 2001 From: datvo06 Date: Mon, 9 Feb 2026 17:17:24 -0500 Subject: [PATCH 13/18] Minor --- docs/source/llm.ipynb | 186 +++++++++++++++++++----------- effectful/handlers/llm/doctest.py | 14 +-- 2 files changed, 125 insertions(+), 75 deletions(-) diff --git a/docs/source/llm.ipynb b/docs/source/llm.ipynb index 36e88eca..8bf07c64 100644 --- a/docs/source/llm.ipynb +++ b/docs/source/llm.ipynb @@ -100,17 +100,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "In a pond where the waters are still,\n", - "Lived a fish with a notable skill,\n", - "He'd leap 
in the air,\n", - "With grace that was rare,\n", - "And land with a splash for a thrill!\n", + "There once was a fish quite spry,\n", + "Who wanted to soar through the sky.\n", + "He jumped with great flare,\n", + "Flew high in the air,\n", + "But found he was bound to fry.\n", "----------------------------------------\n", - "In the sea where the big fishes play,\n", - "A small fish swam every which way.\n", - "With eyes wide and bright,\n", - "It jumped with delight,\n", - "Dodging hooks while it danced in the bay.\n" + "There once was a fish who could sing,\n", + "He'd belt out tunes in the spring.\n", + "But once ice would form,\n", + "He'd switch to a norm—\n", + "Charming all with his fishy lip sync!\n" ] } ], @@ -140,13 +140,17 @@ "output_type": "stream", "text": [ "\n", - "In still waters, grace,\n", - "Silver scales in sunlit dance,\n", - "Fish swim, life unfolds.\n", + "\n", + "\n", + "Gleaming finned glides swift, \n", + "Under ripples, life abounds, \n", + "Silent waters hold.\n", "----------------------------------------\n", - "In still waters, grace,\n", - "Silver scales in sunlit dance,\n", - "Fish swim, life unfolds.\n", + "\n", + "\n", + "Gleaming finned glides swift, \n", + "Under ripples, life abounds, \n", + "Silent waters hold.\n", "\n" ] }, @@ -164,21 +168,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "Swimming in the deep, \n", - "Silent fins glide through the waves, \n", - "Secrets in the blue.\n", + "Silent waters swim,\n", + "Silver scales shimmer beneath—\n", + "Nature's quiet dance.\n", "----------------------------------------\n", - "Silent waters gleam, \n", - "Fish weave tales beneath the waves, \n", - "Nature's fluid dance.\n", + "Gliding through cool streams, \n", + "Silver scales in sunlight glow, \n", + "Silent, swift they dart.\n", "\n", - "Below ripples glide,\n", - "Silver scales in liquid dance,\n", - "Nature's grace in flow.\n", + "Beneath waves they glide, \n", + "Scales flashing in sunlit beams, \n", + "Silent 
wanderers.\n", "----------------------------------------\n", - "Silent waters flow, \n", - "Where vibrant scales shimmer bright, \n", - "Fish dance in moon's glow.\n" + "Gliding scales shimmer,\n", + "Beneath rippling waters,\n", + "Silent whispers swim.\n" ] } ], @@ -325,8 +329,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Without DoctestHandler: Python is recognized for its readability as a high-level programming language.\n", - "With DoctestHandler: Python is a readable, high-level programming language.\n", + "Without DoctestHandler: Python is a user-friendly, high-level programming language renowned for its readability.\n", + "With DoctestHandler: Python is a high-level programming language valued for its readability.\n", "\n", "Calibration prefix cached: True\n", "Prefix messages: 4\n" @@ -456,7 +460,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Among the cities listed, Barcelona currently has good weather, being described as \"sunny.\"\n" + "Based on the current weather conditions, Barcelona has sunny weather and would be a great choice for a city with good weather.\n" ] } ], @@ -516,7 +520,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "A simple yellow smiley face with black eyes and a smile.\n" + "A simple digital smiley face with a yellow background and a basic black smile and eyes.\n" ] } ], @@ -572,7 +576,7 @@ "Who's there?\n", "Lizard.\n", "Lizard who?\n", - "Lizard who? Lizard you can't hear, but I'm still knocking!\n", + "Lizard who? 
Lizard you always telling people not to take life's scaly bits too seriously!\n", "> The crowd laughs politely.\n" ] } @@ -636,14 +640,10 @@ "=== Story with moral ===\n", "\n", "\n", - "Whiskers learned this valuable lesson and became a wiser cat, embarking on adventures that were both thrilling and safe, living happily amidst his curious wonders.\n", + "And so, Whiskers learned to balance her adventurous spirit with a touch of caution, prompting others to respect her wise curiosity as she continued to discover the world's wonders.\n", "\n", "=== Funny story ===\n", - "\n", - "\n", - "---\n", - "\n", - "Pickles' story teaches us that sometimes, embracing our curiosity can lead to the unexpected, and perhaps hilariously so! But, in embracing what makes us different, we might just bring joy to those around us, too.\n" + " 🐱😂\n" ] } ], @@ -714,9 +714,7 @@ "output_type": "stream", "text": [ "Error: Service unavailable! Attempt 1/3. Please retry.\n", - "Result: The data fetched successfully is: `[1, 2, 3]`. \n", - "\n", - "If you need further assistance with this data or have any other queries, feel free to ask! Retries: 3\n" + "Result: The data retrieved from the unstable service is: `[1, 2, 3]`. The service eventually responded successfully on the second attempt. Retries: 3\n" ] } ], @@ -777,7 +775,7 @@ "value.score\n", " score must be 1–5, got 9 [type=invalid_score, input_value=9, input_type=int]. Please provide a valid response and try again.\n", "Score: 5/5\n", - "Explanation: Die Hard is a classic action film that's widely regarded as one of the best in its genre. It features intense action sequences, memorable one-liners, and a charismatic performance by Bruce Willis. The film's clever plot and well-executed direction keep viewers on the edge of their seats. 
Given its enduring popularity and influence on the action genre, I would rate it a 5 out of 5.\n" + "Explanation: Die Hard is widely acclaimed as a quintessential action film, characterized by its intense narrative, well-crafted characters, and unforgettable lines. Bruce Willis's portrayal of John McClane remains iconic, solidifying the film's status in cinematic history. The film's masterful combination of action, humor, and suspense earns it a top score of 5 out of 5.\n" ] } ], @@ -840,7 +838,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "9d02bc67", "metadata": {}, "outputs": [ @@ -850,32 +848,84 @@ "text": [ "Sub-templates available to write_story: dict_keys(['limerick', 'haiku_no_cache', 'primes', 'count_char', 'summarize_doctest', 'count_char_with_bad_doctest', 'cities', 'weather', 'vacation', 'describe_image', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny', 'write_story', 'unstable_service', 'fetch_data', 'give_rating_for_movie', 'write_chapter', 'judge_chapter'])\n", "=== Story with moral ===\n", - "def write_moral_story(topic: str) -> str:\n", - " story_parts = []\n", + "def write_story_moral(topic: str) -> str:\n", + " story = \"\"\n", " chapter_number = 1\n", - " chapter_names = [\"Beginning\", \"Middle\", \"Climax\", \"Conclusion\"]\n", - " \n", - " for chapter_name in chapter_names:\n", - " current_chapter = write_chapter(chapter_number, chapter_name)\n", - " story_parts.append(current_chapter)\n", - " if not judge_chapter(' '.join(story_parts), chapter_number):\n", - " raise ValueError(f\"Chapter {chapter_number} is not coherent with the story so far.\")\n", + "\n", + " # Write up to 10 chapters or until the story is coherent\n", + " while chapter_number <= 10:\n", + " chapter_name = f\"Chapter {chapter_number} on {topic}\"\n", + " chapter = write_chapter(chapter_number, chapter_name)\n", + " story += chapter + \" \"\n", + "\n", + " if judge_chapter(story, chapter_number):\n", + " break\n", + 
"\n", " chapter_number += 1\n", - " \n", - " full_story = ' '.join(story_parts)\n", - " return full_story\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Chapter 2 is not coherent with the story so far.", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mValueError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[17]\u001b[39m\u001b[32m, line 36\u001b[39m\n\u001b[32m 34\u001b[39m function_that_writes_story = write_multi_chapter_story(\u001b[33m\"\u001b[39m\u001b[33mmoral\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 35\u001b[39m \u001b[38;5;28mprint\u001b[39m(inspect.getsource(function_that_writes_story))\n\u001b[32m---> \u001b[39m\u001b[32m36\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[43mfunction_that_writes_story\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43ma curious cat\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m)\n\u001b[32m 37\u001b[39m \u001b[38;5;28mprint\u001b[39m()\n", - "\u001b[36mFile \u001b[39m\u001b[32m:12\u001b[39m, in \u001b[36mwrite_moral_story\u001b[39m\u001b[34m(topic)\u001b[39m\n\u001b[32m 10\u001b[39m story_parts.append(current_chapter)\n\u001b[32m 11\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m judge_chapter(\u001b[33m'\u001b[39m\u001b[33m \u001b[39m\u001b[33m'\u001b[39m.join(story_parts), chapter_number):\n\u001b[32m---> \u001b[39m\u001b[32m12\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mChapter \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mchapter_number\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m is not coherent with the story so far.\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 13\u001b[39m chapter_number += \u001b[32m1\u001b[39m\n\u001b[32m 15\u001b[39m full_story = \u001b[33m'\u001b[39m\u001b[33m 
\u001b[39m\u001b[33m'\u001b[39m.join(story_parts)\n", - "\u001b[31mValueError\u001b[39m: Chapter 2 is not coherent with the story so far." + "\n", + " return story.strip()\n", + "Once upon a time in the bustling city of Numeria, there stood a simple yet proud apartment building known as One Tower. Among its residents was a particularly special apartment: Apartment Number 1.\n", + "\n", + "Apartment 1 had seen many a resident pass through its doors, each leaving an indelible mark on its walls. The stories of joy, laughter, and sometimes, tears, filled the rooms with a palpable warmth. But none were as memorable as the story of little Benji, a bright-eyed boy with an insatiable curiosity.\n", + "\n", + "Benji moved in with his father, Mr. Ferguson, after the unfortunate passing of his mother. Despite the grief that shadowed their lives, Benji's curiosity never ceased. Each day, after school, he would sit in front of a large, dusty window and peer down at the vibrant street below.\n", + "\n", + "One day, as autumn leaves danced across the cityscape, Benji noticed a group of pigeons gathering around a mysterious figure in the park. This figure was dressed in an old, oversized coat, with a wide-brimmed hat casting a shadow over his face.\n", + "\n", + "Determined to uncover the secret of the \"Birdman\", as Benji called him, he began sneaking out every afternoon to observe him. Benji soon discovered that Birdman came to the park every day at exactly two post-meridian, feeding the pigeons and sometimes playing a gentle tune on a harmonica.\n", + "\n", + "Curiosity got the better of Benji, and one crisp afternoon, he mustered the courage to approach the Birdman. \"Excuse me, sir,\" Benji piped up, \"why do you come here every day and play for the pigeons?\"\n", + "\n", + "The Birdman paused, then turned to Benji with a smile. \"I was like you once, fascinated by the simplest things. You see, these pigeons remind me of the joy found in little moments. And the music? 
It's a way to share that joy, even if it's just with these feathered friends.\"\n", + "\n", + "Benji nodded, understanding a deeper lesson nestled in those words. The Birdman gifted him the harmonica, a melodic reminder that joy could be found and shared anywhere.\n", + "\n", + "Benji returned home, his heart light and content, his adventures now filled with music. Apartment 1 at One Tower had become not just a place of residence but a cocoon of discovery and hope. Here, in this little corner of Numeria, Benji learned that no matter how broken the world might seem, a simple melody could bring light into the darkest of places.\n", + "\n", + "And so, Apartment Number 1 continued to absorb the tales of its inhabitants, its walls a testament to the resilience of the human spirit, one story at a time. Once upon a time, in the whimsical land of Numerville, there lived a cheerful little number named Two. Unlike other numbers who busied themselves with addition and multiplication all day long, Two had a different perspective. He loved to explore new possibilities in the world of numbers.\n", + "\n", + "One sunny morning, as Two hopped along the number line, he stumbled upon a mysterious equation carved into the side of a hill. Curious, he tilted his head to read it. It stated, \"The secret power of Two lies in its ability to create pairs.\"\n", + "\n", + "Intrigued by this revelation, Two decided to test the theory. He went about pairing with anything he encountered, creating harmonious combinations everywhere he went. First, he joined forces with Three, forming the stable duo of \"Twosday-Tuesday\" which became a day off for numbers to unwind and pair up with their loves. 
Next, he met Seven, combining to create a portal known as \"Two-Seven-Heaven,\" a lovely meadow where numbers could relax and enjoy games of addition.\n", + "\n", + "As Two continued to explore and pair with various numbers, he discovered that his special ability brought balance and joy wherever he roamed. News of Two’s unifying pairs spread across Numerville, and soon even numbers on the farthest corners of the line eagerly sought to pair with him.\n", + "\n", + "One day, the Grand Mathematician of Numerville summoned Two to commend his efforts in spreading harmony and showcasing the power contained in unity. \"Two,\" boomed the Grand Mathematician, \"your journey teaches us that every number has its role, and when we unite our differences, we bring extraordinary solutions to the equation of life.\"\n", + "\n", + "With a humble smile, Two nodded, understanding that his simple act of pairing had created waves of change in Numerville. And from that day on, in honour of Two, every pairing was celebrated, strengthening the bonds between the numbers of the land.\n", + "\n", + "And so, Two lived happily, forever exploring new pairings and enriching the numerical fabric of Numerville with the magic of unity. His tale became a timeless reminder that even the smallest number can create the greatest impact when paired with purpose and kindness. **Title: The Tale of Three**\n", + "\n", + "Once upon a time in the bustling town of Numerland, where digits mingled freely, there lived a young number named Three. Unlike his neighbors, who boasted tens and twenties, Three was a modest number, yet he wore his simplicity with pride.\n", + "\n", + "Three spent most of his days helping Multiplication and Division. Despite his humble size, he was a favorite for his ability to transform into six or nine effortlessly. 
Whether playing tag with Addition or solving problems with Subtraction, Three was always full of energy.\n", + "\n", + "One day, a grand contest was announced by Count, the wise old mathematician. It was a test of capability, where numbers were tasked with forming equations that resulted in the grand number hundred. Eager to participate, Three paired up with his closest friends, Thirty and Sixty-Seven.\n", + "\n", + "The trio worked tirelessly, brainstorming day and night. Their journey to create the perfect equation was not easy. Many combinations failed them, when impatience led to miscalculations, or exhaustion dulled their thinking. But they pressed on, each learning from their missteps.\n", + "\n", + "On the day of the competition, the trio stepped up with confidence. They showcased their equation: Thirty plus Sixty-Seven plus Three. Their daring combination was simple yet precise, achieving exactly one hundred. The other numbers watched in awe as the practitioner judges nodded in approval.\n", + "\n", + "Three's crew won that day, proving that even the smallest numbers could achieve greatness with teamwork and determination. They were celebrated throughout Numerland as heroes, teaching everyone the valuable lesson that no number is too small to make a significant impact.\n", + "\n", + "From that day forward, Three was never underestimated again. Instead, he became an inspiration, always reminding others that with big dreams and the right comrades, anything is possible. Once upon a time, in the small village of Numerica, lived a unique character known as Four. Four wasn't just any number; he was proud to be an important figure in mathematics, holding a curious position between three and five.\n", + "\n", + "One day, the numbers in the village decided to organize a grand festival to celebrate the importance of numbers in everyday life. 
Each number was tasked with showcasing their unique attributes and contributions to the world.\n", + "\n", + "As the festival began, One proudly displayed the concept of singularity and leadership. Two showcased partnerships and teamwork. Three brought to life the idea of balance and stability, often seen in tripods and tricycles. Meanwhile, Five displayed living in harmony with nature, showing off the perfect number of fingers on a hand.\n", + "\n", + "When it was Four’s turn, he decided to illustrate the beauty of symmetry and balance. He reminded everyone of the four seasons that bring change and renewal, the four cardinal directions guiding travelers on their journeys, and the four limbs that enable many creatures to move with grace.\n", + "\n", + "The others nodded in agreement, but one doubt lingered: Four had a challenge to distinguish himself further. So, he told a story of his essential role in harmony and structure — such as providing the foundation for squares and rectangles, vital in architecture and art.\n", + "\n", + "As Four continued, the village listened intently and the other numbers began clapping. Four not only demonstrated his versatility but also showed that even in the world of numbers, unity and function are most vital when each part respects the whole.\n", + "\n", + "As the festival concluded, the numbers realized how entangled their functions were. Each number played an integral role in building the tapestry of reality. Four felt proud knowing he had highlighted the significance of his position and earned the respect of all his numerical peers.\n", + "\n", + "The celebration deepened friendships amongst all numbers and ended with a beautiful display of fireworks forming a giant Four in the sky, reminding everyone of his place in the mathematical universe.\n", + "\n", + "Thus, in Numerica, the festival ended with a valuable lesson: every number, no matter how small or large, holds importance and together, they create harmony in the world. 
And so, Four felt fulfilled, contributing to the unity of the lively village of Numerica.\n", + "\n" ] } ], diff --git a/effectful/handlers/llm/doctest.py b/effectful/handlers/llm/doctest.py index e3f86332..b6d4a203 100644 --- a/effectful/handlers/llm/doctest.py +++ b/effectful/handlers/llm/doctest.py @@ -95,15 +95,15 @@ class DoctestHandler(ObjectInterpretation): # Case 1: prefix messages to inject before the next call_user. _pending_prefix: list[Message] | None - # Re-entrancy guard for calibration. - _calibrating: bool + # Re-entrancy guard: set of templates currently being calibrated. + _calibrating: set[Template] def __init__(self) -> None: self._extraction_cache = {} self._doctest_stack = [] self._prefix_cache = {} self._pending_prefix = None - self._calibrating = False + self._calibrating = set() # -- helpers ------------------------------------------------------------ @@ -138,7 +138,7 @@ def _handle_template[**P, T]( return fwd() # Case 1 – tool-calling: calibration + prefix. - if not self._calibrating and template not in self._prefix_cache: + if template not in self._calibrating and template not in self._prefix_cache: self._calibrate(template, examples) if template in self._prefix_cache and self._prefix_cache[template]: @@ -229,14 +229,14 @@ def _calibrate( LLM can learn from the full experience. """ prefix_messages: list[Message] = [] - self._calibrating = True + self._calibrating.add(template) try: for example in examples: call_args, call_kwargs = _parse_template_call( example, template.__name__ ) - if call_args is None: + if call_args is None or call_kwargs is None: continue # not a call to this template # Run in an isolated message sequence. 
@@ -274,7 +274,7 @@ def _calibrate( m for m in cal_msgs.values() if m["role"] != "system" ) finally: - self._calibrating = False + self._calibrating.discard(template) self._prefix_cache[template] = prefix_messages From 5095567adc9f50a08048fa8c475be0fb80de9899 Mon Sep 17 00:00:00 2001 From: datvo06 Date: Thu, 26 Feb 2026 11:33:12 -0500 Subject: [PATCH 14/18] Update doctest handler --- effectful/handlers/llm/doctest.py | 127 +++++++++++++++-------------- tests/test_handlers_llm_doctest.py | 25 +++++- 2 files changed, 91 insertions(+), 61 deletions(-) diff --git a/effectful/handlers/llm/doctest.py b/effectful/handlers/llm/doctest.py index b6d4a203..9743b642 100644 --- a/effectful/handlers/llm/doctest.py +++ b/effectful/handlers/llm/doctest.py @@ -20,8 +20,6 @@ import collections import collections.abc import doctest -import inspect -import textwrap import typing from collections.abc import Mapping from typing import Any @@ -31,13 +29,14 @@ _make_message, append_message, call_user, - get_message_sequence, ) from effectful.handlers.llm.evaluation import test from effectful.handlers.llm.template import Template -from effectful.ops.semantics import fwd, handler +from effectful.ops.semantics import fwd from effectful.ops.syntax import ObjectInterpretation, implements +_SENTINEL = object() + # --------------------------------------------------------------------------- # Utility # --------------------------------------------------------------------------- @@ -89,8 +88,8 @@ class DoctestHandler(ObjectInterpretation): # Case 1: calibration conversation prefix, cached per template. _prefix_cache: dict[Template, list[Message]] - # Case 2: per-call formatted doctest source for test() validation. - _doctest_stack: list[str] + # Case 2: per-call cached doctest examples for test() validation. + _doctest_stack: list[list[doctest.Example]] # Case 1: prefix messages to inject before the next call_user. 
_pending_prefix: list[Message] | None @@ -129,12 +128,8 @@ def _handle_template[**P, T]( return fwd() if _is_callable_return(template): - # Case 2 – code generation: push formatted doctests for test(). - bound_args = inspect.signature(template).bind(*args, **kwargs) - bound_args.apply_defaults() - env = template.__context__.new_child(bound_args.arguments) - formatted = textwrap.dedent(template.__prompt_template__).format_map(env) - self._doctest_stack.append(formatted) + # Case 2 – code generation: push cached examples for test(). + self._doctest_stack.append(examples) return fwd() # Case 1 – tool-calling: calibration + prefix. @@ -182,24 +177,22 @@ def _strip_prompt( def _run_from_stack(self, obj: object, ctx: typing.Mapping[str, Any]) -> None: if not self._doctest_stack: return - doctest_source = self._doctest_stack.pop() - if not doctest_source.strip(): + examples = self._doctest_stack.pop() + if not examples: return - globs = dict(ctx) - parser = doctest.DocTestParser() - test_case = parser.get_doctest( - doctest_source, - globs, - name=( - f"{getattr(obj, '__name__', obj.__class__.__name__)}" - ".__template_doctest__" - ), + name = ( + f"{getattr(obj, '__name__', obj.__class__.__name__)}" + ".__template_doctest__" + ) + test_case = doctest.DocTest( + examples=examples, + globs=dict(ctx), + name=name, filename=None, lineno=0, + docstring=None, ) - if not test_case.examples: - return output: list[str] = [] runner = doctest.DocTestRunner(verbose=False) @@ -220,17 +213,29 @@ def _calibrate( template: Template, examples: list[doctest.Example], ) -> None: - """Run a calibration loop for tool-calling templates. - - For each doctest example that calls *template*, the template is - invoked with the example's arguments (prompt stripped of doctests). - All conversation turns — including any incorrect attempts — are - accumulated into a prefix that is cached for future calls, so the - LLM can learn from the full experience. 
+ """Run calibration as a mini ReAct agent with Agent-style history. + + Reuses the same persistent-history mechanism as :class:`Agent`: a + shared :class:`~collections.OrderedDict` bound to + ``template.__history__`` that accumulates messages across calls. + Each doctest example is evaluated in order; the LLM sees all prior + conversation turns (including any corrective feedback for incorrect + answers) when processing subsequent examples, enabling it to learn + from the full experience. """ - prefix_messages: list[Message] = [] self._calibrating.add(template) + # Agent-style history: a single OrderedDict shared across all + # calibration examples, exactly like Agent.__history__. + shared_history: collections.OrderedDict[str, Message] = ( + collections.OrderedDict() + ) + + # Temporarily bind the shared history to the template, using the + # same mechanism Agent.__get__ uses. + old_history = getattr(template, "__history__", _SENTINEL) + template.__history__ = shared_history # type: ignore[attr-defined] + try: for example in examples: call_args, call_kwargs = _parse_template_call( @@ -239,44 +244,46 @@ def _calibrate( if call_args is None or call_kwargs is None: continue # not a call to this template - # Run in an isolated message sequence. - cal_msgs: collections.OrderedDict[str, Message] = ( - collections.OrderedDict() - ) - with handler({get_message_sequence: lambda: cal_msgs}): - result = template(*call_args, **call_kwargs) + # Call the template; the provider reads template.__history__ + # and writes back after completion, so messages naturally + # accumulate in shared_history. + result = template(*call_args, **call_kwargs) - # Check output; append corrective feedback if wrong. + # Check output; append corrective feedback if wrong so + # subsequent examples benefit from the correction. checker = doctest.OutputChecker() actual = repr(result) + "\n" - # example.options is dict[int, bool]; reduce to int flags. 
optionflags = 0 for flag, val in example.options.items(): if val: optionflags |= flag if not checker.check_output(example.want, actual, optionflags): - append_message( - _make_message( - { - "role": "user", - "content": ( - f"That was incorrect. " - f"Expected {example.want.strip()!r} " - f"but got {repr(result)!r}." - ), - } - ) + feedback = _make_message( + { + "role": "user", + "content": ( + f"That was incorrect. " + f"Expected {example.want.strip()!r} " + f"but got {repr(result)!r}." + ), + } ) - - # Keep user/assistant turns (skip system messages since - # call_system will re-add it during the actual call). - prefix_messages.extend( - m for m in cal_msgs.values() if m["role"] != "system" - ) + shared_history[feedback["id"]] = feedback finally: self._calibrating.discard(template) - - self._prefix_cache[template] = prefix_messages + # Restore original state. + if old_history is _SENTINEL: + try: + del template.__history__ # type: ignore[attr-defined] + except AttributeError: + pass + else: + template.__history__ = old_history # type: ignore[attr-defined] + + # Cache non-system messages as the prefix for future calls. + self._prefix_cache[template] = [ + m for m in shared_history.values() if m["role"] != "system" + ] def _parse_template_call( diff --git a/tests/test_handlers_llm_doctest.py b/tests/test_handlers_llm_doctest.py index 09d4b5ee..4ec655e4 100644 --- a/tests/test_handlers_llm_doctest.py +++ b/tests/test_handlers_llm_doctest.py @@ -1,3 +1,4 @@ +import doctest as _doctest import os from collections.abc import Callable @@ -139,7 +140,10 @@ def test_decode_runs_doctest(self): " return input_string.count('a')" ) doctest_handler = DoctestHandler() - doctest_handler._doctest_stack.append(">>> count_char('banana')\n4\n") + # Push cached Example objects (matching the new _doctest_stack type). 
+ doctest_handler._doctest_stack.append( + [_doctest.Example("count_char('banana')\n", "4\n")] + ) with ( handler(UnsafeEvalProvider()), handler(doctest_handler), @@ -206,6 +210,22 @@ def test_extraction_cache_populated(self): assert stripped2 is stripped assert examples2 is examples + def test_calibration_uses_shared_history(self): + """Calibration should accumulate messages in a shared history + (Agent-style OrderedDict) rather than isolated per-example sequences.""" + from effectful.handlers.llm.doctest import _SENTINEL + + dh = DoctestHandler() + + # Verify template starts without __history__ + assert not hasattr(summarize, "__history__") + + # After calibration, __history__ should be cleaned up + # (We can't run full calibration without a provider, but we can + # verify the sentinel-based save/restore mechanism.) + old = getattr(summarize, "__history__", _SENTINEL) + assert old is _SENTINEL + @requires_openai def test_case1_calibration_integration(self): """End-to-end: calibration should cache a prefix for tool-calling.""" @@ -219,3 +239,6 @@ def test_case1_calibration_integration(self): assert summarize in dh._prefix_cache assert isinstance(result, str) assert len(result) > 0 + + # Calibration should clean up: no lingering __history__ on template + assert not hasattr(summarize, "__history__") From c7e997dbb5a3238e219efb61d8503e770808ce9f Mon Sep 17 00:00:00 2001 From: datvo06 Date: Thu, 26 Feb 2026 11:44:13 -0500 Subject: [PATCH 15/18] More cleanup --- effectful/handlers/llm/doctest.py | 293 ++++++++++++++--------------- tests/test_handlers_llm_doctest.py | 42 +++-- 2 files changed, 163 insertions(+), 172 deletions(-) diff --git a/effectful/handlers/llm/doctest.py b/effectful/handlers/llm/doctest.py index 9743b642..6daf5d7b 100644 --- a/effectful/handlers/llm/doctest.py +++ b/effectful/handlers/llm/doctest.py @@ -6,8 +6,8 @@ **Case 1 (tool-calling)**: When the template returns a non-Callable type, a calibration loop runs the doctest inputs through the LLM 
once per template definition and caches the entire conversation (including any incorrect -attempts) as a few-shot prefix for future calls, emulating a learning -process. +attempts) as a few-shot prefix for future calls, emulating a mini ReAct +agent that learns from its mistakes. **Case 2 (code generation)**: When the template returns a ``Callable`` type, the generated code is required to pass the doctests as post-hoc validation. @@ -18,7 +18,7 @@ import ast import collections -import collections.abc +import contextlib import doctest import typing from collections.abc import Mapping @@ -27,9 +27,9 @@ from effectful.handlers.llm.completions import ( Message, _make_message, - append_message, call_user, ) +from effectful.handlers.llm.encoding import CallableEncodable, Encodable from effectful.handlers.llm.evaluation import test from effectful.handlers.llm.template import Template from effectful.ops.semantics import fwd @@ -37,43 +37,6 @@ _SENTINEL = object() -# --------------------------------------------------------------------------- -# Utility -# --------------------------------------------------------------------------- - - -def extract_doctests(docstring: str) -> tuple[str, list[doctest.Example]]: - """Separate a docstring into text-without-examples and a list of examples. - - Uses :class:`doctest.DocTestParser` to identify ``>>>`` blocks, then - reconstructs the docstring with those blocks removed. - - Returns ``(stripped_text, examples)`` where *stripped_text* is the - docstring with all interactive examples removed. 
- """ - parser = doctest.DocTestParser() - parts = parser.parse(docstring) - text_parts = [p for p in parts if isinstance(p, str)] - examples = [p for p in parts if isinstance(p, doctest.Example)] - return "".join(text_parts), examples - - -# --------------------------------------------------------------------------- -# Handler -# --------------------------------------------------------------------------- - - -def _is_callable_return(template: Template) -> bool: - """Return ``True`` if *template* synthesises a ``Callable``.""" - ret = template.__signature__.return_annotation - origin = typing.get_origin(ret) - if origin is not None: - # e.g. Callable[[str], int] -> origin is collections.abc.Callable - return origin is collections.abc.Callable - if isinstance(ret, type): - return issubclass(ret, collections.abc.Callable) # type: ignore[arg-type] - return False - class DoctestHandler(ObjectInterpretation): """Use ``>>>`` examples in template docstrings as semantic constraints. @@ -91,9 +54,6 @@ class DoctestHandler(ObjectInterpretation): # Case 2: per-call cached doctest examples for test() validation. _doctest_stack: list[list[doctest.Example]] - # Case 1: prefix messages to inject before the next call_user. - _pending_prefix: list[Message] | None - # Re-entrancy guard: set of templates currently being calibrated. _calibrating: set[Template] @@ -101,33 +61,112 @@ def __init__(self) -> None: self._extraction_cache = {} self._doctest_stack = [] self._prefix_cache = {} - self._pending_prefix = None self._calibrating = set() # -- helpers ------------------------------------------------------------ + @classmethod + def extract_doctests(cls, docstring: str) -> tuple[str, list[doctest.Example]]: + """Separate a docstring into text-without-examples and a list of examples. + + Uses :class:`doctest.DocTestParser` to identify ``>>>`` blocks, then + reconstructs the docstring with those blocks removed. 
+ + Returns ``(stripped_text, examples)`` where *stripped_text* is the + docstring with all interactive examples removed. + """ + parser = doctest.DocTestParser() + parts = parser.parse(docstring) + text_parts = [p for p in parts if isinstance(p, str)] + examples = [p for p in parts if isinstance(p, doctest.Example)] + return "".join(text_parts), examples + + @staticmethod + def _parse_template_call( + example: doctest.Example, template_name: str + ) -> tuple[list[Any] | None, dict[str, Any] | None]: + """Extract positional and keyword args from a doctest example. + + Returns ``(args, kwargs)`` if the example is a call to + *template_name*, or ``(None, None)`` otherwise. + """ + source = example.source.strip() + try: + tree = ast.parse(source, mode="eval") + except SyntaxError: + return None, None + + expr = tree.body + if not isinstance(expr, ast.Call): + return None, None + if not isinstance(expr.func, ast.Name): + return None, None + if expr.func.id != template_name: + return None, None + + try: + pos_args = [ast.literal_eval(a) for a in expr.args] + kw_args = { + kw.arg: ast.literal_eval(kw.value) + for kw in expr.keywords + if kw.arg is not None + } + except (ValueError, TypeError): + return None, None + + return pos_args, kw_args + def _get_doctests(self, template: Template) -> tuple[str, list[doctest.Example]]: """Return cached ``(stripped_template, examples)`` for *template*.""" try: return self._extraction_cache[template] except KeyError: - result = extract_doctests(template.__prompt_template__) + result = self.extract_doctests(template.__prompt_template__) self._extraction_cache[template] = result return result + @contextlib.contextmanager + def _bind_history( + self, + template: Template, + history: collections.OrderedDict[str, Message], + ): + """Temporarily bind *history* to ``template.__history__``. + + Uses the same attribute that :class:`Agent` binds via ``__get__``. + The provider reads and writes back to it, so messages accumulate. 
+ """ + old = getattr(template, "__history__", _SENTINEL) + template.__history__ = history # type: ignore[attr-defined] + try: + yield + finally: + if old is _SENTINEL: + try: + del template.__history__ # type: ignore[attr-defined] + except AttributeError: + pass + else: + template.__history__ = old # type: ignore[attr-defined] + + # -- Template.__apply__ ------------------------------------------------- + @implements(Template.__apply__) def _handle_template[**P, T]( self, template: Template[P, T], - *args: P.args, - **kwargs: P.kwargs, + *_args: P.args, + **_kwargs: P.kwargs, ) -> T: _, examples = self._get_doctests(template) if not examples: return fwd() - if _is_callable_return(template): + if isinstance( + Encodable.define(template.__signature__.return_annotation), + CallableEncodable, + ): # Case 2 – code generation: push cached examples for test(). self._doctest_stack.append(examples) return fwd() @@ -136,18 +175,21 @@ def _handle_template[**P, T]( if template not in self._calibrating and template not in self._prefix_cache: self._calibrate(template, examples) - if template in self._prefix_cache and self._prefix_cache[template]: - # Schedule prefix injection for _strip_prompt, which runs - # after call_system (so the system message is already first). - self._pending_prefix = self._prefix_cache[template] - try: + prefix = self._prefix_cache.get(template, []) + if prefix: + # Pre-populate history with the cached calibration prefix + # (Agent-style); the provider will copy it and prepend the + # system message, so the LLM sees: + # system → prefix user/assistant turns → actual user message. 
+ prefix_history: collections.OrderedDict[str, Message] = ( + collections.OrderedDict((m["id"], m) for m in prefix) + ) + with self._bind_history(template, prefix_history): return fwd() - finally: - self._pending_prefix = None return fwd() - # -- call_user (stateless stripping) ------------------------------------ + # -- call_user ---------------------------------------------------------- @implements(call_user) def _strip_prompt( @@ -155,20 +197,8 @@ def _strip_prompt( template: str, env: Mapping[str, Any], ) -> Message: - """Strip ``>>>`` examples and inject any pending calibration prefix. - - This runs after ``call_system`` has already appended the system - message, so injecting prefix messages here keeps the correct order: - system → prefix user/assistant turns → actual user message. - """ - # Inject cached calibration prefix (Case 1) into the message - # sequence before the actual user message. - if self._pending_prefix is not None: - for msg in self._pending_prefix: - append_message(msg) - self._pending_prefix = None - - stripped, _ = extract_doctests(template) + """Strip ``>>>`` examples from the prompt before the LLM sees it.""" + stripped, _ = self.extract_doctests(template) return fwd(stripped, env) # -- test (Case 2 validation) ------------------------------------------- @@ -208,6 +238,7 @@ def _run_from_stack(self, obj: object, ctx: typing.Mapping[str, Any]) -> None: raise TypeError(f"doctest failed:\n{report}") # -- Case 1 calibration ------------------------------------------------- + def _calibrate( self, template: Template, @@ -225,97 +256,51 @@ def _calibrate( """ self._calibrating.add(template) - # Agent-style history: a single OrderedDict shared across all - # calibration examples, exactly like Agent.__history__. shared_history: collections.OrderedDict[str, Message] = ( collections.OrderedDict() ) - # Temporarily bind the shared history to the template, using the - # same mechanism Agent.__get__ uses. 
- old_history = getattr(template, "__history__", _SENTINEL) - template.__history__ = shared_history # type: ignore[attr-defined] - - try: - for example in examples: - call_args, call_kwargs = _parse_template_call( - example, template.__name__ - ) - if call_args is None or call_kwargs is None: - continue # not a call to this template - - # Call the template; the provider reads template.__history__ - # and writes back after completion, so messages naturally - # accumulate in shared_history. - result = template(*call_args, **call_kwargs) - - # Check output; append corrective feedback if wrong so - # subsequent examples benefit from the correction. - checker = doctest.OutputChecker() - actual = repr(result) + "\n" - optionflags = 0 - for flag, val in example.options.items(): - if val: - optionflags |= flag - if not checker.check_output(example.want, actual, optionflags): - feedback = _make_message( - { - "role": "user", - "content": ( - f"That was incorrect. " - f"Expected {example.want.strip()!r} " - f"but got {repr(result)!r}." - ), - } + with self._bind_history(template, shared_history): + try: + for example in examples: + self._run_calibration_example( + template, example, shared_history ) - shared_history[feedback["id"]] = feedback - finally: - self._calibrating.discard(template) - # Restore original state. - if old_history is _SENTINEL: - try: - del template.__history__ # type: ignore[attr-defined] - except AttributeError: - pass - else: - template.__history__ = old_history # type: ignore[attr-defined] + finally: + self._calibrating.discard(template) - # Cache non-system messages as the prefix for future calls. 
self._prefix_cache[template] = [ m for m in shared_history.values() if m["role"] != "system" ] + def _run_calibration_example( + self, + template: Template, + example: doctest.Example, + history: collections.OrderedDict[str, Message], + ) -> None: + """Evaluate one doctest example and append corrective feedback.""" + call_args, call_kwargs = self._parse_template_call( + example, template.__name__ + ) + if call_args is None or call_kwargs is None: + return -def _parse_template_call( - example: doctest.Example, template_name: str -) -> tuple[list[Any] | None, dict[str, Any] | None]: - """Extract positional and keyword args from a doctest example. - - Returns ``(args, kwargs)`` if the example is a call to *template_name*, - or ``(None, None)`` otherwise. - """ - source = example.source.strip() - try: - tree = ast.parse(source, mode="eval") - except SyntaxError: - return None, None - - expr = tree.body - if not isinstance(expr, ast.Call): - return None, None - if not isinstance(expr.func, ast.Name): - return None, None - if expr.func.id != template_name: - return None, None - - try: - pos_args = [ast.literal_eval(a) for a in expr.args] - kw_args = { - kw.arg: ast.literal_eval(kw.value) - for kw in expr.keywords - if kw.arg is not None - } - except (ValueError, TypeError): - return None, None - - return pos_args, kw_args + result = template(*call_args, **call_kwargs) + + checker = doctest.OutputChecker() + actual = repr(result) + "\n" + optionflags = sum(f for f, v in example.options.items() if v) + + if not checker.check_output(example.want, actual, optionflags): + feedback = _make_message( + { + "role": "user", + "content": ( + f"That was incorrect. " + f"Expected {example.want.strip()!r} " + f"but got {repr(result)!r}." 
+ ), + } + ) + history[feedback["id"]] = feedback diff --git a/tests/test_handlers_llm_doctest.py b/tests/test_handlers_llm_doctest.py index 4ec655e4..75d87615 100644 --- a/tests/test_handlers_llm_doctest.py +++ b/tests/test_handlers_llm_doctest.py @@ -10,7 +10,7 @@ ResultDecodingError, call_user, ) -from effectful.handlers.llm.doctest import DoctestHandler, extract_doctests +from effectful.handlers.llm.doctest import DoctestHandler from effectful.handlers.llm.encoding import Encodable, SynthesizedFunction from effectful.handlers.llm.evaluation import UnsafeEvalProvider from effectful.ops.semantics import NotHandled, handler @@ -61,13 +61,13 @@ def synthesize_outer(char: str) -> Callable[[str], int]: class TestExtractDoctests: - """Tests for the extract_doctests utility.""" + """Tests for the DoctestHandler.extract_doctests classmethod.""" def test_strips_examples(self): docstring = ( "Compute something.\n\n >>> foo(1)\n 2\n >>> foo(3)\n 4\n" ) - stripped, examples = extract_doctests(docstring) + stripped, examples = DoctestHandler.extract_doctests(docstring) assert ">>>" not in stripped assert len(examples) == 2 assert examples[0].source.strip() == "foo(1)" @@ -77,13 +77,13 @@ def test_strips_examples(self): def test_no_examples(self): docstring = "Just a description.\nNo examples here.\n" - stripped, examples = extract_doctests(docstring) + stripped, examples = DoctestHandler.extract_doctests(docstring) assert stripped == docstring assert examples == [] def test_preserves_non_example_text(self): docstring = "Title.\n\nSome details.\n\n >>> f(1)\n 42\n\nMore text.\n" - stripped, examples = extract_doctests(docstring) + stripped, examples = DoctestHandler.extract_doctests(docstring) assert "Title." in stripped assert "Some details." in stripped assert "More text." 
in stripped @@ -194,10 +194,16 @@ class TestCase1Calibration: def test_callable_detection(self): """Templates returning Callable should be Case 2, others Case 1.""" - from effectful.handlers.llm.doctest import _is_callable_return + from effectful.handlers.llm.encoding import CallableEncodable, Encodable - assert _is_callable_return(synthesize_counter_with_doctest) is True - assert _is_callable_return(summarize) is False + def is_callable_return(t): + return isinstance( + Encodable.define(t.__signature__.return_annotation), + CallableEncodable, + ) + + assert is_callable_return(synthesize_counter_with_doctest) + assert not is_callable_return(summarize) def test_extraction_cache_populated(self): """_get_doctests should populate the extraction cache.""" @@ -210,21 +216,21 @@ def test_extraction_cache_populated(self): assert stripped2 is stripped assert examples2 is examples - def test_calibration_uses_shared_history(self): - """Calibration should accumulate messages in a shared history - (Agent-style OrderedDict) rather than isolated per-example sequences.""" - from effectful.handlers.llm.doctest import _SENTINEL + def test_bind_history_restores_state(self): + """_bind_history should restore template.__history__ after use.""" + import collections dh = DoctestHandler() - # Verify template starts without __history__ + # Template starts without __history__ assert not hasattr(summarize, "__history__") - # After calibration, __history__ should be cleaned up - # (We can't run full calibration without a provider, but we can - # verify the sentinel-based save/restore mechanism.) 
- old = getattr(summarize, "__history__", _SENTINEL) - assert old is _SENTINEL + history = collections.OrderedDict() + with dh._bind_history(summarize, history): + assert summarize.__history__ is history # type: ignore[attr-defined] + + # Cleaned up after context exit + assert not hasattr(summarize, "__history__") @requires_openai def test_case1_calibration_integration(self): From aff5489e7e720c8e0bd99d18cce8f76102c23cf8 Mon Sep 17 00:00:00 2001 From: datvo06 Date: Thu, 26 Feb 2026 11:48:48 -0500 Subject: [PATCH 16/18] More cleanup --- effectful/handlers/llm/doctest.py | 15 +++++---------- tests/test_handlers_llm_doctest.py | 18 ++++++++++++------ 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/effectful/handlers/llm/doctest.py b/effectful/handlers/llm/doctest.py index 6daf5d7b..2e336b88 100644 --- a/effectful/handlers/llm/doctest.py +++ b/effectful/handlers/llm/doctest.py @@ -116,15 +116,6 @@ def _parse_template_call( return pos_args, kw_args - def _get_doctests(self, template: Template) -> tuple[str, list[doctest.Example]]: - """Return cached ``(stripped_template, examples)`` for *template*.""" - try: - return self._extraction_cache[template] - except KeyError: - result = self.extract_doctests(template.__prompt_template__) - self._extraction_cache[template] = result - return result - @contextlib.contextmanager def _bind_history( self, @@ -158,7 +149,11 @@ def _handle_template[**P, T]( *_args: P.args, **_kwargs: P.kwargs, ) -> T: - _, examples = self._get_doctests(template) + if template not in self._extraction_cache: + self._extraction_cache[template] = self.extract_doctests( + template.__prompt_template__ + ) + _, examples = self._extraction_cache[template] if not examples: return fwd() diff --git a/tests/test_handlers_llm_doctest.py b/tests/test_handlers_llm_doctest.py index 75d87615..ce54f7f8 100644 --- a/tests/test_handlers_llm_doctest.py +++ b/tests/test_handlers_llm_doctest.py @@ -206,15 +206,21 @@ def is_callable_return(t): assert not 
is_callable_return(summarize) def test_extraction_cache_populated(self): - """_get_doctests should populate the extraction cache.""" + """extract_doctests result should be cached per template.""" dh = DoctestHandler() - stripped, examples = dh._get_doctests(summarize) + assert summarize not in dh._extraction_cache + # Populate via extract_doctests + stripped, examples = DoctestHandler.extract_doctests( + summarize.__prompt_template__ + ) + dh._extraction_cache[summarize] = (stripped, examples) assert ">>>" not in stripped assert len(examples) == 1 - # Second call should return cached result - stripped2, examples2 = dh._get_doctests(summarize) - assert stripped2 is stripped - assert examples2 is examples + # Second access returns same objects + assert dh._extraction_cache[summarize] is (stripped, examples) or ( + dh._extraction_cache[summarize][0] is stripped + and dh._extraction_cache[summarize][1] is examples + ) def test_bind_history_restores_state(self): """_bind_history should restore template.__history__ after use.""" From 60144d9e20a36f938be47cf3a1123b55a0f38954 Mon Sep 17 00:00:00 2001 From: datvo06 Date: Thu, 26 Feb 2026 11:48:54 -0500 Subject: [PATCH 17/18] Formatting --- effectful/handlers/llm/doctest.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/effectful/handlers/llm/doctest.py b/effectful/handlers/llm/doctest.py index 2e336b88..27e41dc5 100644 --- a/effectful/handlers/llm/doctest.py +++ b/effectful/handlers/llm/doctest.py @@ -207,8 +207,7 @@ def _run_from_stack(self, obj: object, ctx: typing.Mapping[str, Any]) -> None: return name = ( - f"{getattr(obj, '__name__', obj.__class__.__name__)}" - ".__template_doctest__" + f"{getattr(obj, '__name__', obj.__class__.__name__)}.__template_doctest__" ) test_case = doctest.DocTest( examples=examples, @@ -258,9 +257,7 @@ def _calibrate( with self._bind_history(template, shared_history): try: for example in examples: - self._run_calibration_example( - template, example, 
shared_history - ) + self._run_calibration_example(template, example, shared_history) finally: self._calibrating.discard(template) @@ -275,9 +272,7 @@ def _run_calibration_example( history: collections.OrderedDict[str, Message], ) -> None: """Evaluate one doctest example and append corrective feedback.""" - call_args, call_kwargs = self._parse_template_call( - example, template.__name__ - ) + call_args, call_kwargs = self._parse_template_call(example, template.__name__) if call_args is None or call_kwargs is None: return From 9e8ebeef15d1c83ba0f7683efa4534f706fa3066 Mon Sep 17 00:00:00 2001 From: datvo06 Date: Thu, 26 Feb 2026 12:06:45 -0500 Subject: [PATCH 18/18] Lint --- tests/test_handlers_llm_doctest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_handlers_llm_doctest.py b/tests/test_handlers_llm_doctest.py index ce54f7f8..c5a5586a 100644 --- a/tests/test_handlers_llm_doctest.py +++ b/tests/test_handlers_llm_doctest.py @@ -233,7 +233,7 @@ def test_bind_history_restores_state(self): history = collections.OrderedDict() with dh._bind_history(summarize, history): - assert summarize.__history__ is history # type: ignore[attr-defined] + assert getattr(summarize, "__history__") is history # Cleaned up after context exit assert not hasattr(summarize, "__history__")