From 22df91d8f132014d14999946e6cbbbf7f3cc87b4 Mon Sep 17 00:00:00 2001 From: Krista Opsahl-Ong Date: Sat, 3 May 2025 15:28:51 -0400 Subject: [PATCH 1/4] adding in error messages & timeout for user permission message --- dspy/teleprompt/mipro_optimizer_v2.py | 67 +++++++++++++++++++-------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/dspy/teleprompt/mipro_optimizer_v2.py b/dspy/teleprompt/mipro_optimizer_v2.py index d650d7da71..3b33a25306 100644 --- a/dspy/teleprompt/mipro_optimizer_v2.py +++ b/dspy/teleprompt/mipro_optimizer_v2.py @@ -3,11 +3,14 @@ import textwrap from collections import defaultdict from typing import Any, Callable, Dict, List, Literal, Optional, Tuple +import select +import sys +import time import numpy as np import optuna from optuna.distributions import CategoricalDistribution - +import math import dspy from dspy.evaluate.evaluate import Evaluate from dspy.propose import GroundedProposer @@ -53,10 +56,8 @@ def __init__( teacher_settings: Dict = {}, max_bootstrapped_demos: int = 4, max_labeled_demos: int = 4, - auto: Optional[Literal["light", "medium", "heavy"]] = "medium", - num_candidates: int = 10, - num_fewshot_candidates: Optional[int] = None, - num_instruct_candidates: Optional[int] = None, + auto: Optional[Literal["light", "medium", "heavy"]] = "light", + num_candidates: Optional[int] = None, num_threads: Optional[int] = None, max_errors: int = 10, seed: int = 9, @@ -71,9 +72,9 @@ def __init__( if auto not in allowed_modes: raise ValueError(f"Invalid value for auto: {auto}. Must be one of {allowed_modes}.") self.auto = auto - - self.num_fewshot_candidates = num_fewshot_candidates or num_candidates - self.num_instruct_candidates = num_instruct_candidates or num_candidates + self.num_fewshot_candidates = num_candidates + self.num_instruct_candidates = num_candidates + self.num_candidates = num_candidates self.metric = metric self.init_temperature = init_temperature self.task_model = task_model if task_model else dspy.settings.lm @@ -99,7 +100,7 @@ def compile( trainset: List, teacher: Any = None, valset: Optional[List] = None, - num_trials: int = 30, + num_trials: Optional[int] = None, max_bootstrapped_demos: Optional[int] = None, max_labeled_demos: Optional[int] = None, seed: Optional[int] = None, @@ -114,6 +115,21 @@ def compile( requires_permission_to_run: bool = True, provide_traceback: Optional[bool] = None, ) -> Any: + + zeroshot_opt = (self.max_bootstrapped_demos == 0) and (self.max_labeled_demos == 0) + + # If auto is None, and num_trials is not provided (but num_candidates is), raise an error that suggests a good num_trials value + if self.auto is None and (self.num_candidates is not None and num_trials is None): + raise ValueError(f"If auto is None, num_trials must also be provided. Given num_candidates={self.num_candidates}, we'd recommend setting num_trials to ~{self._set_num_trials_from_num_candidates(student, zeroshot_opt, self.num_candidates)}.") + + # If auto is None, and num_candidates or num_trials is None, raise an error + if self.auto is None and (self.num_candidates is None or num_trials is None): + raise ValueError("If auto is None, num_candidates must also be provided.") + + # If auto is provided, and either num_candidates or num_trials is not None, raise an error + if self.auto is not None and (self.num_candidates is not None or num_trials is not None): + raise ValueError("If auto is not None, num_candidates and num_trials cannot be set, since they would be overrided by the auto settings. 
Please either set auto to None, or do not specify num_candidates and num_trials.") + # Set random seeds seed = seed or self.seed self._set_random_seeds(seed) @@ -128,7 +144,6 @@ def compile( trainset, valset = self._set_and_validate_datasets(trainset, valset) # Set hyperparameters based on run mode (if set) - zeroshot_opt = (self.max_bootstrapped_demos == 0) and (self.max_labeled_demos == 0) num_trials, valset, minibatch = self._set_hyperparams_from_run_mode( student, num_trials, minibatch, zeroshot_opt, valset ) @@ -204,6 +219,15 @@ def _set_random_seeds(self, seed): self.rng = random.Random(seed) np.random.seed(seed) + def _set_num_trials_from_num_candidates(self, program, zeroshot_opt, num_candidates): + num_vars = len(program.predictors()) + if not zeroshot_opt: + num_vars *= 2 # Account for few-shot examples + instruction variables + # Trials = MAX(c*M*log(N), c=2, 3/2*N) + num_trials = int(max(2 * num_vars * np.log2(num_candidates), 1.5 * num_candidates)) + + return num_trials + def _set_hyperparams_from_run_mode( self, program: Any, @@ -226,11 +250,7 @@ def _set_hyperparams_from_run_mode( self.num_instruct_candidates = auto_settings["n"] if zeroshot_opt else int(auto_settings["n"] * 0.5) self.num_fewshot_candidates = auto_settings["n"] - num_vars = len(program.predictors()) - if not zeroshot_opt: - num_vars *= 2 # Account for few-shot examples + instruction variables - # Trials = MAX(c*M*log(N), c=2, 3/2*N) - num_trials = max(2 * num_vars * np.log(auto_settings["n"]), 1.5 * auto_settings["n"]) + num_trials = self._set_num_trials_from_num_candidates(program, zeroshot_opt, auto_settings["n"]) return num_trials, valset, minibatch @@ -353,6 +373,7 @@ def _get_user_confirmation( user_confirmation_message = textwrap.dedent( f"""\ To proceed with the execution of this program, please confirm by typing {BLUE}'y'{ENDC} for yes or {BLUE}'n'{ENDC} for no. + If no input is received within 20 seconds, the program will proceed automatically. If you would like to bypass this confirmation step in future executions, set the {YELLOW}`requires_permission_to_run`{ENDC} flag to {YELLOW}`False`{ENDC} when calling compile. @@ -360,10 +381,18 @@ def _get_user_confirmation( """ ) - user_input = ( - input(f"{user_message}\n{user_confirmation_message}\nDo you wish to continue? (y/n): ").strip().lower() - ) - return user_input == "y" + print(f"{user_message}\n{user_confirmation_message}\nDo you wish to continue? (y/n): ", end='', flush=True) + + # Wait for input with timeout + start_time = time.time() + while time.time() - start_time < 20: + if select.select([sys.stdin], [], [], 0.1)[0]: + user_input = sys.stdin.readline().strip().lower() + return user_input == "y" + time.sleep(0.1) + + print("\nNo input received within 20 seconds. 
Proceeding with execution...") + return True def _bootstrap_fewshot_examples(self, program: Any, trainset: List, seed: int, teacher: Any) -> Optional[List]: logger.info("\n==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==") From f2408fab373fc6aa6980605c03994fffa2a582b1 Mon Sep 17 00:00:00 2001 From: Krista Opsahl-Ong Date: Mon, 2 Jun 2025 14:11:56 -0400 Subject: [PATCH 2/4] wip --- dspy/teleprompt/simba_utils.py | 89 ++++++++++++++++++++++++++-------- pyproject.toml | 2 +- 2 files changed, 69 insertions(+), 22 deletions(-) diff --git a/dspy/teleprompt/simba_utils.py b/dspy/teleprompt/simba_utils.py index 3765a33f1f..3a23eb6d11 100644 --- a/dspy/teleprompt/simba_utils.py +++ b/dspy/teleprompt/simba_utils.py @@ -3,20 +3,40 @@ import inspect import logging import textwrap +import re from dspy.adapters.utils import get_field_description_string from dspy.signatures import InputField, OutputField -from typing import Callable +from typing import Callable, Optional, Dict, Any logger = logging.getLogger(__name__) +def prepare_models_for_resampling(program: dspy.Module, n: int, teacher_settings: Optional[Dict] = None): + + models = [] + if teacher_settings: + with dspy.settings.context(trace=[], **teacher_settings): + lm = dspy.settings.lm + models.append(lm) -def prepare_models_for_resampling(program: dspy.Module, n: int): lm = program.get_lm() or dspy.settings.lm - temps = [lm.kwargs["temperature"]] + [0.5 + i * (0.5 / n) for i in range(n)] - temps = list(dict.fromkeys(temps))[:n] - return [lm.copy(temperature=t) for t in temps] + # Check to see if our model is a reasoning model, which means temp must stay as 1.0 + model_family = lm.model.split("/")[-1].lower() if "/" in lm.model else lm.model.lower() + model_pattern = re.match(r"^o([13])(?:-mini)?", model_family) + + if model_pattern: # Vary the seed + start_seed = 0 if "seed" not in lm.kwargs else lm.kwargs["seed"] + seeds = [start_seed + 1 + i for i in range(n-len(models))] + seeds = list(dict.fromkeys(seeds))[:(n-len(models))] + models.extend([lm.copy(seed=seed) for seed in seeds]) + else: # Vary the temperature + start_temp = 0 if "temperature" not in lm.kwargs else lm.kwargs["temperature"] + temps = [start_temp + 0.5 + i * (0.5 / n) for i in range(n-len(models))] + temps = list(dict.fromkeys(temps))[:(n-len(models))] + models.extend([lm.copy(temperature=t) for t in temps]) + + return models def wrap_program(program: dspy.Module, metric: Callable): def wrapped_program(example): @@ -25,33 +45,53 @@ def wrapped_program(example): try: prediction = program(**example.inputs()) except Exception as e: - print(e) + logger.info(e) trace = dspy.settings.trace.copy() + output = None + score = 0.0 + output_metadata = {} + try: - score = metric(example, prediction) + output = metric(example, prediction) + if isinstance(output, (int, float)): + score = output + elif isinstance(output, dspy.Prediction): + if not hasattr(output, 'score'): + raise ValueError("dspy.Prediction must contain a 'score' attribute") + score = output.score + # Just extract fields from _store, excluding 'score' + output_metadata = { + k: v for k, v in output._store.items() if k != "score" + } except Exception as e: - print(e) + logger.info(e) - # Include the `example` in the output for subsequent usage in buckets/strategies. 
return { "prediction": prediction, "trace": trace, "score": score, - "example": example + "example": example, + "output_metadata": output_metadata } return wrapped_program - - def append_a_demo(demo_input_field_maxlen): def append_a_demo_(bucket, system, **kwargs): predictor2name, name2predictor = kwargs["predictor2name"], kwargs["name2predictor"] + batch_10p_score = kwargs["batch_10p_score"] - trace = bucket[0]["trace"] + logger.info(f"Appending a demo with max length {demo_input_field_maxlen}") + + good = bucket[0] + trace = good["trace"] name2demo = {} + if good["score"] <= batch_10p_score: + logger.info(f"Skipping appending a demo as good score {good['score']} is at or below the 10th percentile (<={batch_10p_score}).") + return False + for step in trace: predictor, _inputs, _outputs = step @@ -62,28 +102,29 @@ def append_a_demo_(bucket, system, **kwargs): demo = dspy.Example(augmented=True, **_inputs, **_outputs) name = predictor2name[id(predictor)] name2demo[name] = demo # keep the last demo for each predictor - for name, demo in name2demo.items(): predictor = name2predictor[name] predictor.demos.append(demo) - logger.info(f"Added {len(name2demo)} demos (one each) across all predictors.") + logger.info(f"Added {len(name2demo)} demos (one each) across all predictors. Each predictor now has {len(predictor.demos)} demos total.") return True return append_a_demo_ def append_a_rule(bucket, system, **kwargs): + # Read in kwargs predictor2name = kwargs["predictor2name"] batch_10p_score, batch_90p_score = kwargs["batch_10p_score"], kwargs["batch_90p_score"] + prompt_model = kwargs["prompt_model"] or dspy.settings.lm module_names = [name for name, _ in system.named_predictors()] good, bad = bucket[0], bucket[-1] example = good["example"] - if good["score"] < batch_10p_score or bad["score"] > batch_90p_score: - logger.info(f"Skipping rule generation as good score {good['score']} is below the 10th percentile " - f"*or* bad score {bad['score']} is above the 90th percentile.") + if good["score"] <= batch_10p_score or bad["score"] >= batch_90p_score: + logger.info(f"Skipping rule generation as good score {good['score']} is at or below the 10th percentile (<={batch_10p_score}) " + f"*or* bad score {bad['score']} is at or above the 90th percentile, (>={batch_90p_score}).") return False if good["score"] <= bad["score"]: @@ -116,12 +157,17 @@ def append_a_rule(bucket, system, **kwargs): worse_program_outputs=dict(bad["prediction"] or {}), worse_reward_value=bad["score"], better_reward_value=good["score"], + worse_reward_info=bad["output_metadata"], + better_reward_info=good["output_metadata"], module_names=module_names, ) kwargs = {k: v if isinstance(v, str) else ujson.dumps(recursive_mask(v), indent=2) for k, v in kwargs.items()} - advice = dspy.Predict(OfferFeedback)(**kwargs).module_advice + + with dspy.settings.context(trace=[], lm=prompt_model): + advice_program = dspy.Predict(OfferFeedback) + advice = advice_program(**kwargs).module_advice for name, predictor in system.named_predictors(): if name in advice: @@ -155,11 +201,13 @@ class OfferFeedback(dspy.Signature): ) worse_program_outputs: str = InputField(desc="The outputs of the program that we are analyzing") worse_reward_value: float = InputField(desc="The reward value assigned to the program's outputs") + worse_reward_info: str = InputField(desc="Additional information that might be helpful to understanding the assigned reward value.") better_program_trajectory: str = InputField( desc="The trajectory of the program's execution, showing each 
module's I/O" ) better_program_outputs: str = InputField(desc="The outputs of the program that we are analyzing") better_reward_value: float = InputField(desc="The reward value assigned to the program's outputs") + better_reward_info: str = InputField(desc="Additional information that might be helpful to understanding the assigned reward value.") module_names: list[str] = InputField(desc="The names of the modules in the program, for which we seek advice") discussion: str = OutputField(desc="Discussing blame of where each module went wrong, if it did") module_advice: dict[str, str] = OutputField( @@ -169,7 +217,6 @@ class OfferFeedback(dspy.Signature): "like the successful trajectory rather than the lower-scoring trajectory." ) - def inspect_modules(program): separator = "-" * 80 output = [separator] @@ -209,4 +256,4 @@ def recursive_mask(o): return tuple(recursive_mask(v) for v in o) # Otherwise, replace it with a placeholder string (or use repr(o)). else: - return f"" + return f"" \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 78c1e637d8..6eb315a221 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ dependencies = [ "backoff>=2.2", "joblib~=1.3", - "openai>=0.28.1", + "openai>=0.28.1,<=1.67.0", "pandas>=2.1.1", "regex>=2023.10.3", "ujson>=5.8.0", From 8b3b6e60afdabd91505002a1463354c25db72d9a Mon Sep 17 00:00:00 2001 From: Krista Opsahl-Ong Date: Mon, 2 Jun 2025 14:29:57 -0400 Subject: [PATCH 3/4] wip --- dspy/teleprompt/mipro_optimizer_v2.py | 37 +-------------------------- 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/dspy/teleprompt/mipro_optimizer_v2.py b/dspy/teleprompt/mipro_optimizer_v2.py index 879d25711a..05dd4eec1a 100644 --- a/dspy/teleprompt/mipro_optimizer_v2.py +++ b/dspy/teleprompt/mipro_optimizer_v2.py @@ -7,14 +7,9 @@ from typing import TYPE_CHECKING from collections import defaultdict from typing import Any, Callable, Dict, List, Literal, Optional, Tuple -import select -import sys -import time import numpy as np -import optuna -from optuna.distributions import CategoricalDistribution -import math + import dspy from dspy.evaluate.evaluate import Evaluate from dspy.propose import GroundedProposer @@ -122,23 +117,6 @@ def compile( requires_permission_to_run: bool = True, provide_traceback: Optional[bool] = None, ) -> Any: -<<<<<<< HEAD - - zeroshot_opt = (self.max_bootstrapped_demos == 0) and (self.max_labeled_demos == 0) - - # If auto is None, and num_trials is not provided (but num_candidates is), raise an error that suggests a good num_trials value - if self.auto is None and (self.num_candidates is not None and num_trials is None): - raise ValueError(f"If auto is None, num_trials must also be provided. Given num_candidates={self.num_candidates}, we'd recommend setting num_trials to ~{self._set_num_trials_from_num_candidates(student, zeroshot_opt, self.num_candidates)}.") - - # If auto is None, and num_candidates or num_trials is None, raise an error - if self.auto is None and (self.num_candidates is None or num_trials is None): - raise ValueError("If auto is None, num_candidates must also be provided.") - - # If auto is provided, and either num_candidates or num_trials is not None, raise an error - if self.auto is not None and (self.num_candidates is not None or num_trials is not None): - raise ValueError("If auto is not None, num_candidates and num_trials cannot be set, since they would be overrided by the auto settings. 
Please either set auto to None, or do not specify num_candidates and num_trials.") - -======= zeroshot_opt = (self.max_bootstrapped_demos == 0) and (self.max_labeled_demos == 0) @@ -154,7 +132,6 @@ def compile( if self.auto is not None and (self.num_candidates is not None or num_trials is not None): raise ValueError("If auto is not None, num_candidates and num_trials cannot be set, since they would be overrided by the auto settings. Please either set auto to None, or do not specify num_candidates and num_trials.") ->>>>>>> 82d3878b12b4632b3c549d9c4e85eaef360ad1f7 # Set random seeds seed = seed or self.seed self._set_random_seeds(seed) @@ -252,11 +229,7 @@ def _set_num_trials_from_num_candidates(self, program, zeroshot_opt, num_candida num_trials = int(max(2 * num_vars * np.log2(num_candidates), 1.5 * num_candidates)) return num_trials -<<<<<<< HEAD - -======= ->>>>>>> 82d3878b12b4632b3c549d9c4e85eaef360ad1f7 def _set_hyperparams_from_run_mode( self, program: Any, @@ -411,11 +384,7 @@ def _get_user_confirmation( ) print(f"{user_message}\n{user_confirmation_message}\nDo you wish to continue? (y/n): ", end='', flush=True) -<<<<<<< HEAD - -======= ->>>>>>> 82d3878b12b4632b3c549d9c4e85eaef360ad1f7 # Wait for input with timeout start_time = time.time() while time.time() - start_time < 20: @@ -423,11 +392,7 @@ def _get_user_confirmation( user_input = sys.stdin.readline().strip().lower() return user_input == "y" time.sleep(0.1) -<<<<<<< HEAD - -======= ->>>>>>> 82d3878b12b4632b3c549d9c4e85eaef360ad1f7 print("\nNo input received within 20 seconds. Proceeding with execution...") return True From 44313d9264181dfe0b6786f41c4b74d56ca33fcc Mon Sep 17 00:00:00 2001 From: Krista Opsahl-Ong Date: Fri, 8 Aug 2025 12:32:04 -0400 Subject: [PATCH 4/4] fixing litellm logging --- dspy/clients/__init__.py | 44 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/dspy/clients/__init__.py b/dspy/clients/__init__.py index 735691e9ce..496aa48eb7 100644 --- a/dspy/clients/__init__.py +++ b/dspy/clients/__init__.py @@ -3,9 +3,43 @@ from pathlib import Path from typing import Optional +# Set environment variables before importing litellm +os.environ["LITELLM_LOG"] = "ERROR" +os.environ["OPENAI_LOG"] = "ERROR" + import litellm from litellm.caching.caching import Cache as LitellmCache +def _configure_litellm_logging(level: str = "ERROR"): + """Configure LiteLLM logging to the specified level.""" + # Update environment variables + os.environ["LITELLM_LOG"] = level + os.environ["OPENAI_LOG"] = level + + # Cover both capitalization variants used by LiteLLM + logger_names = [ + "LiteLLM", + "LiteLLM.utils", + "LiteLLM.proxy.utils", + "litellm", + "litellm.utils", + "litellm.proxy.utils", + ] + _level = getattr(logging, level) + for logger_name in logger_names: + lg = logging.getLogger(logger_name) + lg.setLevel(_level) + lg.propagate = False + # Remove all existing handlers or force them to the desired level + for h in lg.handlers[:]: + h.setLevel(_level) + # Ensure there is at least a NullHandler to swallow logs + if not lg.handlers: + lg.addHandler(logging.NullHandler()) + +# Immediately disable LiteLLM logging after import +_configure_litellm_logging("ERROR") + from dspy.clients.base_lm import BaseLM, inspect_history from dspy.clients.cache import Cache from dspy.clients.embedding import Embedder @@ -86,9 +120,6 @@ def configure_cache( memory_max_entries=1000000, ) -# Turn off by default to avoid LiteLLM logging during every LM call. 
-litellm.suppress_debug_info = True - if "LITELLM_LOCAL_MODEL_COST_MAP" not in os.environ: # Accessed at run time by litellm; i.e., fine to keep after import os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" @@ -96,10 +127,17 @@ def configure_cache( def enable_litellm_logging(): litellm.suppress_debug_info = False + _configure_litellm_logging("INFO") + # Remove environment variables to allow logging + if "LITELLM_LOG" in os.environ: + del os.environ["LITELLM_LOG"] + if "OPENAI_LOG" in os.environ: + del os.environ["OPENAI_LOG"] def disable_litellm_logging(): litellm.suppress_debug_info = True + _configure_litellm_logging("ERROR") __all__ = [