From 35b496cece11879c5c09246a64189f83f287eed2 Mon Sep 17 00:00:00 2001
From: Paul Elliott
Date: Mon, 24 Mar 2025 09:17:04 -0400
Subject: [PATCH 1/2] Add demo related args to outlines_adm

---
 .gitignore                                  |  2 ++
 align_system/algorithms/outlines_adm.py     | 22 +++++++++++++------
 .../prompt_engineering/outlines_prompts.py  |  9 +++++---
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/.gitignore b/.gitignore
index c807c0c3..1253fc2b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@ run.bash
 venv/
 __pycache__/
 outputs
+
+.vscode/
\ No newline at end of file
diff --git a/align_system/algorithms/outlines_adm.py b/align_system/algorithms/outlines_adm.py
index 83d6a5ec..7e29d01d 100644
--- a/align_system/algorithms/outlines_adm.py
+++ b/align_system/algorithms/outlines_adm.py
@@ -1,10 +1,10 @@
 import json
 import random
 import itertools
-import numpy as np
 import torch
 import yaml
 import copy
+from functools import partial
 
 import outlines
 from outlines.samplers import MultinomialSampler
@@ -25,7 +25,6 @@
     calculate_votes,
     filter_votes_to_responses,
 )
-from align_system.utils.hydrate_state import hydrate_scenario_state
 from align_system.algorithms.abstracts import ActionBasedADM
 from align_system.prompt_engineering.outlines_prompts import (
     baseline_system_prompt,
@@ -46,8 +45,6 @@
     tag_choice_json_schema,
     treatment_choice_json_schema,
     treatment_choice_from_list_json_schema,
-    detailed_unstructured_treatment_action_text,
-    detailed_unstructured_tagging_action_text,
     high_risk_aversion_system_prompt,
     low_risk_aversion_system_prompt,
     high_continuing_care_system_prompt,
@@ -64,6 +61,7 @@
 log = logging.getLogger(__name__)
 
 JSON_HIGHLIGHTER = JSONHighlighter()
+MAX_GENERATOR_TOKENS = 8092
 
 class OutlinesTransformersADM(ActionBasedADM):
     def __init__(self,
@@ -241,6 +239,9 @@ def top_level_choose_action(self,
                                 num_negative_samples=0,
                                 generator_batch_size=5,
                                 kdma_descriptions_map='align_system/prompt_engineering/kdma_descriptions.yml',
+                                reasoning_max_length=512,
+                                generator_seed = -1,
+                                shuffle_choices=True,
                                 **kwargs):
         if self.baseline and num_negative_samples > 0:
             raise RuntimeError("No notion of negative samples for baseline run")
@@ -338,7 +339,7 @@ def top_level_choose_action(self,
 
         positive_dialogs = []
         for _ in range(num_positive_samples):
-            shuffled_choices = random.sample(choices, len(choices))
+            shuffled_choices = random.sample(choices, len(choices)) if shuffle_choices else choices
 
             prompt = self.action_selection_prompt_template(scenario_description, shuffled_choices)
             dialog = [{'role': 'system', 'content': positive_system_prompt}]
@@ -349,7 +350,7 @@ def top_level_choose_action(self,
 
         negative_dialogs = []
         for _ in range(num_negative_samples):
-            shuffled_choices = random.sample(choices, len(choices))
+            shuffled_choices = random.sample(choices, len(choices)) if shuffle_choices else choices
 
             prompt = self.action_selection_prompt_template(scenario_description, shuffled_choices)
             dialog = [{'role': 'system', 'content': negative_system_prompt}]
@@ -363,9 +364,16 @@ def top_level_choose_action(self,
         # https://github.com/outlines-dev/outlines/issues/690#issuecomment-2102291934
         generator = outlines.generate.json(
             self.model,
-            action_choice_json_schema(json.dumps(choices)),
+            action_choice_json_schema(json.dumps(choices), reasoning_max_length),
             sampler=self.sampler,
             whitespace_pattern=r"[ ]?")
+
+        if generator_seed >= 0:
+            torch.manual_seed(generator_seed)
+            if torch.cuda.is_available():
+                torch.cuda.manual_seed(generator_seed)
+        generator = partial(generator, max_tokens=MAX_GENERATOR_TOKENS)
+
 
         dialog_texts = [self.dialog_to_prompt(d) for d in
                         itertools.chain(positive_dialogs, negative_dialogs)]
diff --git a/align_system/prompt_engineering/outlines_prompts.py b/align_system/prompt_engineering/outlines_prompts.py
index f8bcad69..b30878ec 100644
--- a/align_system/prompt_engineering/outlines_prompts.py
+++ b/align_system/prompt_engineering/outlines_prompts.py
@@ -510,13 +510,16 @@ def followup_clarify_aid(character, available_aids):
 
 
 @outlines.prompt
-def action_choice_json_schema(choices_json_str):
+def action_choice_json_schema(choices_json_str, reasoning_max_length=512):
     '''
     {"$defs": {"ActionChoice": {"enum": {{ choices_json_str }},
                                 "title": "ActionChoice",
                                 "type": "string"}},
-     "properties": {"detailed_reasoning": {"title": "Detailed Reasoning",
-                                           "type": "string", "minLength": 1, "maxLength": 512},
+     "properties": {"detailed_reasoning": {
+         "title": "Detailed Reasoning",
+         "type": "string",
+         "minLength": 1{% if reasoning_max_length > 0 %}, "maxLength": {{ reasoning_max_length }}{% endif %}
+     },
      "action_choice": {"$ref": "#/$defs/ActionChoice"}},
      "required": ["detailed_reasoning", "action_choice"],
      "title": "ActionSelection",

From e6ed50d2c58b7cc9f8aafa397d394e63f8accf2f Mon Sep 17 00:00:00 2001
From: Paul Elliott
Date: Mon, 24 Mar 2025 15:56:49 -0400
Subject: [PATCH 2/2] Factor get_dialogs to static method in outlines_adm

---
 align_system/algorithms/outlines_adm.py | 127 +++++++++++++++++-------
 1 file changed, 90 insertions(+), 37 deletions(-)

diff --git a/align_system/algorithms/outlines_adm.py b/align_system/algorithms/outlines_adm.py
index 7e29d01d..ba5011f2 100644
--- a/align_system/algorithms/outlines_adm.py
+++ b/align_system/algorithms/outlines_adm.py
@@ -61,7 +61,6 @@
 log = logging.getLogger(__name__)
 
 JSON_HIGHLIGHTER = JSONHighlighter()
-MAX_GENERATOR_TOKENS = 8092
 
 class OutlinesTransformersADM(ActionBasedADM):
     def __init__(self,
@@ -193,7 +192,8 @@ def kdma_value_to_system_prompt(kdma, value):
         else:
             return None
 
-    def _state_to_top_level_prompt(self, scenario_state, actions):
+    @staticmethod
+    def _static_state_to_top_level_prompt(action_selection_prompt_template, scenario_description, scenario_state, actions):
         """
         Generate prompt dialog based on given state and actions
         """
@@ -203,11 +203,23 @@ def _state_to_top_level_prompt(self, scenario_state, actions):
             scenario_state
         )
 
-        scenario_description = self.scenario_description_template(scenario_state)
-        prompt = self.action_selection_prompt_template(scenario_description, choices)
+        prompt = action_selection_prompt_template(scenario_description, choices)
 
         return prompt, choices
 
+    def _state_to_top_level_prompt(self, scenario_state, actions):
+        """
+        Generate prompt dialog based on given state and actions
+        """
+        scenario_description = self.scenario_description_template(scenario_state)
+        return OutlinesTransformersADM._static_state_to_top_level_prompt(
+            self.action_selection_prompt_template,
+            scenario_description,
+            scenario_state,
+            actions
+        )
+
 
     # Function borrowed from
     # https://docs.python.org/3/library/itertools.html#itertools.batched
     # (since itertools.batched is only available in Python 3.12 or newer):
@@ -231,24 +243,25 @@ def run_in_batches(cls, inference_function, inputs, batch_size):
             outputs.extend(output)
         return outputs
 
-    def top_level_choose_action(self,
-                                scenario_state,
-                                available_actions,
-                                alignment_target,
-                                num_positive_samples=1,
-                                num_negative_samples=0,
-                                generator_batch_size=5,
-                                kdma_descriptions_map='align_system/prompt_engineering/kdma_descriptions.yml',
-                                reasoning_max_length=512,
-                                generator_seed = -1,
-                                shuffle_choices=True,
-                                **kwargs):
-        if self.baseline and num_negative_samples > 0:
+    @staticmethod
+    def get_dialogs(scenario_state,
+                    available_actions,
+                    alignment_target,
+                    num_positive_samples=1,
+                    num_negative_samples=0,
+                    kdma_descriptions_map='align_system/prompt_engineering/kdma_descriptions.yml',
+                    shuffle_choices=True,
+                    baseline=False,
+                    scenario_description_template=scenario_state_description_1,
+                    action_selection_prompt_template=action_selection_prompt,
+                    baseline_system_prompt=baseline_system_prompt,
+                    **kwargs):
+        if baseline and num_negative_samples > 0:
             raise RuntimeError("No notion of negative samples for baseline run")
-        if self.baseline and "incontext" in kwargs and kwargs["incontext"]["number"] > 0:
+        if baseline and "incontext" in kwargs and kwargs["incontext"]["number"] > 0:
             raise RuntimeError("No notion of incontext examples for baseline run")
 
-        scenario_description = self.scenario_description_template(scenario_state)
+        scenario_description = scenario_description_template(scenario_state)
         # Important that the choices stay in the same order as the
         # available actions as we'll use the selected index later to
         # map to the corresponding action
@@ -261,12 +274,11 @@ def top_level_choose_action(self,
         positive_icl_examples = []
         negative_icl_examples = []
         incontext_settings=kwargs.get("incontext", {})
-        if not self.baseline and alignment_target is not None:
-            kdma_values = alignment_target.kdma_values
-
+        if not baseline and alignment_target is not None:
+            kdma_values = alignment_target.kdma_values
             if len(kdma_values) != 1:
                 raise RuntimeError("This ADM assumes a single KDMA target, aborting!")
-            kdma_value = kdma_values[0]
+            kdma_value = kdma_values[0]
 
             if isinstance(kdma_value, KDMAValue):
                 kdma_value = kdma_value.to_dict()
@@ -280,8 +292,8 @@ def top_level_choose_action(self,
                 kdma_descriptions = yaml.load(f, Loader=yaml.FullLoader)
             name = kdma_descriptions[kdma]['name']
-            positive_system_prompt = self.__class__.kdma_value_to_system_prompt(kdma, value)
-            negative_system_prompt = self.__class__.kdma_value_to_system_prompt(kdma, negative_value)
+            positive_system_prompt = OutlinesTransformersADM.kdma_value_to_system_prompt(kdma, value)
+            negative_system_prompt = OutlinesTransformersADM.kdma_value_to_system_prompt(kdma, negative_value)
 
             if positive_system_prompt is None:
                 raise RuntimeError("Couldn't find system prompt for kdma: {}, and "
                                    "value: {}.".format(kdma, value))
@@ -291,8 +303,7 @@ def top_level_choose_action(self,
                                    "value: {}.".format(kdma, negative_value))
 
             if "incontext" in kwargs and "number" in incontext_settings and incontext_settings["number"] > 0:
-                scenario_to_match = self.scenario_description_template(scenario_state)
-                prompt_to_match, _ = self._state_to_top_level_prompt(scenario_state, available_actions)
+                prompt_to_match, _ = OutlinesTransformersADM._static_state_to_top_level_prompt(action_selection_prompt_template, scenario_description, scenario_state, available_actions)
 
                 # Create positive ICL example generators
                 positive_target = {'kdma': kdma, 'name': name, 'value': value}
@@ -301,7 +312,7 @@ def top_level_choose_action(self,
                 # Get subset of relevant of examples
                 positive_selected_icl_examples = positive_icl_example_generator.select_icl_examples(
                     sys_kdma_name=kdma,
-                    scenario_description_to_match=scenario_to_match,
+                    scenario_description_to_match=scenario_description,
                     prompt_to_match=prompt_to_match,
                     state_comparison=scenario_state
                 )
@@ -321,7 +332,7 @@ def top_level_choose_action(self,
                 # Get subset of relevant of examples
                 negative_selected_icl_examples = negative_icl_example_generator.select_icl_examples(
                     sys_kdma_name=kdma,
-                    scenario_description_to_match=scenario_to_match,
+                    scenario_description_to_match=scenario_description,
                     prompt_to_match=prompt_to_match,
                     state_comparison=scenario_state
                 )
@@ -331,17 +342,17 @@ def top_level_choose_action(self,
                         {"role": "assistant", "content": f'{icl_sample["response"]}'}
                     ])
         else:
-            positive_system_prompt = self.baseline_system_prompt()
+            positive_system_prompt = baseline_system_prompt()
             if num_negative_samples > 0:
                 raise RuntimeError("No notion of negative samples for baseline run")
             if "incontext" in kwargs and kwargs["incontext"]["number"] > 0:
                 raise RuntimeError("No notion of incontext examples for baseline run")
+            negative_system_prompt = None  # Not used in baseline
 
         positive_dialogs = []
         for _ in range(num_positive_samples):
-            shuffled_choices = random.sample(choices, len(choices)) if shuffle_choices else choices
-
-            prompt = self.action_selection_prompt_template(scenario_description, shuffled_choices)
+            shuf = random.sample(choices, len(choices)) if shuffle_choices else choices
+            prompt = action_selection_prompt_template(scenario_description, shuf)
             dialog = [{'role': 'system', 'content': positive_system_prompt}]
             dialog.extend(positive_icl_examples)
             dialog.append({'role': 'user', 'content': prompt})
@@ -350,15 +361,55 @@ def top_level_choose_action(self,
 
         negative_dialogs = []
         for _ in range(num_negative_samples):
-            shuffled_choices = random.sample(choices, len(choices)) if shuffle_choices else choices
-
-            prompt = self.action_selection_prompt_template(scenario_description, shuffled_choices)
+            shuf = random.sample(choices, len(choices)) if shuffle_choices else choices
+            prompt = action_selection_prompt_template(scenario_description, shuf)
             dialog = [{'role': 'system', 'content': negative_system_prompt}]
             dialog.extend(negative_icl_examples)
             dialog.append({'role': 'user', 'content': prompt})
-
             negative_dialogs.append(dialog)
 
+        return {"scenario_description": scenario_description,
+                "choices": choices,
+                "positive_system_prompt": positive_system_prompt,
+                "negative_system_prompt": negative_system_prompt,
+                "positive_dialogs": positive_dialogs,
+                "negative_dialogs": negative_dialogs}
+
+    def top_level_choose_action(self,
+                                scenario_state,
+                                available_actions,
+                                alignment_target,
+                                num_positive_samples=1,
+                                num_negative_samples=0,
+                                generator_batch_size=5,
+                                kdma_descriptions_map='align_system/prompt_engineering/kdma_descriptions.yml',
+                                reasoning_max_length=512,
+                                generator_seed=-1,
+                                max_generator_tokens=-1,
+                                shuffle_choices=True,
+                                **kwargs):
+        if self.baseline and num_negative_samples > 0:
+            raise RuntimeError("No notion of negative samples for baseline run")
+        if self.baseline and "incontext" in kwargs and kwargs["incontext"]["number"] > 0:
+            raise RuntimeError("No notion of incontext examples for baseline run")
+
+        dialogs_data = OutlinesTransformersADM.get_dialogs(
+            scenario_state,
+            available_actions,
+            alignment_target,
+            num_positive_samples,
+            num_negative_samples,
+            kdma_descriptions_map,
+            shuffle_choices,
+            baseline=self.baseline,
+            scenario_description_template=self.scenario_description_template,
+            action_selection_prompt_template=self.action_selection_prompt_template,
+            baseline_system_prompt=self.baseline_system_prompt,
+            **kwargs)
+        choices = dialogs_data["choices"]
+        positive_dialogs = dialogs_data["positive_dialogs"]
+        negative_dialogs = dialogs_data["negative_dialogs"]
+
         # Need to set the whitespace_pattern to prevent the state
         # machine from looping indefinitely in some cases, see:
         # https://github.com/outlines-dev/outlines/issues/690#issuecomment-2102291934
@@ -367,12 +418,14 @@ def top_level_choose_action(self,
             action_choice_json_schema(json.dumps(choices), reasoning_max_length),
             sampler=self.sampler,
             whitespace_pattern=r"[ ]?")
+
+        if max_generator_tokens >= 0:
+            generator = partial(generator, max_tokens=max_generator_tokens)
 
         if generator_seed >= 0:
             torch.manual_seed(generator_seed)
             if torch.cuda.is_available():
                 torch.cuda.manual_seed(generator_seed)
-        generator = partial(generator, max_tokens=MAX_GENERATOR_TOKENS)
 
 
         dialog_texts = [self.dialog_to_prompt(d) for d in
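
A minimal, runnable sketch of the seeding and token-clamping pattern these two patches introduce (not taken from the patches themselves): fake_generator is a hypothetical stand-in for the callable returned by outlines.generate.json, while torch.manual_seed, torch.cuda.manual_seed, and functools.partial are the actual calls the diffs rely on. The values mirror the new generator_seed and max_generator_tokens arguments, where -1 leaves the corresponding behavior unchanged.

import torch
from functools import partial

def fake_generator(prompts, max_tokens=None):
    # Hypothetical stand-in for the outlines JSON generator; only the
    # keyword interface (max_tokens) matters for this sketch.
    return [f"(<= {max_tokens} tokens) {p}" for p in prompts]

generator = fake_generator
generator_seed = 1234        # mirrors the new generator_seed argument (-1 disables)
max_generator_tokens = 512   # mirrors the new max_generator_tokens argument (-1 disables)

# Clamp generation length by freezing max_tokens into the callable,
# the same functools.partial pattern the patches apply.
if max_generator_tokens >= 0:
    generator = partial(generator, max_tokens=max_generator_tokens)

# Seed torch (and CUDA, when available) so repeated runs sample identically.
if generator_seed >= 0:
    torch.manual_seed(generator_seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(generator_seed)

print(generator(["example prompt"]))

Because partial only fixes the max_tokens keyword, the wrapped generator keeps the original call signature, which is why the later dialog_texts batching code can invoke it unchanged.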