diff --git a/.gitignore b/.gitignore index 70a21c83..56492e38 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ run.bash +results/* venv/ __pycache__/ \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..3b2439cb --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,146 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "High Incontext Adept", + "type": "debugpy", + "request": "launch", + "console": "integratedTerminal", + "module": "align_system.cli.run_align_system", + "args": [ + "TA3ActionBased", + "--adm-config", "adm_configs/single_kdma_adm_config_high_incontext.yml", + "--username", "kitware-single-kdma-adm-aligned-no-negatives", + "--align-to-target", + "--session-type", "adept", + "--api_endpoint", "http://127.0.0.1:8080", + "--loglevel", "DEBUG", + "--logfile-path", "${workspaceFolder}/results/high_incontext/output.log", + "--save-input-output-to-path", "${workspaceFolder}/results/high_incontext/input-output.json", + "--save-alignment-score-to-path", "${workspaceFolder}/results/high_incontext/output-scores.json", + "--training-session" + ], + "env": { + "CUDA_VISIBLE_DEVICES": "1" + } + }, + { + "name": "Low Incontext Adept", + "type": "debugpy", + "request": "launch", + "console": "integratedTerminal", + "module": "align_system.cli.run_align_system", + "args": [ + "TA3ActionBased", + "--adm-config", "adm_configs/single_kdma_adm_config_low_incontext.yml", + "--username", "kitware-single-kdma-adm-aligned-no-negatives", + "--align-to-target", + "--session-type", "adept", + "--api_endpoint", "http://127.0.0.1:8080", + "--loglevel", "DEBUG", + "--logfile-path", "${workspaceFolder}/results/low_incontext/output.log", + "--save-input-output-to-path", "${workspaceFolder}/results/low_incontext/input-output.json", + "--save-alignment-score-to-path", "${workspaceFolder}/results/low_incontext/output-scores.json", + "--training-session" + ], + "env": { + "CUDA_VISIBLE_DEVICES": "1" + } + }, + { + "name": "High Adept", + "type": "debugpy", + "request": "launch", + "console": "integratedTerminal", + "module": "align_system.cli.run_align_system", + "args": [ + "TA3ActionBased", + "--adm-config", "adm_configs/single_kdma_adm_config_high.yml", + "--username", "kitware-single-kdma-adm-aligned-no-negatives", + "--align-to-target", + "--session-type", "adept", + "--api_endpoint", "http://127.0.0.1:8080", + "--loglevel", "DEBUG", + "--logfile-path", "${workspaceFolder}/results/high/output.log", + "--save-input-output-to-path", "${workspaceFolder}/results/high/input-output.json", + "--save-alignment-score-to-path", "${workspaceFolder}/results/high/output-scores.json", + "--training-session" + ], + "env": { + "CUDA_VISIBLE_DEVICES": "2" + } + }, + { + "name": "Low Adept", + "type": "debugpy", + "request": "launch", + "console": "integratedTerminal", + "module": "align_system.cli.run_align_system", + "args": [ + "TA3ActionBased", + "--adm-config", "adm_configs/single_kdma_adm_config_low.yml", + "--username", "kitware-single-kdma-adm-aligned-no-negatives", + "--align-to-target", + "--session-type", "adept", + "--api_endpoint", "http://127.0.0.1:8080", + "--loglevel", "DEBUG", + "--logfile-path", "${workspaceFolder}/results/low/output.log", + "--save-input-output-to-path", "${workspaceFolder}/results/low/input-output.json", + 
"--save-alignment-score-to-path", "${workspaceFolder}/results/low/output-scores.json", + "--training-session" + ], + "env": { + "CUDA_VISIBLE_DEVICES": "3" + } + }, + { + "name": "High Baseline Adept", + "type": "debugpy", + "request": "launch", + "console": "integratedTerminal", + "module": "align_system.cli.run_align_system", + "args": [ + "TA3ActionBased", + "--adm-config", "adm_configs/single_kdma_adm_config_high_baseline.yml", + "--username", "kitware-single-kdma-adm-aligned-no-negatives", + "--align-to-target", + "--session-type", "adept", + "--api_endpoint", "http://127.0.0.1:8080", + "--loglevel", "DEBUG", + "--logfile-path", "${workspaceFolder}/results/high_baseline/output.log", + "--save-input-output-to-path", "${workspaceFolder}/results/high_baseline/input-output.json", + "--save-alignment-score-to-path", "${workspaceFolder}/results/high_baseline/output-scores.json", + "--training-session" + ], + "env": { + "CUDA_VISIBLE_DEVICES": "3" + } + }, + { + "name": "Low Baseline Adept", + "type": "debugpy", + "request": "launch", + "console": "integratedTerminal", + "module": "align_system.cli.run_align_system", + "args": [ + "TA3ActionBased", + "--adm-config", "adm_configs/single_kdma_adm_config_low_baseline.yml", + "--username", "kitware-single-kdma-adm-aligned-no-negatives", + "--align-to-target", + "--session-type", "adept", + "--api_endpoint", "http://127.0.0.1:8080", + "--loglevel", "DEBUG", + "--logfile-path", "${workspaceFolder}/results/low_baseline/output.log", + "--save-input-output-to-path", "${workspaceFolder}/low_baseline/baseline/input-output.json", + "--save-alignment-score-to-path", "${workspaceFolder}/low_baseline/baseline/output-scores.json", + "--training-session" + ], + "env": { + "CUDA_VISIBLE_DEVICES": "3" + } + } + ] +} \ No newline at end of file diff --git a/adm_configs/single_kdma_adm_config_high.yml b/adm_configs/single_kdma_adm_config_high.yml new file mode 100644 index 00000000..646c27c2 --- /dev/null +++ b/adm_configs/single_kdma_adm_config_high.yml @@ -0,0 +1,17 @@ +adm: + name: 'SingleKDMAADM' + init_kwargs: + hf_model: meta-llama/Llama-2-7b-chat-hf + precision: half + temperature: 0.7 + + inference_kwargs: + baseline: false + n_negative_samples: 0 + n_positive_samples: 1 + shuffle: true + +alignment_target_override: + id: ADEPT-metrics_eval-alignment-target-train-HIGH + kdma_values: + - {kdma: MoralDesert, value: 1} diff --git a/adm_configs/single_kdma_adm_config.yml b/adm_configs/single_kdma_adm_config_high_baseline.yml similarity index 100% rename from adm_configs/single_kdma_adm_config.yml rename to adm_configs/single_kdma_adm_config_high_baseline.yml diff --git a/adm_configs/single_kdma_adm_config_high_incontext.yml b/adm_configs/single_kdma_adm_config_high_incontext.yml new file mode 100644 index 00000000..51c9762b --- /dev/null +++ b/adm_configs/single_kdma_adm_config_high_incontext.yml @@ -0,0 +1,22 @@ +adm: + name: 'SingleKDMAADM' + init_kwargs: + hf_model: meta-llama/Llama-2-7b-chat-hf + precision: half + temperature: 0.7 + + inference_kwargs: + baseline: false + n_negative_samples: 0 + n_positive_samples: 1 + shuffle: true + incontext: + number: 5 + method: bert_similarity + # dataset: ../datasets/metrics-eval/bbn/metrics-eval-train-renamed.json + dataset: /data/shared/samba/integrated_results_metrics_eval/captured_dataset_for_chris/baseline_adept_high-1715105775-input-output.json + +alignment_target_override: + id: ADEPT-metrics_eval-alignment-target-train-HIGH + kdma_values: + - {kdma: MoralDesert, value: 1} diff --git 
a/adm_configs/single_kdma_adm_config_low.yml b/adm_configs/single_kdma_adm_config_low.yml new file mode 100644 index 00000000..70a9d648 --- /dev/null +++ b/adm_configs/single_kdma_adm_config_low.yml @@ -0,0 +1,17 @@ +adm: + name: 'SingleKDMAADM' + init_kwargs: + hf_model: meta-llama/Llama-2-7b-chat-hf + precision: half + temperature: 0.7 + + inference_kwargs: + baseline: false + n_negative_samples: 0 + n_positive_samples: 1 + shuffle: true + +alignment_target_override: + id: ADEPT-metrics_eval-alignment-target-train-LOW + kdma_values: + - {kdma: MoralDesert, value: 0} diff --git a/adm_configs/single_kdma_adm_config_low_baseline.yml b/adm_configs/single_kdma_adm_config_low_baseline.yml new file mode 100644 index 00000000..55fd28e2 --- /dev/null +++ b/adm_configs/single_kdma_adm_config_low_baseline.yml @@ -0,0 +1,17 @@ +adm: + name: 'SingleKDMAADM' + init_kwargs: + hf_model: meta-llama/Llama-2-7b-chat-hf + precision: half + temperature: 0.7 + + inference_kwargs: + baseline: true + n_negative_samples: 0 + n_positive_samples: 1 + shuffle: true + +alignment_target_override: + id: ADEPT-metrics_eval-alignment-target-train-LOW + kdma_values: + - {kdma: MoralDesert, value: 0} diff --git a/adm_configs/single_kdma_adm_config_low_incontext.yml b/adm_configs/single_kdma_adm_config_low_incontext.yml new file mode 100644 index 00000000..a23452cb --- /dev/null +++ b/adm_configs/single_kdma_adm_config_low_incontext.yml @@ -0,0 +1,22 @@ +adm: + name: 'SingleKDMAADM' + init_kwargs: + hf_model: meta-llama/Llama-2-7b-chat-hf + precision: half + temperature: 0.7 + + inference_kwargs: + baseline: false + n_negative_samples: 0 + n_positive_samples: 1 + shuffle: true + incontext: + number: 5 + method: bert_similarity + # dataset: ../datasets/metrics-eval/bbn/metrics-eval-train-renamed.json + dataset: /data/shared/samba/integrated_results_metrics_eval/captured_dataset_for_chris/baseline_adept_high-1715105775-input-output.json + +alignment_target_override: + id: ADEPT-metrics_eval-alignment-target-train-LOW + kdma_values: + - {kdma: MoralDesert, value: 0} diff --git a/align_system/algorithms/llama_2_single_kdma_adm.py b/align_system/algorithms/llama_2_single_kdma_adm.py index 22082fb4..deb85166 100644 --- a/align_system/algorithms/llama_2_single_kdma_adm.py +++ b/align_system/algorithms/llama_2_single_kdma_adm.py @@ -3,7 +3,8 @@ import random import os import pathlib -from align_system.algorithms.abstracts import AlignedDecisionMaker +import random +from functools import reduce from jinja2.exceptions import TemplateError @@ -13,8 +14,7 @@ import numpy as np from align_system.utils import logging - - +from align_system.algorithms.abstracts import AlignedDecisionMaker from align_system.similarity_measures import build_force_choice_func @@ -115,6 +115,7 @@ def __init__(self, device='cuda', hf_model='meta-llama/Llama-2-7b-chat-hf', prec self.temperature = temperature self.do_sample = do_sample self.chat_template = kwargs.get('chat_template', None) + self.dataset = [] assert precision in ['full', 'half'], "precision must be either 'full' or 'half'." self.precision = torch.float32 if precision == 'full' else torch.float16 @@ -126,11 +127,11 @@ def __init__(self, device='cuda', hf_model='meta-llama/Llama-2-7b-chat-hf', prec def load_model(self, model=None, tokenizer=None): assert (model is None) == (tokenizer is None), "model and tokenizer must both be None or both be not None." 
if model is not None: - print('Loading model and tokenizer from provided objects.') + log.info('Loading model and tokenizer from provided objects.') self.model = model self.tokenizer = tokenizer else: - print('Loading model:', self.hf_model) + log.info('Loading model: %s', self.hf_model) if self.device == 'auto': self.model = AutoModelForCausalLM.from_pretrained(self.hf_model, torch_dtype=self.precision, device_map='auto') else: @@ -214,40 +215,76 @@ def chat_prompt_tokens(self, dialogs, return_tensor=True) def build_multiple_choice_dialog(self, - question, - options, - system_message=None, - json_format=STANDARD_MULTIPLE_CHOICE_JSON_FORMAT): + question, + options, + incontext=None, + system_message=None, + json_format=STANDARD_MULTIPLE_CHOICE_JSON_FORMAT): + """ + Constructs a structured dialog for a multiple-choice question scenario, typically used in systems + that require a step-by-step reasoning response in a controlled format such as JSON. + + Parameters: + question (str): The question to be answered, forming the basis of the multiple-choice scenario. + options (list of str): The multiple-choice options provided as answers to the question. + incontext (list of list of dict, optional): In-context example exchanges (user/assistant message + pairs) to prepend to the dialog. Defaults to None. + system_message (str, optional): A predefined system message that guides the assistant on how to + respond. If None, a default message tailored for medical triage scenarios is used. + json_format (str): The JSON format that the user response should adhere to, providing structural + guidance on how the response should be formatted. + + Returns: + list of dict: A list where each dictionary represents a part of the dialog, with 'role' specifying + whether the part is from the 'system' or the 'user', and 'content' containing the text for that role. + + Notes: + The function defaults to using a specialized system message for medical triage unless a specific + message is provided. This default message instructs the assistant on how to approach and format + the response to the question in a methodical and informative manner. + """ + # Define a default system message for medical triage if none is provided medical_triage_system_message = ( "You are an assistant specialized in answering multiple-choice questions related to " "medical triage. When a question is posed, carefully analyze the symptoms or conditions " "described. Respond by providing a detailed reasoning using a step-by-step process or " "process of elimination. Conclude with the final answer, represented by the corresponding " "index number. Your response should be grounded in established medical knowledge and aim " - "to be informative. Please format your response as a JSON object in the following structure:\\n\\n" + "to be informative. Please format your response as a JSON object in the following structure:\n\n" f"{json_format}" "Ensure that you adhere to proper JSON syntax, and carefully note the index that corresponds to each answer."
) if system_message is None: system_message = medical_triage_system_message + # Format the multiple choice options for display formatted_options = [f'({i}) {option}' for i, option in enumerate(options)] - content = f'{question} {formatted_options}' + content = f'{question} {" ".join(formatted_options)}' + if incontext: + dialog = list(reduce(lambda x, y: x + y, incontext, [])) + else: + dialog = [] - dialog = [ + # Construct the dialog with system and user parts + + s_message = [ { "role": "system", "content": system_message - }, + } + ] + u_message = [ { "role": "user", "content": content } ] + dialog = s_message + dialog + u_message return dialog + def log_dialog(self, dialog): for e in dialog: if e.get('role') == 'system': @@ -284,7 +321,7 @@ def respond_to_dialog(self, dialog, prefix=None): else: new_dialog.append(message) dialog = new_dialog - print('INPUT\n', dialog) + log.info('INPUT\n %s', dialog) prompt_tokens = [self.tokenizer.apply_chat_template(dialog, tokenize=True)] inference_pair['input'] = self.tokenizer.apply_chat_template(dialog, tokenize=False) @@ -306,7 +343,7 @@ temperature=self.temperature, do_sample=self.do_sample) - # Print the generated model output + # Log the generated model output generated_output = self.tokenizer.decode(outputs.sequences[0][prompt_length:]) inference_pair['output'] = generated_output @@ -374,20 +411,54 @@ def respond_to_dialogs_batched(self, dialogs, prefixes=None): return generated_outputs - def aligned_decision_maker(self, question, choices, target_kdmas, n_positive_samples=5, n_negative_sampels=5, shuffle=True, baseline=False, n_retries=3): + def aligned_decision_maker(self, question, choices, target_kdmas, incontext=None, n_positive_samples=5, n_negative_sampels=5, shuffle=True, baseline=False, n_retries=3): + """ Executes a decision-making process by simulating a dialog based on positive and negative alignments with specified Key Decision-Making Attributes (KDMAs). + It attempts to identify the choice that best aligns with the target attributes, using both positive and negative samples to provide robustness against biases. + + Parameters: + question (str): The primary question posed to the decision-making system. + choices (list of str): A list of choices from which the system must select the most appropriate based on KDMAs. + target_kdmas (dict): Key-value pairs indicating the target KDMAs and their desired levels. Values indicate desired thresholds for alignment. + incontext (list of list of dict, optional): Pre-formatted in-context example exchanges prepended to each generated dialog. + n_positive_samples (int): Number of samples to process assuming positive alignment with the target KDMAs. + n_negative_samples (int): Number of samples to process assuming negative or inverse alignment with the target KDMAs. + shuffle (bool): If True, shuffle the choices to potentially reduce positional bias in the decision-making process. + baseline (bool): If True, use a baseline decision-making model that does not consider specific KDMAs. + n_retries (int): The number of retry attempts to parse a successful response from the decision-making process. + + Returns: + tuple: + responses (list): A list of dictionaries where each dictionary contains the response from the decision-making system, the reasoning behind it, and the index of the chosen answer. + inference_pairs (list): A list of dictionaries capturing detailed information about each inference attempt for analysis and debugging.
+ + Raises: + RuntimeError: If any specified KDMAs in `target_kdmas` are not supported by the system. + + Notes: + This function leverages logging to trace both aligned and misaligned dialogs; only the first of each type is logged for brevity. + """ + inference_pairs = [] + + + # Check if baseline is not used and handle unsupported KDMAs if not baseline: unsupported_kdmas = {kdma_remapping.get(k, k) for k in target_kdmas.keys()} - kdmas if len(unsupported_kdmas) > 0: raise RuntimeError(f"KDMA(s) {unsupported_kdmas} not supported.") + + # Prefix for logging reasoning prefix = '{"Reasoning": "Because' responses = [] + # Flags to ensure we log certain types of dialog once logged_aligned_dialog = False logged_inverse_misaligned_dialog = False + + # Generate responses for positive samples for _ in range(n_positive_samples): if baseline: system_message = load_system_message() @@ -398,22 +469,27 @@ for kdma, value in target_kdmas.items()} system_message = load_system_message(system_message_keys) - indecies = list(range(len(choices))) + # Shuffle choices if required + indices = list(range(len(choices))) if shuffle: - random.shuffle(indecies) - shuffled_choices = [choices[i] for i in indecies] + random.shuffle(indices) + shuffled_choices = [choices[i] for i in indices] + # Build dialog with the system message and shuffled choices dialog = self.build_multiple_choice_dialog( question, shuffled_choices, - system_message=system_message) + system_message=system_message, + incontext=incontext) + # Log aligned dialog once for clarity if not logged_aligned_dialog: log.debug("[bold]*ALIGNED DIALOG*[/bold]", extra={"markup": True}) self.log_dialog(dialog) logged_aligned_dialog = True + # Attempt to parse a valid response multiple times good_parse = False for i in range(n_retries): high_response, inference_pair = self.respond_to_dialog(dialog, prefix=prefix) @@ -425,42 +501,48 @@ except RuntimeError as e: pass + # Fallback parsing strategy if normal parsing fails if not good_parse: reasoning, answer_idx, parse_method = Llama2SingleKDMAADM.bert_similarity_parse(high_response, shuffled_choices) - print('CHOSEN ANSWER IDX', answer_idx, shuffled_choices) + log.explain('CHOSEN ANSWER IDX %s %s', answer_idx, shuffled_choices) + # Ensure an answer was parsed successfully - assert answer_idx is not None, f'Failed to parse answer index from generated output: {low_response}' + assert answer_idx is not None, f'Failed to parse answer index from generated output: {high_response}' + # Store response details responses.append({ 'response': high_response, 'reasoning': reasoning, 'answer_idx': answer_idx, - 'shuffle_indecies': indecies, + 'shuffle_indices': indices, 'alignment': system_message_keys, 'aligned': True, 'parse_method': parse_method, }) - + # Repeat process for negative samples with inverse KDMA logic for _ in range(n_negative_sampels): system_message_keys = {kdma: 'high' if not value > 5 else 'low' for kdma, value in target_kdmas.items()} - indecies = list(range(len(choices))) + indices = list(range(len(choices))) if shuffle: - random.shuffle(indecies) - shuffled_choices = [choices[i] for i in indecies] + random.shuffle(indices) + shuffled_choices = [choices[i] for i in indices] + # Build dialog with inverse logic inverse_misaligned_dialog = self.build_multiple_choice_dialog( question, shuffled_choices, system_message=load_system_message(system_message_keys)) + # Log the first occurrence of an inverse misaligned dialog if not logged_inverse_misaligned_dialog:
log.debug("[bold]*INVERSE MISALIGNED DIALOG*[/bold]", extra={"markup": True}) self.log_dialog(inverse_misaligned_dialog) logged_inverse_misaligned_dialog = True + # Attempt response parsing with retries good_parse = False for i in range(n_retries): low_response, inference_pair = self.respond_to_dialog(inverse_misaligned_dialog, prefix=prefix) @@ -472,16 +554,18 @@ def aligned_decision_maker(self, question, choices, target_kdmas, n_positive_sam except RuntimeError as e: pass + # Fallback parsing strategy if normal parsing fails if not good_parse: reasoning, answer_idx, parse_method = Llama2SingleKDMAADM.bert_similarity_parse(low_response, shuffled_choices) assert answer_idx is not None, f'Failed to parse answer index from generated output: {low_response}' + # Store response details responses.append({ 'response': low_response, 'reasoning': reasoning, 'answer_idx': answer_idx, - 'shuffle_indecies': indecies, + 'shuffle_indices': indices, 'alignment': system_message_keys, 'aligned': False, 'parse_method': parse_method, @@ -492,6 +576,23 @@ def aligned_decision_maker(self, question, choices, target_kdmas, n_positive_sam @staticmethod def calculate_votes(responses, choices): + """ + Calculates voting scores for each choice based on a list of responses. Responses that align with the desired outcome increase the score of the selected choice. Misaligned responses distribute a penalty among other choices. + + Parameters: + responses (list of dicts): Each dictionary contains information about a single response, including: + - 'answer_idx' (int or str): The index of the chosen answer. + - 'shuffle_indices' (list of int, optional): If present, it represents the original indices of the choices after shuffling. + - 'aligned' (bool): Indicates whether the response is aligned (True) or misaligned (False) with the desired outcome. + choices (list of str): A list of choices available for voting. + + Returns: + list of float: A list of normalized vote scores for each choice, where higher scores represent greater alignment with the desired outcome. + + Notes: + - The function handles cases where 'answer_idx' may not be an integer or could be out of the valid range of choices. + - Scores are adjusted by the minimum score to ensure all are non-negative and are then normalized to sum to 1. 
+ """ choice_votes = [0] * len(choices) for response in responses: answer_idx = response['answer_idx'] @@ -506,8 +607,8 @@ def calculate_votes(responses, choices): if answer_idx >= len(choices): continue - if 'shuffle_indecies' in response: - answer_idx = response['shuffle_indecies'][int(answer_idx)] + if 'shuffle_indices' in response: + answer_idx = response['shuffle_indices'][int(answer_idx)] aligned = response['aligned'] @@ -600,10 +701,10 @@ def parse_generated_output(generated_output, n_choices): @staticmethod def bert_similarity_parse(generated_output, choices): - print('BERT SIMILARITY PARSE') + log.info('BERT SIMILARITY PARSE') force_choice_func = build_force_choice_func('bert') answer_idx, _ = force_choice_func(generated_output, choices) - print('ANSWER IDX', answer_idx, type(answer_idx)) + log.info('ANSWER IDX %s %s', answer_idx, type(answer_idx)) return generated_output, answer_idx, 'bert_similarity' @staticmethod @@ -714,11 +815,50 @@ def correct_json(self, invalid_json, verbose=True): return None def run_aligned_decision_maker_with_voting( - self, prompt, choices, alignment_target, n_positive_samples=5, n_negative_samples=5, baseline=False, shuffle=False): + self, + prompt, + choices, + alignment_target, + incontext= None, + n_positive_samples=5, + n_negative_samples=5, + baseline=False, + shuffle=False): + """ Executes a decision-making process with voting based on alignment targets and user-provided choices. + This method incorporates a mechanism for evaluating the alignment of choices with a specified target + using a set of positive and negative samples. + + Parameters: + prompt (str): The input prompt to which the decision-making model responds. + choices (list): A list of possible choices for the decision-maker to evaluate. + alignment_target (str): A target alignment criterion that guides the decision-making process. + incontext (list[dict], optional): Additional contextual information to provide to the model. Defaults to None. + n_positive_samples (int): Number of positive samples to use for aligning the choices with the target. Defaults to 5. + n_negative_samples (int): Number of negative samples to use for the alignment evaluation. Defaults to 5. + baseline (bool): Flag to determine whether to use a baseline model for comparison. Defaults to False. + shuffle (bool): Option to shuffle the choices before processing. This can help in reducing bias. Defaults to False. + + Returns: + tuple: A tuple containing: + - reasoning (str or None): The reasoning behind the selected choice, if available. + - answer_idx (int): The index of the choice selected as most aligned. + - responses (list): Detailed responses from the model for each choice. + - inference_pairs (list): Raw data pairs used in the inference process. + + Raises: + Exception: Captures and logs any exception that occurs during the vote calculation, defaulting choice scores to None if an error occurs. + + Notes: + This method leverages internal logging to trace the detailed responses and the computation of choice scores. + It is essential to ensure proper initialization of the logging and handling mechanisms to capture and utilize + the detailed debug outputs effectively. 
+ + """ responses, inference_pairs = self.aligned_decision_maker( prompt, choices, alignment_target, + incontext=incontext, baseline=baseline, n_positive_samples=n_positive_samples, n_negative_sampels=n_negative_samples, @@ -752,46 +892,177 @@ def run_aligned_decision_maker_with_voting( for r in responses: assert r['answer_idx'] is not None - assert int(r['answer_idx']) < len(r['shuffle_indecies']) + assert int(r['answer_idx']) < len(r['shuffle_indices']) - if r['shuffle_indecies'][int(r['answer_idx'])] == answer_idx: + if r['shuffle_indices'][int(r['answer_idx'])] == answer_idx: reasoning = r['reasoning'] break return reasoning, answer_idx, responses, inference_pairs + def format_single_incontext_prompt(self, sample, labels, target_kdma_values): + """ + Formats an in-context prompt for use in a dialogue system where an assistant provides reasoned answers based on provided scores and target values. + + Parameters: + sample (dict): A dictionary containing the scenario description and choices. + - 'prompt' (str): The main question or scenario description. + - 'choices' (list of dicts): Possible choices for the scenario. + - Each choice is a dictionary with an 'unstructured' key containing the choice text. + labels (list of dicts): A list where each dictionary contains scores associated with each choice. + target_kdma_values (dict): A dictionary mapping a target attribute to its desired value. + + Returns: + list of dicts: A list containing structured data for the user and assistant roles, formatted for a dialogue interaction. + - Each dictionary contains 'role' (str) indicating if it's 'user' or 'assistant' and 'content' (str) which is the text associated with that role. + + Notes: + This function assumes there is exactly one key in `target_kdma_values`. + The assistant's answer includes reasoning why a particular choice is preferred based on the minimal distance to the target value in `labels`. + """ + # Mapping of target attributes to their corresponding score keys + kdma_name_map = { + 'moral_deservingness': 'MoralDesert', + 'maximization': 'maximization', + } + + # Extract the main question from the sample + question = sample['prompt'] + + # Format choices as enumerated options for display + formatted_options = [f'({i}) {option["unstructured"]}' for i, option in enumerate(sample['choices'])] + content = f"{question} {' '.join(formatted_options)}" + + # Extract the target attribute (assuming there's only one key in target_kdma_values) + target = next(iter(target_kdma_values)) + + # Calculate the distance of each choice's score to the target value + dist_to_target = [] + for score in labels: + if kdma_name_map[target] in score: + # Multiply by 10 to match the rest of the KDMA's score range + dist = abs(score[kdma_name_map[target]] * 10 - target_kdma_values[target]) + else: + dist = float('inf') # If the target attribute is not in the scores, assign an infinite distance + dist_to_target.append(dist) + + # Determine the index of the choice with the minimum distance to the target value + correct_answer_idx = np.argmin(dist_to_target) + + # Map the target attribute to its descriptive wording + target_wording_mapping = { + "moral_deservingness": "morally deserving choice.", + "maximization": "maximization choice." 
+ } + + # Determine the adjective based on the value being higher or lower than a threshold + adjective = 'more' if target_kdma_values[target] > 5 else 'less' + + # Construct the reasoning text using the correct choice and its descriptor + answer_text = f'Because {sample["choices"][correct_answer_idx]["unstructured"]} is the {adjective} {target_wording_mapping.get(target, "specified attribute")}' + answer = f'{{"Reasoning": "{answer_text}", "answer": {correct_answer_idx}}}' + + # Structure the dialog with user and assistant roles + prompt = [ + {"role": "user", "content": content}, + {"role": "assistant", "content": answer} + ] + + return prompt + def __call__(self, sample, target_kdma_values, **kwargs): + """ + Build the prompt and send it to the LLM to ask for a single KDMA (Key Decision-Making Attribute). + + Parameters: + sample (dict): A dictionary containing the scenario, state, probe, and choices. + - 'scenario' (str): The main scenario description. + - 'state' (str, optional): Additional state information to append to the scenario. + - 'probe' (str): The specific question or probe to be answered. + - 'choices' (list of str): Possible choices for the scenario. + target_kdma_values (dict): A dictionary mapping a target attribute to its desired value. + kwargs (dict): Additional keyword arguments for in-context learning, retrievers, labels, etc. + - 'incontext' (dict, optional): Configuration for in-context learning. + - 'dataset' (str): Path to the in-context dataset. + - 'number' (int): Number of in-context samples to use. + - 'method' (str): Method to select in-context samples ('random' or 'bert_similarity'). + - 'labels' (list of dicts, optional): A list where each dictionary contains scores associated with each choice. + - 'n_positive_samples' (int, optional): Number of positive samples for decision making. + - 'n_negative_samples' (int, optional): Number of negative samples for decision making. + - 'baseline' (bool, optional): Whether to use a baseline approach. + - 'shuffle' (bool, optional): Whether to shuffle the choices. + + Returns: + dict: A dictionary containing the selected choice and additional information. + - 'choice' (int): The index of the selected choice. + - 'info' (dict): Additional information including reasoning, responses, and raw data. + """ prompt = sample['scenario'] if sample['state'] is not None: prompt += f'\n{sample["state"]}' - if 'retriever' in kwargs: - # retriever_prompt = "How would you treat the following injuries: {}".format(prompt) - retriever_prompt = "{} {}".format(prompt, sample['probe']) + incontext_prompts = [] - retriever = kwargs['retriever'] - retrieved_nodes = retriever.retrieve(retriever_prompt) + if 'incontext' in kwargs: + possible_samples = [] - if 'summarizer' in kwargs: - summarizer = kwargs['summarizer'] - summary = summarizer.synthesize(retriever_prompt, nodes=retrieved_nodes) + # Read dataset + with open(kwargs['incontext']['dataset']) as f: + dataset = json.load(f) - log.explain("[bold] ** Retrieval Summary ** [/bold]", - extra={"markup": True}) - log.explain(summary) + # Populate possible samples from the dataset + for sam in dataset: + possible_samples.append(sam) + + if len(possible_samples) < kwargs['incontext']['number']: + raise RuntimeError(f'Not enough possible in-context samples to learn from. 
Only {len(possible_samples)} samples available while asking for {kwargs["incontext"]["number"]} in-context samples.') + + if kwargs['incontext']['method'] == 'random': + chosen_sample = random.sample(possible_samples, kwargs['incontext']['number']) + elif kwargs['incontext']['method'] == 'bert_similarity': + # Extract strings for each situation + possible_samples_parse = [s['input']['prompt'] for s in possible_samples] - prompt += "\n#############\n{}\n#############".format(summary) + # Create similarity scores between the in-context dataset and find top-k indices + from bert_score import score + _, _, F1 = score([prompt]*len(possible_samples_parse), possible_samples_parse, lang='en') + _, indices = torch.topk(F1, kwargs['incontext']['number']) + # Make list of the top k for creating prompts + chosen_sample = [possible_samples[i] for i in indices] else: - prompt += "\n#############\n{}\n#############".format( - "\n#############\n".join((n.text for n in retrieved_nodes))) + raise ValueError(f'"{kwargs["incontext"]["method"]}" is not a valid in-context method. Please use "random" or "bert_similarity".') - prompt += f'\nGiven the scenario and documentation above.. {sample["probe"]}' - else: - prompt += f'\n{sample["probe"]}' + # Create in-context prompts + for cs in chosen_sample: + incontext_prompts.append(self.format_single_incontext_prompt(cs['input'], cs['label'], target_kdma_values)) - choices = sample['choices'] + # if 'retriever' in kwargs: + # # retriever_prompt = "How would you treat the following injuries: {}".format(prompt) + # retriever_prompt = "{} {}".format(prompt, sample['probe']) + + # retriever = kwargs['retriever'] + # retrieved_nodes = retriever.retrieve(retriever_prompt) + + # if 'summarizer' in kwargs: + # summarizer = kwargs['summarizer'] + # summary = summarizer.synthesize(retriever_prompt, nodes=retrieved_nodes) + + # log.explain("[bold] ** Retrieval Summary ** [/bold]", + # extra={"markup": True}) + # log.explain(summary) + # prompt += "\n#############\n{}\n#############".format(summary) + + # else: + # prompt += "\n#############\n{}\n#############".format( + # "\n#############\n".join((n.text for n in retrieved_nodes))) + + # prompt += f'\nGiven the scenario and documentation above.. {sample["probe"]}' + # else: + + prompt += f'\n{sample["probe"]}' + choices = sample['choices'] labels = kwargs.get('labels', {}) alignment_target = None @@ -809,6 +1080,7 @@ def __call__(self, sample, target_kdma_values, **kwargs): prompt, choices, alignment_target, + incontext=incontext_prompts, n_positive_samples=kwargs.get('n_positive_samples', 5), n_negative_samples=kwargs.get('n_negative_samples', 5), baseline=kwargs.get('baseline', False), @@ -1042,7 +1314,7 @@ def populate_tagging_parameters(self, scenario_state, tagging_action, alignment_ parsed_tagging_output = self.attempt_generic_parse( # noqa raw_tagging_response, ['Reasoning', 'Answer', 'Tag']) # noqa - + if parsed_tagging_output is not None: if len(untagged_characters) == 1: log.debug("** Force selecting only available character") diff --git a/align_system/evaluation/adm_evaluator.py b/align_system/evaluation/adm_evaluator.py index ff7eda9f..4a742e4f 100644 --- a/align_system/evaluation/adm_evaluator.py +++ b/align_system/evaluation/adm_evaluator.py @@ -11,7 +11,7 @@ def generate_outputs(dataset, adm, target_kdma_values, **kwargs): }) continue - outputs.append(adm(input_, target_kdma_values, labels=label, **kwargs)) + outputs.append(adm(input_, target_kdma_values, labels=label, dataset=dataset, **kwargs)) return outputs
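
Note for reviewers: the in-context pieces above are spread across several hunks (the `incontext:` YAML block, `format_single_incontext_prompt`, and the new `incontext=` path in `build_multiple_choice_dialog`). The sketch below shows the dialog layout they combine to produce; the example strings and system message are invented, and each in-context entry is assumed to be a `[user, assistant]` message pair as returned by `format_single_incontext_prompt`:

```python
# Minimal sketch of the dialog layout built by build_multiple_choice_dialog
# when incontext is supplied: system message first, then the flattened
# example exchanges, then the real question. All strings here are made up.
from functools import reduce

incontext = [
    [{"role": "user", "content": "Example scenario... (0) choice A (1) choice B"},
     {"role": "assistant", "content": '{"Reasoning": "Because ...", "answer": 1}'}],
]

system_message = "You are an assistant specialized in medical triage..."
content = "Real scenario... (0) choice C (1) choice D"

dialog = list(reduce(lambda x, y: x + y, incontext, []))  # flatten the exchanges
dialog = ([{"role": "system", "content": system_message}]
          + dialog
          + [{"role": "user", "content": content}])

for message in dialog:
    print(message["role"], "->", message["content"])
```

Keeping the system message first means the in-context examples read as earlier turns of the same conversation, so the model sees answered exchanges in exactly the JSON format it is asked to produce.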
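The scoring arithmetic inside `calculate_votes` is mostly unchanged context and is not visible in the hunks above; its new docstring describes the scheme as: aligned responses add weight to the chosen choice, misaligned responses distribute a penalty-reward among the other choices, and the totals are min-shifted and normalized. A sketch of that scheme follows; the per-response weights are assumptions, not the shipped values:

```python
# Sketch of the voting scheme described by the calculate_votes docstring.
# The exact weights are assumed; the real ones live in unchanged lines of
# llama_2_single_kdma_adm.py that this diff does not show.
def calculate_votes_sketch(responses, choices):
    votes = [0.0] * len(choices)
    for response in responses:
        answer_idx = response['answer_idx']
        if not str(answer_idx).isdigit():   # skip unparseable indices
            continue
        answer_idx = int(answer_idx)
        if answer_idx >= len(choices):      # skip out-of-range indices
            continue
        if 'shuffle_indices' in response:   # map back to unshuffled order
            answer_idx = response['shuffle_indices'][answer_idx]
        if response['aligned']:
            votes[answer_idx] += 1.0        # aligned response endorses its pick
        else:
            # a misaligned pick counts as evidence for the other choices
            for other in range(len(choices)):
                if other != answer_idx:
                    votes[other] += 1.0 / (len(choices) - 1)
    minimum = min(votes)
    votes = [v - minimum for v in votes]    # shift so all scores are >= 0
    total = sum(votes) or 1.0               # guard against dividing by zero
    return [v / total for v in votes]       # normalize to sum to 1

print(calculate_votes_sketch(
    [{'answer_idx': '0', 'shuffle_indices': [1, 0], 'aligned': True},
     {'answer_idx': 1, 'shuffle_indices': [0, 1], 'aligned': True},
     {'answer_idx': 0, 'aligned': False}],
    ['treat patient A', 'treat patient B']))
# -> [0.0, 1.0]: all three responses point at the second choice
```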