diff --git a/.gitignore b/.gitignore index 70a21c83..56492e38 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ run.bash +results/* venv/ __pycache__/ \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..3b2439cb --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,146 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "High Incontext Adept", + "type": "debugpy", + "request": "launch", + "console": "integratedTerminal", + "module": "align_system.cli.run_align_system", + "args": [ + "TA3ActionBased", + "--adm-config", "adm_configs/single_kdma_adm_config_high_incontext.yml", + "--username", "kitware-single-kdma-adm-aligned-no-negatives", + "--align-to-target", + "--session-type", "adept", + "--api_endpoint", "http://127.0.0.1:8080", + "--loglevel", "DEBUG", + "--logfile-path", "${workspaceFolder}/results/high_incontext/output.log", + "--save-input-output-to-path", "${workspaceFolder}/results/high_incontext/input-output.json", + "--save-alignment-score-to-path", "${workspaceFolder}/results/high_incontext/output-scores.json", + "--training-session" + ], + "env": { + "CUDA_VISIBLE_DEVICES": "1" + } + }, + { + "name": "Low Incontext Adept", + "type": "debugpy", + "request": "launch", + "console": "integratedTerminal", + "module": "align_system.cli.run_align_system", + "args": [ + "TA3ActionBased", + "--adm-config", "adm_configs/single_kdma_adm_config_low_incontext.yml", + "--username", "kitware-single-kdma-adm-aligned-no-negatives", + "--align-to-target", + "--session-type", "adept", + "--api_endpoint", "http://127.0.0.1:8080", + "--loglevel", "DEBUG", + "--logfile-path", "${workspaceFolder}/results/low_incontext/output.log", + "--save-input-output-to-path", "${workspaceFolder}/results/low_incontext/input-output.json", + "--save-alignment-score-to-path", "${workspaceFolder}/results/low_incontext/output-scores.json", + "--training-session" + ], + "env": { + "CUDA_VISIBLE_DEVICES": "1" + } + }, + { + "name": "High Adept", + "type": "debugpy", + "request": "launch", + "console": "integratedTerminal", + "module": "align_system.cli.run_align_system", + "args": [ + "TA3ActionBased", + "--adm-config", "adm_configs/single_kdma_adm_config_high.yml", + "--username", "kitware-single-kdma-adm-aligned-no-negatives", + "--align-to-target", + "--session-type", "adept", + "--api_endpoint", "http://127.0.0.1:8080", + "--loglevel", "DEBUG", + "--logfile-path", "${workspaceFolder}/results/high/output.log", + "--save-input-output-to-path", "${workspaceFolder}/results/high/input-output.json", + "--save-alignment-score-to-path", "${workspaceFolder}/results/high/output-scores.json", + "--training-session" + ], + "env": { + "CUDA_VISIBLE_DEVICES": "2" + } + }, + { + "name": "Low Adept", + "type": "debugpy", + "request": "launch", + "console": "integratedTerminal", + "module": "align_system.cli.run_align_system", + "args": [ + "TA3ActionBased", + "--adm-config", "adm_configs/single_kdma_adm_config_low.yml", + "--username", "kitware-single-kdma-adm-aligned-no-negatives", + "--align-to-target", + "--session-type", "adept", + "--api_endpoint", "http://127.0.0.1:8080", + "--loglevel", "DEBUG", + "--logfile-path", "${workspaceFolder}/results/low/output.log", + "--save-input-output-to-path", "${workspaceFolder}/results/low/input-output.json", + 
"--save-alignment-score-to-path", "${workspaceFolder}/results/low/output-scores.json", + "--training-session" + ], + "env": { + "CUDA_VISIBLE_DEVICES": "3" + } + }, + { + "name": "High Baseline Adept", + "type": "debugpy", + "request": "launch", + "console": "integratedTerminal", + "module": "align_system.cli.run_align_system", + "args": [ + "TA3ActionBased", + "--adm-config", "adm_configs/single_kdma_adm_config_high_baseline.yml", + "--username", "kitware-single-kdma-adm-aligned-no-negatives", + "--align-to-target", + "--session-type", "adept", + "--api_endpoint", "http://127.0.0.1:8080", + "--loglevel", "DEBUG", + "--logfile-path", "${workspaceFolder}/results/high_baseline/output.log", + "--save-input-output-to-path", "${workspaceFolder}/results/high_baseline/input-output.json", + "--save-alignment-score-to-path", "${workspaceFolder}/results/high_baseline/output-scores.json", + "--training-session" + ], + "env": { + "CUDA_VISIBLE_DEVICES": "3" + } + }, + { + "name": "Low Baseline Adept", + "type": "debugpy", + "request": "launch", + "console": "integratedTerminal", + "module": "align_system.cli.run_align_system", + "args": [ + "TA3ActionBased", + "--adm-config", "adm_configs/single_kdma_adm_config_low_baseline.yml", + "--username", "kitware-single-kdma-adm-aligned-no-negatives", + "--align-to-target", + "--session-type", "adept", + "--api_endpoint", "http://127.0.0.1:8080", + "--loglevel", "DEBUG", + "--logfile-path", "${workspaceFolder}/results/low_baseline/output.log", + "--save-input-output-to-path", "${workspaceFolder}/low_baseline/baseline/input-output.json", + "--save-alignment-score-to-path", "${workspaceFolder}/low_baseline/baseline/output-scores.json", + "--training-session" + ], + "env": { + "CUDA_VISIBLE_DEVICES": "3" + } + } + ] +} \ No newline at end of file diff --git a/adm_configs/single_kdma_adm_config_high.yml b/adm_configs/single_kdma_adm_config_high.yml new file mode 100644 index 00000000..646c27c2 --- /dev/null +++ b/adm_configs/single_kdma_adm_config_high.yml @@ -0,0 +1,17 @@ +adm: + name: 'SingleKDMAADM' + init_kwargs: + hf_model: meta-llama/Llama-2-7b-chat-hf + precision: half + temperature: 0.7 + + inference_kwargs: + baseline: false + n_negative_samples: 0 + n_positive_samples: 1 + shuffle: true + +alignment_target_override: + id: ADEPT-metrics_eval-alignment-target-train-HIGH + kdma_values: + - {kdma: MoralDesert, value: 1} diff --git a/adm_configs/single_kdma_adm_config.yml b/adm_configs/single_kdma_adm_config_high_baseline.yml similarity index 100% rename from adm_configs/single_kdma_adm_config.yml rename to adm_configs/single_kdma_adm_config_high_baseline.yml diff --git a/adm_configs/single_kdma_adm_config_high_incontext.yml b/adm_configs/single_kdma_adm_config_high_incontext.yml new file mode 100644 index 00000000..51c9762b --- /dev/null +++ b/adm_configs/single_kdma_adm_config_high_incontext.yml @@ -0,0 +1,22 @@ +adm: + name: 'SingleKDMAADM' + init_kwargs: + hf_model: meta-llama/Llama-2-7b-chat-hf + precision: half + temperature: 0.7 + + inference_kwargs: + baseline: false + n_negative_samples: 0 + n_positive_samples: 1 + shuffle: true + incontext: + number: 5 + method: bert_similarity + # dataset: ../datasets/metrics-eval/bbn/metrics-eval-train-renamed.json + dataset: /data/shared/samba/integrated_results_metrics_eval/captured_dataset_for_chris/baseline_adept_high-1715105775-input-output.json + +alignment_target_override: + id: ADEPT-metrics_eval-alignment-target-train-HIGH + kdma_values: + - {kdma: MoralDesert, value: 1} diff --git 
a/adm_configs/single_kdma_adm_config_low.yml b/adm_configs/single_kdma_adm_config_low.yml new file mode 100644 index 00000000..70a9d648 --- /dev/null +++ b/adm_configs/single_kdma_adm_config_low.yml @@ -0,0 +1,17 @@ +adm: + name: 'SingleKDMAADM' + init_kwargs: + hf_model: meta-llama/Llama-2-7b-chat-hf + precision: half + temperature: 0.7 + + inference_kwargs: + baseline: false + n_negative_samples: 0 + n_positive_samples: 1 + shuffle: true + +alignment_target_override: + id: ADEPT-metrics_eval-alignment-target-train-LOW + kdma_values: + - {kdma: MoralDesert, value: 0} diff --git a/adm_configs/single_kdma_adm_config_low_baseline.yml b/adm_configs/single_kdma_adm_config_low_baseline.yml new file mode 100644 index 00000000..55fd28e2 --- /dev/null +++ b/adm_configs/single_kdma_adm_config_low_baseline.yml @@ -0,0 +1,17 @@ +adm: + name: 'SingleKDMAADM' + init_kwargs: + hf_model: meta-llama/Llama-2-7b-chat-hf + precision: half + temperature: 0.7 + + inference_kwargs: + baseline: true + n_negative_samples: 0 + n_positive_samples: 1 + shuffle: true + +alignment_target_override: + id: ADEPT-metrics_eval-alignment-target-train-LOW + kdma_values: + - {kdma: MoralDesert, value: 0} diff --git a/adm_configs/single_kdma_adm_config_low_incontext.yml b/adm_configs/single_kdma_adm_config_low_incontext.yml new file mode 100644 index 00000000..a23452cb --- /dev/null +++ b/adm_configs/single_kdma_adm_config_low_incontext.yml @@ -0,0 +1,22 @@ +adm: + name: 'SingleKDMAADM' + init_kwargs: + hf_model: meta-llama/Llama-2-7b-chat-hf + precision: half + temperature: 0.7 + + inference_kwargs: + baseline: false + n_negative_samples: 0 + n_positive_samples: 1 + shuffle: true + incontext: + number: 5 + method: bert_similarity + # dataset: ../datasets/metrics-eval/bbn/metrics-eval-train-renamed.json + dataset: /data/shared/samba/integrated_results_metrics_eval/captured_dataset_for_chris/baseline_adept_high-1715105775-input-output.json + +alignment_target_override: + id: ADEPT-metrics_eval-alignment-target-train-LOW + kdma_values: + - {kdma: MoralDesert, value: 0} diff --git a/align_system/algorithms/llama_2_single_kdma_adm.py b/align_system/algorithms/llama_2_single_kdma_adm.py index 22082fb4..deb85166 100644 --- a/align_system/algorithms/llama_2_single_kdma_adm.py +++ b/align_system/algorithms/llama_2_single_kdma_adm.py @@ -3,7 +3,8 @@ import random import os import pathlib -from align_system.algorithms.abstracts import AlignedDecisionMaker +import random +from functools import reduce from jinja2.exceptions import TemplateError @@ -13,8 +14,7 @@ import numpy as np from align_system.utils import logging - - +from align_system.algorithms.abstracts import AlignedDecisionMaker from align_system.similarity_measures import build_force_choice_func @@ -115,6 +115,7 @@ def __init__(self, device='cuda', hf_model='meta-llama/Llama-2-7b-chat-hf', prec self.temperature = temperature self.do_sample = do_sample self.chat_template = kwargs.get('chat_template', None) + self.dataset = [] assert precision in ['full', 'half'], "precision must be either 'full' or 'half'." self.precision = torch.float32 if precision == 'full' else torch.float16 @@ -126,11 +127,11 @@ def __init__(self, device='cuda', hf_model='meta-llama/Llama-2-7b-chat-hf', prec def load_model(self, model=None, tokenizer=None): assert (model is None) == (tokenizer is None), "model and tokenizer must both be None or both be not None." 
if model is not None: - print('Loading model and tokenizer from provided objects.') + log.info('Loading model and tokenizer from provided objects.') self.model = model self.tokenizer = tokenizer else: - print('Loading model:', self.hf_model) + log.info('Loading model: %s', self.hf_model) if self.device == 'auto': self.model = AutoModelForCausalLM.from_pretrained(self.hf_model, torch_dtype=self.precision, device_map='auto') else: @@ -214,40 +215,76 @@ def chat_prompt_tokens(self, dialogs, return_tensor=True) def build_multiple_choice_dialog(self, - question, - options, - system_message=None, - json_format=STANDARD_MULTIPLE_CHOICE_JSON_FORMAT): + question, + options, + incontext=None, + system_message=None, + json_format=STANDARD_MULTIPLE_CHOICE_JSON_FORMAT): + """ + Constructs a structured dialog for a multiple-choice question scenario, typically used in systems + that require a step-by-step reasoning response in a controlled format such as JSON. + + Parameters: + question (str): The question to be answered, forming the basis of the multiple-choice scenario. + options (list of str): The multiple-choice options provided as answers to the question. + incontext (list of list of dict, optional): In-context example exchanges (user/assistant message + pairs) to prepend to the dialog. Defaults to None. + system_message (str, optional): A predefined system message that guides the assistant on how to + respond. If None, a default message tailored for medical triage scenarios is used. + json_format (str): The JSON format that the user response should adhere to, providing structural + guidance on how the response should be formatted. + + Returns: + list of dict: A list where each dictionary represents a part of the dialog, with 'role' specifying + whether the part is from the 'system' or the 'user', and 'content' containing the text for that role. + + Notes: + The function defaults to using a specialized system message for medical triage unless a specific + message is provided. This default message instructs the assistant on how to approach and format + the response to the question in a methodical and informative manner. + """ + # Define a default system message for medical triage if none is provided medical_triage_system_message = ( "You are an assistant specialized in answering multiple-choice questions related to " "medical triage. When a question is posed, carefully analyze the symptoms or conditions " "described. Respond by providing a detailed reasoning using a step-by-step process or " "process of elimination. Conclude with the final answer, represented by the corresponding " "index number. Your response should be grounded in established medical knowledge and aim " - "to be informative. Please format your response as a JSON object in the following structure:\\n\\n" + "to be informative. Please format your response as a JSON object in the following structure:\n\n" f"{json_format}" "Ensure that you adhere to proper JSON syntax, and carefully note the index that corresponds to each answer."
) if system_message is None: system_message = medical_triage_system_message + # Format the multiple choice options for display formatted_options = [f'({i}) {option}' for i, option in enumerate(options)] - content = f'{question} {formatted_options}' + content = f'{question} {" ".join(formatted_options)}' + if incontext: + dialog = list(reduce(lambda x, y: x + y, incontext, [])) + else: + dialog = [] - dialog = [ + # Construct the dialog with system and user parts + + s_message = [ { "role": "system", "content": system_message - }, + } + ] + u_message = [ { "role": "user", "content": content } ] + dialog = s_message + dialog + u_message return dialog + def log_dialog(self, dialog): for e in dialog: if e.get('role') == 'system': @@ -284,7 +321,7 @@ def respond_to_dialog(self, dialog, prefix=None): else: new_dialog.append(message) dialog = new_dialog - print('INPUT\n', dialog) + log.info('INPUT\n %s', dialog) prompt_tokens = [self.tokenizer.apply_chat_template(dialog, tokenize=True)] inference_pair['input'] = self.tokenizer.apply_chat_template(dialog, tokenize=False) @@ -306,7 +343,7 @@ temperature=self.temperature, do_sample=self.do_sample) - # Print the generated model output + # Log the generated model output generated_output = self.tokenizer.decode(outputs.sequences[0][prompt_length:]) inference_pair['output'] = generated_output @@ -374,20 +411,54 @@ def respond_to_dialogs_batched(self, dialogs, prefixes=None): return generated_outputs - def aligned_decision_maker(self, question, choices, target_kdmas, n_positive_samples=5, n_negative_sampels=5, shuffle=True, baseline=False, n_retries=3): + def aligned_decision_maker(self, question, choices, target_kdmas, incontext=None, n_positive_samples=5, n_negative_sampels=5, shuffle=True, baseline=False, n_retries=3): + """ Executes a decision-making process by simulating a dialog based on positive and negative alignments with specified Key Decision-Making Attributes (KDMAs). + It attempts to identify the choice that best aligns with the target attributes, using both positive and negative samples to provide robustness against biases. + + Parameters: + question (str): The primary question posed to the decision-making system. + choices (list of str): A list of choices from which the system must select the most appropriate based on KDMAs. + target_kdmas (dict): Key-value pairs indicating the target KDMAs and their desired levels. Values indicate desired thresholds for alignment. + incontext (list of list of dict, optional): Pre-formatted in-context example exchanges prepended to each generated dialog. + n_positive_samples (int): Number of samples to process assuming positive alignment with the target KDMAs. + n_negative_samples (int): Number of samples to process assuming negative or inverse alignment with the target KDMAs. + shuffle (bool): If True, shuffle the choices to potentially reduce positional bias in the decision-making process. + baseline (bool): If True, use a baseline decision-making model that does not consider specific KDMAs. + n_retries (int): The number of retry attempts to parse a successful response from the decision-making process. + + Returns: + tuple: + responses (list): A list of dictionaries where each dictionary contains the response from the decision-making system, the reasoning behind it, and the index of the chosen answer. + inference_pairs (list): A list of dictionaries capturing detailed information about each inference attempt for analysis and debugging.
+ + Raises: + RuntimeError: If any specified KDMAs in `target_kdmas` are not supported by the system. + + Notes: + This function leverages logging to trace both aligned and misaligned dialogs; only the first of each type is logged for brevity. + """ + inference_pairs = [] + + + # Check if baseline is not used and handle unsupported KDMAs if not baseline: unsupported_kdmas = {kdma_remapping.get(k, k) for k in target_kdmas.keys()} - kdmas if len(unsupported_kdmas) > 0: raise RuntimeError(f"KDMA(s) {unsupported_kdmas} not supported.") + + # Prefix for logging reasoning prefix = '{"Reasoning": "Because' responses = [] + # Flags to ensure we log certain types of dialog once logged_aligned_dialog = False logged_inverse_misaligned_dialog = False + + # Generate responses for positive samples for _ in range(n_positive_samples): if baseline: system_message = load_system_message() @@ -398,22 +469,27 @@ for kdma, value in target_kdmas.items()} system_message = load_system_message(system_message_keys) - indecies = list(range(len(choices))) + # Shuffle choices if required + indices = list(range(len(choices))) if shuffle: - random.shuffle(indecies) - shuffled_choices = [choices[i] for i in indecies] + random.shuffle(indices) + shuffled_choices = [choices[i] for i in indices] + # Build dialog with the system message and shuffled choices dialog = self.build_multiple_choice_dialog( question, shuffled_choices, - system_message=system_message) + system_message=system_message, + incontext=incontext) + # Log aligned dialog once for clarity if not logged_aligned_dialog: log.debug("[bold]*ALIGNED DIALOG*[/bold]", extra={"markup": True}) self.log_dialog(dialog) logged_aligned_dialog = True + # Attempt to parse a valid response multiple times good_parse = False for i in range(n_retries): high_response, inference_pair = self.respond_to_dialog(dialog, prefix=prefix) @@ -425,42 +501,48 @@ except RuntimeError as e: pass + # Fallback parsing strategy if normal parsing fails if not good_parse: reasoning, answer_idx, parse_method = Llama2SingleKDMAADM.bert_similarity_parse(high_response, shuffled_choices) - print('CHOSEN ANSWER IDX', answer_idx, shuffled_choices) + log.explain('CHOSEN ANSWER IDX %s %s', answer_idx, shuffled_choices) + # Ensure an answer was parsed successfully - assert answer_idx is not None, f'Failed to parse answer index from generated output: {low_response}' + assert answer_idx is not None, f'Failed to parse answer index from generated output: {high_response}' + # Store response details responses.append({ 'response': high_response, 'reasoning': reasoning, 'answer_idx': answer_idx, - 'shuffle_indecies': indecies, + 'shuffle_indices': indices, 'alignment': system_message_keys, 'aligned': True, 'parse_method': parse_method, }) - + # Repeat process for negative samples with inverse KDMA logic for _ in range(n_negative_sampels): system_message_keys = {kdma: 'high' if not value > 5 else 'low' for kdma, value in target_kdmas.items()} - indecies = list(range(len(choices))) + indices = list(range(len(choices))) if shuffle: - random.shuffle(indecies) - shuffled_choices = [choices[i] for i in indecies] + random.shuffle(indices) + shuffled_choices = [choices[i] for i in indices] + # Build dialog with inverse logic inverse_misaligned_dialog = self.build_multiple_choice_dialog( question, shuffled_choices, system_message=load_system_message(system_message_keys)) + # Log the first occurrence of an inverse misaligned dialog if not logged_inverse_misaligned_dialog:
log.debug("[bold]*INVERSE MISALIGNED DIALOG*[/bold]", extra={"markup": True}) self.log_dialog(inverse_misaligned_dialog) logged_inverse_misaligned_dialog = True + # Attempt response parsing with retries good_parse = False for i in range(n_retries): low_response, inference_pair = self.respond_to_dialog(inverse_misaligned_dialog, prefix=prefix) @@ -472,16 +554,18 @@ def aligned_decision_maker(self, question, choices, target_kdmas, n_positive_sam except RuntimeError as e: pass + # Fallback parsing strategy if normal parsing fails if not good_parse: reasoning, answer_idx, parse_method = Llama2SingleKDMAADM.bert_similarity_parse(low_response, shuffled_choices) assert answer_idx is not None, f'Failed to parse answer index from generated output: {low_response}' + # Store response details responses.append({ 'response': low_response, 'reasoning': reasoning, 'answer_idx': answer_idx, - 'shuffle_indecies': indecies, + 'shuffle_indices': indices, 'alignment': system_message_keys, 'aligned': False, 'parse_method': parse_method, @@ -492,6 +576,23 @@ def aligned_decision_maker(self, question, choices, target_kdmas, n_positive_sam @staticmethod def calculate_votes(responses, choices): + """ + Calculates voting scores for each choice based on a list of responses. Responses that align with the desired outcome increase the score of the selected choice. Misaligned responses distribute a penalty among other choices. + + Parameters: + responses (list of dicts): Each dictionary contains information about a single response, including: + - 'answer_idx' (int or str): The index of the chosen answer. + - 'shuffle_indices' (list of int, optional): If present, it represents the original indices of the choices after shuffling. + - 'aligned' (bool): Indicates whether the response is aligned (True) or misaligned (False) with the desired outcome. + choices (list of str): A list of choices available for voting. + + Returns: + list of float: A list of normalized vote scores for each choice, where higher scores represent greater alignment with the desired outcome. + + Notes: + - The function handles cases where 'answer_idx' may not be an integer or could be out of the valid range of choices. + - Scores are adjusted by the minimum score to ensure all are non-negative and are then normalized to sum to 1. 
+ """ choice_votes = [0] * len(choices) for response in responses: answer_idx = response['answer_idx'] @@ -506,8 +607,8 @@ def calculate_votes(responses, choices): if answer_idx >= len(choices): continue - if 'shuffle_indecies' in response: - answer_idx = response['shuffle_indecies'][int(answer_idx)] + if 'shuffle_indices' in response: + answer_idx = response['shuffle_indices'][int(answer_idx)] aligned = response['aligned'] @@ -600,10 +701,10 @@ def parse_generated_output(generated_output, n_choices): @staticmethod def bert_similarity_parse(generated_output, choices): - print('BERT SIMILARITY PARSE') + log.info('BERT SIMILARITY PARSE') force_choice_func = build_force_choice_func('bert') answer_idx, _ = force_choice_func(generated_output, choices) - print('ANSWER IDX', answer_idx, type(answer_idx)) + log.info('ANSWER IDX %s %s', answer_idx, type(answer_idx)) return generated_output, answer_idx, 'bert_similarity' @staticmethod @@ -714,11 +815,50 @@ def correct_json(self, invalid_json, verbose=True): return None def run_aligned_decision_maker_with_voting( - self, prompt, choices, alignment_target, n_positive_samples=5, n_negative_samples=5, baseline=False, shuffle=False): + self, + prompt, + choices, + alignment_target, + incontext= None, + n_positive_samples=5, + n_negative_samples=5, + baseline=False, + shuffle=False): + """ Executes a decision-making process with voting based on alignment targets and user-provided choices. + This method incorporates a mechanism for evaluating the alignment of choices with a specified target + using a set of positive and negative samples. + + Parameters: + prompt (str): The input prompt to which the decision-making model responds. + choices (list): A list of possible choices for the decision-maker to evaluate. + alignment_target (str): A target alignment criterion that guides the decision-making process. + incontext (list[dict], optional): Additional contextual information to provide to the model. Defaults to None. + n_positive_samples (int): Number of positive samples to use for aligning the choices with the target. Defaults to 5. + n_negative_samples (int): Number of negative samples to use for the alignment evaluation. Defaults to 5. + baseline (bool): Flag to determine whether to use a baseline model for comparison. Defaults to False. + shuffle (bool): Option to shuffle the choices before processing. This can help in reducing bias. Defaults to False. + + Returns: + tuple: A tuple containing: + - reasoning (str or None): The reasoning behind the selected choice, if available. + - answer_idx (int): The index of the choice selected as most aligned. + - responses (list): Detailed responses from the model for each choice. + - inference_pairs (list): Raw data pairs used in the inference process. + + Raises: + Exception: Captures and logs any exception that occurs during the vote calculation, defaulting choice scores to None if an error occurs. + + Notes: + This method leverages internal logging to trace the detailed responses and the computation of choice scores. + It is essential to ensure proper initialization of the logging and handling mechanisms to capture and utilize + the detailed debug outputs effectively. 
+ + """ responses, inference_pairs = self.aligned_decision_maker( prompt, choices, alignment_target, + incontext=incontext, baseline=baseline, n_positive_samples=n_positive_samples, n_negative_sampels=n_negative_samples, @@ -752,46 +892,177 @@ def run_aligned_decision_maker_with_voting( for r in responses: assert r['answer_idx'] is not None - assert int(r['answer_idx']) < len(r['shuffle_indecies']) + assert int(r['answer_idx']) < len(r['shuffle_indices']) - if r['shuffle_indecies'][int(r['answer_idx'])] == answer_idx: + if r['shuffle_indices'][int(r['answer_idx'])] == answer_idx: reasoning = r['reasoning'] break return reasoning, answer_idx, responses, inference_pairs + def format_single_incontext_prompt(self, sample, labels, target_kdma_values): + """ + Formats an in-context prompt for use in a dialogue system where an assistant provides reasoned answers based on provided scores and target values. + + Parameters: + sample (dict): A dictionary containing the scenario description and choices. + - 'prompt' (str): The main question or scenario description. + - 'choices' (list of dicts): Possible choices for the scenario. + - Each choice is a dictionary with an 'unstructured' key containing the choice text. + labels (list of dicts): A list where each dictionary contains scores associated with each choice. + target_kdma_values (dict): A dictionary mapping a target attribute to its desired value. + + Returns: + list of dicts: A list containing structured data for the user and assistant roles, formatted for a dialogue interaction. + - Each dictionary contains 'role' (str) indicating if it's 'user' or 'assistant' and 'content' (str) which is the text associated with that role. + + Notes: + This function assumes there is exactly one key in `target_kdma_values`. + The assistant's answer includes reasoning why a particular choice is preferred based on the minimal distance to the target value in `labels`. + """ + # Mapping of target attributes to their corresponding score keys + kdma_name_map = { + 'moral_deservingness': 'MoralDesert', + 'maximization': 'maximization', + } + + # Extract the main question from the sample + question = sample['prompt'] + + # Format choices as enumerated options for display + formatted_options = [f'({i}) {option["unstructured"]}' for i, option in enumerate(sample['choices'])] + content = f"{question} {' '.join(formatted_options)}" + + # Extract the target attribute (assuming there's only one key in target_kdma_values) + target = next(iter(target_kdma_values)) + + # Calculate the distance of each choice's score to the target value + dist_to_target = [] + for score in labels: + if kdma_name_map[target] in score: + # Multiply by 10 to match the rest of the KDMA's score range + dist = abs(score[kdma_name_map[target]] * 10 - target_kdma_values[target]) + else: + dist = float('inf') # If the target attribute is not in the scores, assign an infinite distance + dist_to_target.append(dist) + + # Determine the index of the choice with the minimum distance to the target value + correct_answer_idx = np.argmin(dist_to_target) + + # Map the target attribute to its descriptive wording + target_wording_mapping = { + "moral_deservingness": "morally deserving choice.", + "maximization": "maximization choice." 
+ } + + # Determine the adjective based on the value being higher or lower than a threshold + adjective = 'more' if target_kdma_values[target] > 5 else 'less' + + # Construct the reasoning text using the correct choice and its descriptor + answer_text = f'Because {sample["choices"][correct_answer_idx]["unstructured"]} is the {adjective} {target_wording_mapping.get(target, "specified attribute")}' + answer = f'{{"Reasoning": "{answer_text}", "answer": {correct_answer_idx}}}' + + # Structure the dialog with user and assistant roles + prompt = [ + {"role": "user", "content": content}, + {"role": "assistant", "content": answer} + ] + + return prompt + def __call__(self, sample, target_kdma_values, **kwargs): + """ + Build the prompt and send it to the LLM to ask for a single KDMA (Key Decision-Making Attribute). + + Parameters: + sample (dict): A dictionary containing the scenario, state, probe, and choices. + - 'scenario' (str): The main scenario description. + - 'state' (str, optional): Additional state information to append to the scenario. + - 'probe' (str): The specific question or probe to be answered. + - 'choices' (list of str): Possible choices for the scenario. + target_kdma_values (dict): A dictionary mapping a target attribute to its desired value. + kwargs (dict): Additional keyword arguments for in-context learning, retrievers, labels, etc. + - 'incontext' (dict, optional): Configuration for in-context learning. + - 'dataset' (str): Path to the in-context dataset. + - 'number' (int): Number of in-context samples to use. + - 'method' (str): Method to select in-context samples ('random' or 'bert_similarity'). + - 'labels' (list of dicts, optional): A list where each dictionary contains scores associated with each choice. + - 'n_positive_samples' (int, optional): Number of positive samples for decision making. + - 'n_negative_samples' (int, optional): Number of negative samples for decision making. + - 'baseline' (bool, optional): Whether to use a baseline approach. + - 'shuffle' (bool, optional): Whether to shuffle the choices. + + Returns: + dict: A dictionary containing the selected choice and additional information. + - 'choice' (int): The index of the selected choice. + - 'info' (dict): Additional information including reasoning, responses, and raw data. + """ prompt = sample['scenario'] if sample['state'] is not None: prompt += f'\n{sample["state"]}' - if 'retriever' in kwargs: - # retriever_prompt = "How would you treat the following injuries: {}".format(prompt) - retriever_prompt = "{} {}".format(prompt, sample['probe']) + incontext_prompts = [] - retriever = kwargs['retriever'] - retrieved_nodes = retriever.retrieve(retriever_prompt) + if 'incontext' in kwargs: + possible_samples = [] - if 'summarizer' in kwargs: - summarizer = kwargs['summarizer'] - summary = summarizer.synthesize(retriever_prompt, nodes=retrieved_nodes) + # Read dataset + with open(kwargs['incontext']['dataset']) as f: + dataset = json.load(f) - log.explain("[bold] ** Retrieval Summary ** [/bold]", - extra={"markup": True}) - log.explain(summary) + # Populate possible samples from the dataset + for sam in dataset: + possible_samples.append(sam) + + if len(possible_samples) < kwargs['incontext']['number']: + raise RuntimeError(f'Not enough possible in-context samples to learn from. 
Only {len(possible_samples)} samples available while asking for {kwargs["incontext"]["number"]} in-context samples.') + + if kwargs['incontext']['method'] == 'random': + chosen_sample = random.sample(possible_samples, kwargs['incontext']['number']) + elif kwargs['incontext']['method'] == 'bert_similarity': + # Extract strings for each situation + possible_samples_parse = [s['input']['prompt'] for s in possible_samples] - prompt += "\n#############\n{}\n#############".format(summary) + # Create similarity scores between the in-context dataset and find top-k indices + from bert_score import score + _, _, F1 = score([prompt]*len(possible_samples_parse), possible_samples_parse, lang='en') + _, indices = torch.topk(F1, kwargs['incontext']['number']) + # Make list of the top k for creating prompts + chosen_sample = [possible_samples[i] for i in indices] else: - prompt += "\n#############\n{}\n#############".format( - "\n#############\n".join((n.text for n in retrieved_nodes))) + raise ValueError(f'"{kwargs["incontext"]["method"]}" is not a valid in-context method. Please use "random" or "bert_similarity".') - prompt += f'\nGiven the scenario and documentation above.. {sample["probe"]}' - else: - prompt += f'\n{sample["probe"]}' + # Create in-context prompts + for cs in chosen_sample: + incontext_prompts.append(self.format_single_incontext_prompt(cs['input'], cs['label'], target_kdma_values)) - choices = sample['choices'] + # if 'retriever' in kwargs: + # # retriever_prompt = "How would you treat the following injuries: {}".format(prompt) + # retriever_prompt = "{} {}".format(prompt, sample['probe']) + + # retriever = kwargs['retriever'] + # retrieved_nodes = retriever.retrieve(retriever_prompt) + + # if 'summarizer' in kwargs: + # summarizer = kwargs['summarizer'] + # summary = summarizer.synthesize(retriever_prompt, nodes=retrieved_nodes) + + # log.explain("[bold] ** Retrieval Summary ** [/bold]", + # extra={"markup": True}) + # log.explain(summary) + # prompt += "\n#############\n{}\n#############".format(summary) + + # else: + # prompt += "\n#############\n{}\n#############".format( + # "\n#############\n".join((n.text for n in retrieved_nodes))) + + # prompt += f'\nGiven the scenario and documentation above.. {sample["probe"]}' + # else: + + prompt += f'\n{sample["probe"]}' + choices = sample['choices'] labels = kwargs.get('labels', {}) alignment_target = None @@ -809,6 +1080,7 @@ def __call__(self, sample, target_kdma_values, **kwargs): prompt, choices, alignment_target, + incontext=incontext_prompts, n_positive_samples=kwargs.get('n_positive_samples', 5), n_negative_samples=kwargs.get('n_negative_samples', 5), baseline=kwargs.get('baseline', False), @@ -1042,7 +1314,7 @@ def populate_tagging_parameters(self, scenario_state, tagging_action, alignment_ parsed_tagging_output = self.attempt_generic_parse( # noqa raw_tagging_response, ['Reasoning', 'Answer', 'Tag']) # noqa - + if parsed_tagging_output is not None: if len(untagged_characters) == 1: log.debug("** Force selecting only available character") diff --git a/align_system/evaluation/adm_evaluator.py b/align_system/evaluation/adm_evaluator.py index ff7eda9f..4a742e4f 100644 --- a/align_system/evaluation/adm_evaluator.py +++ b/align_system/evaluation/adm_evaluator.py @@ -11,7 +11,7 @@ def generate_outputs(dataset, adm, target_kdma_values, **kwargs): }) continue - outputs.append(adm(input_, target_kdma_values, labels=label, **kwargs)) + outputs.append(adm(input_, target_kdma_values, labels=label, dataset=dataset, **kwargs)) return outputs
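
Note for reviewers: the in-context pieces above are spread across several hunks (the `incontext:` YAML block, `format_single_incontext_prompt`, and the new `incontext=` path in `build_multiple_choice_dialog`). The sketch below shows the dialog layout they combine to produce; the example strings and system message are invented, and each in-context entry is assumed to be a `[user, assistant]` message pair as returned by `format_single_incontext_prompt`:

```python
# Minimal sketch of the dialog layout built by build_multiple_choice_dialog
# when incontext is supplied: system message first, then the flattened
# example exchanges, then the real question. All strings here are made up.
from functools import reduce

incontext = [
    [{"role": "user", "content": "Example scenario... (0) choice A (1) choice B"},
     {"role": "assistant", "content": '{"Reasoning": "Because ...", "answer": 1}'}],
]

system_message = "You are an assistant specialized in medical triage..."
content = "Real scenario... (0) choice C (1) choice D"

dialog = list(reduce(lambda x, y: x + y, incontext, []))  # flatten the exchanges
dialog = ([{"role": "system", "content": system_message}]
          + dialog
          + [{"role": "user", "content": content}])

for message in dialog:
    print(message["role"], "->", message["content"])
```

Keeping the system message first means the in-context examples read as earlier turns of the same conversation, so the model sees answered exchanges in exactly the JSON format it is asked to produce.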
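The scoring arithmetic inside `calculate_votes` is mostly unchanged context and is not visible in the hunks above; its new docstring describes the scheme as: aligned responses add weight to the chosen choice, misaligned responses distribute a penalty-reward among the other choices, and the totals are min-shifted and normalized. A sketch of that scheme follows; the per-response weights are assumptions, not the shipped values:

```python
# Sketch of the voting scheme described by the calculate_votes docstring.
# The exact weights are assumed; the real ones live in unchanged lines of
# llama_2_single_kdma_adm.py that this diff does not show.
def calculate_votes_sketch(responses, choices):
    votes = [0.0] * len(choices)
    for response in responses:
        answer_idx = response['answer_idx']
        if not str(answer_idx).isdigit():   # skip unparseable indices
            continue
        answer_idx = int(answer_idx)
        if answer_idx >= len(choices):      # skip out-of-range indices
            continue
        if 'shuffle_indices' in response:   # map back to unshuffled order
            answer_idx = response['shuffle_indices'][answer_idx]
        if response['aligned']:
            votes[answer_idx] += 1.0        # aligned response endorses its pick
        else:
            # a misaligned pick counts as evidence for the other choices
            for other in range(len(choices)):
                if other != answer_idx:
                    votes[other] += 1.0 / (len(choices) - 1)
    minimum = min(votes)
    votes = [v - minimum for v in votes]    # shift so all scores are >= 0
    total = sum(votes) or 1.0               # guard against dividing by zero
    return [v / total for v in votes]       # normalize to sum to 1

print(calculate_votes_sketch(
    [{'answer_idx': '0', 'shuffle_indices': [1, 0], 'aligned': True},
     {'answer_idx': 1, 'shuffle_indices': [0, 1], 'aligned': True},
     {'answer_idx': 0, 'aligned': False}],
    ['treat patient A', 'treat patient B']))
# -> [0.0, 1.0]: all three responses point at the second choice
```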