3 changes: 3 additions & 0 deletions .gitignore
@@ -24,3 +24,6 @@ pnpm-debug.log*
__pycache__
*egg-info
*pyc

# Test fixtures cache (downloaded experiment data)
tests/fixtures/.cache/
11 changes: 11 additions & 0 deletions README.md
@@ -77,6 +77,17 @@ poetry run align-app --scenarios /data/shared/evaluation_outputs/Ph2-JulyCollab/
poetry run align-app --scenarios /path/to/scenarios1.json /path/to/scenarios2.json /path/to/scenarios_dir
```

### Load Experiment Results

You can load pre-computed experiment results using the `--experiments` flag. This extracts unique ADM configurations from experiment directories and adds them to the decider dropdown:

```console
# Use test fixtures (download first by running: poetry run pytest tests/test_experiment_deciders.py -k download)
poetry run align-app --experiments tests/fixtures/.cache/experiments
```

The experiment directory should contain subdirectories with `.hydra/config.yaml` and `input_output.json` files from align-system experiment runs.
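
For reference, a layout roughly like the following would be picked up (the run names here are purely illustrative):

```
experiments/
├── run_a/
│   ├── .hydra/
│   │   └── config.yaml
│   └── input_output.json
└── run_b/
    ├── .hydra/
    │   └── config.yaml
    └── input_output.json
```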

### Optionally Configure Network Port or Host

The web server is from Trame. To configure the port, use the `--port` or `-p` arg
47 changes: 27 additions & 20 deletions align_app/adm/config.py
@@ -3,6 +3,7 @@
from pathlib import Path
import align_system
from align_app.adm.hydra_config_loader import load_adm_config
from align_app.adm.experiment_config_loader import load_experiment_adm_config
from align_app.utils.utils import merge_dicts


@@ -28,6 +29,9 @@ def get_decider_config(
Merges base decider config with app-level overrides.
Two-layer merge: base YAML config + (config_overrides + dataset_overrides)

For experiment configs (experiment_config: True), loads pre-resolved YAML directly.
For edited configs (edited_config: True), returns the stored resolved_config directly.

Args:
probe_id: The probe ID to get config for
all_deciders: Dict of all available deciders
@@ -41,35 +45,38 @@
if not decider_cfg:
return None

config_path = decider_cfg["config_path"]
is_edited_config = decider_cfg.get("edited_config", False)
is_experiment_config = decider_cfg.get("experiment_config", False)

if is_edited_config:
config = copy.deepcopy(decider_cfg["resolved_config"])
if llm_backbone and "structured_inference_engine" in config:
config["structured_inference_engine"]["model_name"] = llm_backbone
return config

# Layer 1: Load base config from align-system YAML
full_cfg = load_adm_config(
config_path,
str(base_align_system_config_dir),
)
decider_base = full_cfg.get("adm", {})
# Layer 1: Load base config - either pre-resolved experiment YAML or Hydra compose.
# Both produce the same structure, with ${ref:...} references that initialize_with_custom_references resolves.
if is_experiment_config:
experiment_path = Path(decider_cfg["experiment_path"])
decider_base = load_experiment_adm_config(experiment_path) or {}
else:
config_path = decider_cfg["config_path"]
full_cfg = load_adm_config(
config_path,
str(base_align_system_config_dir),
)
decider_base = full_cfg.get("adm", {})

# Layer 2: Prepare app-level overrides
config_overrides = decider_cfg.get("config_overrides", {})
dataset_overrides = decider_cfg.get("dataset_overrides", {}).get(dataset_name, {})

# Extract metadata fields from decider entry
metadata = {
k: v
for k, v in decider_cfg.items()
if k in ["llm_backbones", "model_path_keys"]
}

# Single deep merge: base + config_overrides + dataset_overrides + metadata
# Deep merge: base + config_overrides + dataset_overrides
merged_config = copy.deepcopy(decider_base)
merged_config = merge_dicts(merged_config, config_overrides)
merged_config = merge_dicts(merged_config, dataset_overrides)
merged_config = merge_dicts(merged_config, metadata)

if llm_backbone:
merged_config["llm_backbone"] = llm_backbone
if "structured_inference_engine" in merged_config:
merged_config["structured_inference_engine"]["model_name"] = llm_backbone
if llm_backbone and "structured_inference_engine" in merged_config:
merged_config["structured_inference_engine"]["model_name"] = llm_backbone

return merged_config
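
To make the layering above concrete, here is a minimal sketch of how the two merge layers and the backbone override compose. The recursive `merge_dicts` and the config values below are illustrative stand-ins (the real helper lives in `align_app.utils.utils`), not the actual implementation or a real ADM config.

```python
import copy


def merge_dicts(base, override):
    # Illustrative recursive merge; the real helper lives in align_app.utils.utils.
    merged = copy.deepcopy(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = merge_dicts(merged[key], value)
        else:
            merged[key] = value
    return merged


# Hypothetical stand-in values for a decider entry.
decider_base = {
    "structured_inference_engine": {"model_name": "base-model"},
    "temperature": 0.0,
}
config_overrides = {"temperature": 0.7}
dataset_overrides = {"structured_inference_engine": {"batch_size": 4}}

merged_config = merge_dicts(
    merge_dicts(copy.deepcopy(decider_base), config_overrides), dataset_overrides
)
# -> {"structured_inference_engine": {"model_name": "base-model", "batch_size": 4},
#     "temperature": 0.7}

llm_backbone = "user-selected-model"  # hypothetical dropdown selection
if llm_backbone and "structured_inference_engine" in merged_config:
    merged_config["structured_inference_engine"]["model_name"] = llm_backbone
```
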
3 changes: 2 additions & 1 deletion align_app/adm/decider/executor.py
@@ -1,6 +1,5 @@
from typing import Any, Tuple
import gc
import torch
from functools import partial
from align_system.utils.hydra_utils import initialize_with_custom_references
from align_system.utils.hydrate_state import p2triage_hydrate_scenario_state
@@ -96,6 +95,8 @@ def instantiate_adm(decider_config):
adm = initialize_with_custom_references({"adm": decider_config})["adm"]

def cleanup(_):
import torch

gc.collect()
torch.cuda.empty_cache()

1 change: 0 additions & 1 deletion align_app/adm/decider/tests/conftest.py
@@ -60,7 +60,6 @@ def resolved_random_config():
"${ref:adm.step_definitions.populate_choice_info}",
],
},
"model_path_keys": ["structured_inference_engine", "model_name"],
}


29 changes: 29 additions & 0 deletions align_app/adm/decider/tests/test_worker.py
@@ -1,5 +1,34 @@
import multiprocessing as mp
from align_app.adm.decider.types import DeciderParams
from align_app.adm.decider.worker import extract_cache_key


class TestExtractCacheKey:
def test_same_config_produces_same_key(self):
config = {"model_name": "test-model", "temperature": 0.7}
key1 = extract_cache_key(config)
key2 = extract_cache_key(config)
assert key1 == key2

def test_different_configs_produce_different_keys(self):
config1 = {"model_name": "test-model", "temperature": 0.7}
config2 = {"model_name": "test-model", "temperature": 0.8}
key1 = extract_cache_key(config1)
key2 = extract_cache_key(config2)
assert key1 != key2

def test_same_model_different_settings_produce_different_keys(self):
config1 = {
"structured_inference_engine": {"model_name": "same-model"},
"setting_a": "value1",
}
config2 = {
"structured_inference_engine": {"model_name": "same-model"},
"setting_a": "value2",
}
key1 = extract_cache_key(config1)
key2 = extract_cache_key(config2)
assert key1 != key2


class TestDeciderWorker:
15 changes: 2 additions & 13 deletions align_app/adm/decider/worker.py
@@ -10,19 +10,8 @@


def extract_cache_key(resolved_config: Dict[str, Any]) -> str:
llm_backbone = resolved_config.get("llm_backbone", {})
model_path_keys = resolved_config.get("model_path_keys", [])

cache_parts = []
for key in model_path_keys:
if key in llm_backbone:
cache_parts.append(f"{key}={llm_backbone[key]}")

if not cache_parts:
cache_str = json.dumps(resolved_config, sort_keys=True)
return hashlib.md5(cache_str.encode()).hexdigest()

return "_".join(cache_parts)
cache_str = json.dumps(resolved_config, sort_keys=True)
return hashlib.md5(cache_str.encode()).hexdigest()


def decider_worker_func(task_queue: Queue, result_queue: Queue):
12 changes: 5 additions & 7 deletions align_app/adm/decider_definitions.py
@@ -103,7 +103,6 @@ def create_decider_entry(config_path, overrides={}):
return {
"config_path": config_path,
"llm_backbones": LLM_BACKBONES,
"model_path_keys": ["structured_inference_engine", "model_name"],
"dataset_overrides": {},
**overrides,
}
@@ -113,19 +112,19 @@ def create_decider_entry(config_path, overrides={}):
"phase2_pipeline_zeroshot_comparative_regression": create_decider_entry(
"adm/phase2_pipeline_zeroshot_comparative_regression.yaml",
{
"max_alignment_attributes": 10,
"config_overrides": {
"comparative_regression_choice_schema": {"reasoning_max_length": -1},
"max_alignment_attributes": 10,
},
"system_prompt_generator": _generate_comparative_regression_pipeline_system_prompt,
},
),
"phase2_pipeline_fewshot_comparative_regression": create_decider_entry(
"adm/phase2_pipeline_fewshot_comparative_regression.yaml",
{
"max_alignment_attributes": 10,
"config_overrides": {
"comparative_regression_choice_schema": {"reasoning_max_length": -1},
"max_alignment_attributes": 10,
"step_definitions": {
"regression_icl": {
"icl_generator_partial": {
@@ -179,13 +178,12 @@ def create_runtime_decider_entry(config_path):
)


def get_all_deciders(config_paths=[]):
"""Get all deciders, merging runtime configs from paths with base deciders."""
runtime_deciders = {
def get_runtime_deciders(config_paths):
"""Get runtime deciders from CLI config paths."""
return {
Path(config_path).stem: create_runtime_decider_entry(config_path)
for config_path in config_paths
}
return {**runtime_deciders, **_BASE_DECIDERS}


def get_system_prompt(
84 changes: 72 additions & 12 deletions align_app/adm/decider_registry.py
@@ -1,7 +1,11 @@
from functools import partial
from collections import namedtuple
from typing import Dict, Any
from .decider_definitions import get_all_deciders, get_system_prompt
from .decider_definitions import (
get_runtime_deciders,
get_system_prompt,
_BASE_DECIDERS,
)
from .config import get_decider_config, _get_dataset_name


@@ -21,24 +25,17 @@ def _get_decider_options(
Dict with option fields, or None if decider doesn't exist for probe's dataset
"""
try:
dataset_name = _get_dataset_name(probe_id, datasets)
_get_dataset_name(probe_id, datasets)
except ValueError:
return None

decider_cfg = all_deciders.get(decider)
if not decider_cfg:
return None

config_overrides = decider_cfg.get("config_overrides", {})
dataset_overrides = decider_cfg.get("dataset_overrides", {}).get(dataset_name, {})

metadata = {
"llm_backbones": decider_cfg.get("llm_backbones", []),
"model_path_keys": decider_cfg.get("model_path_keys", []),
"max_alignment_attributes": config_overrides.get(
"max_alignment_attributes",
dataset_overrides.get("max_alignment_attributes", 0),
),
"max_alignment_attributes": decider_cfg.get("max_alignment_attributes", 0),
"config_path": decider_cfg.get("config_path"),
"exists": True,
}
@@ -53,16 +50,75 @@
"get_decider_options",
"get_system_prompt",
"get_all_deciders",
"add_edited_decider",
],
)


def create_decider_registry(config_paths, scenario_registry):
def _get_root_decider_name(decider_name: str) -> str:
"""Extract the root decider name without any ' - edit N' suffix."""
import re

match = re.match(r"^(.+?) - edit \d+$", decider_name)
if match:
return _get_root_decider_name(match.group(1))
return decider_name


def _add_edited_decider(
base_decider_name: str,
resolved_config: Dict[str, Any],
llm_backbones: list,
all_deciders: Dict[str, Any],
) -> str:
"""
Add an edited decider to the registry.

Args:
base_decider_name: Original decider name this was edited from
resolved_config: The edited resolved config
llm_backbones: Available LLM backbones for this decider
all_deciders: The mutable deciders dictionary (pre-bound via partial)

Returns:
The new decider name "{root_decider_name} - edit {n}"
"""
root_name = _get_root_decider_name(base_decider_name)

edit_count = 1
for name in all_deciders:
if name.startswith(f"{root_name} - edit "):
try:
n = int(name.split(" - edit ")[-1])
edit_count = max(edit_count, n + 1)
except ValueError:
pass

new_name = f"{root_name} - edit {edit_count}"
all_deciders[new_name] = {
"edited_config": True,
"resolved_config": resolved_config,
"llm_backbones": llm_backbones,
"max_alignment_attributes": 10,
}
return new_name


def create_decider_registry(config_paths, scenario_registry, experiment_deciders=None):
"""
Takes config paths and scenario_registry, returns a DeciderRegistry namedtuple
with all_deciders and datasets pre-bound using partial application.

Args:
config_paths: List of paths to runtime decider configs
scenario_registry: Registry for scenarios/probes
experiment_deciders: Optional dict of experiment deciders to merge
"""
all_deciders = get_all_deciders(config_paths)
all_deciders = {
**_BASE_DECIDERS,
**(experiment_deciders or {}),
**get_runtime_deciders(config_paths),
}
datasets = scenario_registry.get_datasets()

return DeciderRegistry(
Expand All @@ -82,4 +138,8 @@ def create_decider_registry(config_paths, scenario_registry):
datasets=datasets,
),
get_all_deciders=lambda: all_deciders,
add_edited_decider=partial(
_add_edited_decider,
all_deciders=all_deciders,
),
)
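
A quick sketch of how the edit-naming helpers above behave, using made-up decider names; in the app these calls go through the registry's `add_edited_decider` partial rather than being invoked directly:

```python
from align_app.adm.decider_registry import _add_edited_decider, _get_root_decider_name

# "aligned_adm" and its edit entries are made-up names for illustration.
assert _get_root_decider_name("aligned_adm - edit 2") == "aligned_adm"

deciders = {"aligned_adm": {}, "aligned_adm - edit 1": {}}
new_name = _add_edited_decider(
    base_decider_name="aligned_adm - edit 1",
    resolved_config={"instance": {}},
    llm_backbones=["some-backbone"],
    all_deciders=deciders,
)
assert new_name == "aligned_adm - edit 2"  # edits are numbered from the root name
assert new_name in deciders
```
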
17 changes: 17 additions & 0 deletions align_app/adm/experiment_config_loader.py
@@ -0,0 +1,17 @@
"""Shared loader for experiment config files."""

from functools import lru_cache
from pathlib import Path
from typing import Dict, Any
import yaml


@lru_cache(maxsize=256)
def load_experiment_adm_config(experiment_path: Path) -> Dict[str, Any] | None:
"""Load the adm config from experiment's .hydra/config.yaml."""
config_path = experiment_path / ".hydra" / "config.yaml"
if not config_path.exists():
return None
with open(config_path) as f:
config = yaml.safe_load(f)
return config.get("adm", config)
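
A brief usage sketch of the new loader; the run path below is illustrative:

```python
from pathlib import Path

from align_app.adm.experiment_config_loader import load_experiment_adm_config

run_dir = Path("tests/fixtures/.cache/experiments/some_run")  # hypothetical run dir
adm_cfg = load_experiment_adm_config(run_dir)
if adm_cfg is None:
    print(f"No .hydra/config.yaml found under {run_dir}")
else:
    print(sorted(adm_cfg.keys()))
```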