Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions AI Drug safety/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Virtual environment
venv/
env/

# Python cache
__pycache__/
*.pyc

# Environment variables file
.env

# Local virtualenv and caches
.venv/
.cache/
76 changes: 76 additions & 0 deletions AI Drug safety/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# AI Drug Safety Agent

This repository contains a Python scaffold for an AI Drug Safety Agent that
fetches real drug data (openFDA / optional DrugBank), computes clinical risk
scores (HAS-BLED, Tisdale), and synthesizes a concise clinical recommendation
using an LLM when available.

Requirements
------------

Install dependencies into a virtual environment (recommended):

```bash
python -m venv .venv
source .venv/bin/activate # or .venv\Scripts\activate on Windows
pip install -r requirements.txt
```

Quick start
-----------

Run the Streamlit tester UI:

```bash
streamlit run streamlit_app.py
```

Run the FastAPI web shim (optional):

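```bash
# TODO: add the launch command here (for a FastAPI app this is typically
# `uvicorn <module>:app --reload`; the module name is not documented yet)
```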

CLI example:

```bash
python -m ai_drug_safety.cli aspirin --age 70 --conditions "hypertension"
```

Environment / secrets
---------------------

Set `OPENAI_API_KEY` and `DRUGBANK_API_KEY` in your local environment or a
`.env` file at the project root. IMPORTANT: do not commit secrets. The
repository `.gitignore` contains `.env` and `.venv` entries — ensure you do not
push actual API keys to GitHub.

Recent changes
--------------

- Added `requirements.txt` with core dependencies.
- Updated the Streamlit UI to accept `sex` and relevant clinical flags.
- Improved the LLM prompt to request `confidence` and `assumptions` fields
and explicitly consider `patient.sex` when relevant.

What to keep (recommended)
--------------------------

- `ai_drug_safety/` — core package and clinical logic (keep).
- `evaluations/` — adjudicated gold and annotation CSVs (keep for evaluation).
- `streamlit_app.py` — interactive tester UI (keep if you use it).

Optional files you can remove
----------------------------

- `run_example.py` — tiny demo script (non-essential).
- `.cache/` — cache files from previous runs (safe to remove if you want a clean repo).
- `__pycache__/` directories — Python bytecode caches (safe to remove).

Pushing to GitHub
------------------

Before you push, ensure:
- `.env` is not committed (it is covered by `.gitignore`); if it was ever tracked, remove it from the repository and rotate the keys it contained.
- `.venv/` is not committed (use `.gitignore`).

Non-essential files (e.g., `run_example.py`, `.cache/`) can be removed before pushing if you want a clean repository.
7 changes: 7 additions & 0 deletions AI Drug safety/ai_drug_safety/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Submodule names exported by `from ai_drug_safety import *`.
# NOTE(review): the package also contains `adjudicate` and `rxnorm` modules that
# are not listed here — confirm that leaving them out of the star-import is intentional.
__all__ = [
"api_clients",
"risk_scoring",
"llm_reasoner",
"agent",
"cli",
]
217 changes: 217 additions & 0 deletions AI Drug safety/ai_drug_safety/adjudicate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
"""Adjudication tooling: pairwise annotation comparison + Cohen's kappa report.

Reads two annotator CSV files and optionally an adjudicator CSV. Produces
an adjudication report JSON and an adjudicated CSV with consensus/adjudicated
labels where available.

Usage:
python -m ai_drug_safety.adjudicate --a evaluations/annotations_annotatorA.csv \
--b evaluations/annotations_annotatorB.csv --out-dir evaluations
"""
from __future__ import annotations

import argparse
import csv
import json
import os
import random
from datetime import datetime
from typing import Dict, List, Tuple, Optional


def read_csv(path: str) -> List[Dict[str, str]]:
    """Load a UTF-8 CSV file and return its data rows as header-keyed dicts.

    Args:
        path: Location of the CSV file; the first line is used as the header.

    Returns:
        One dict per data row, in file order.
    """
    # newline='' lets the csv module handle embedded line breaks itself.
    with open(path, newline='', encoding='utf-8') as handle:
        return list(csv.DictReader(handle))


def norm_key(row: Dict[str, str]) -> str:
    """Return the key used to match a row across annotation files.

    The stripped ``id`` value is used when it is non-empty; otherwise the key
    is the stripped, lower-cased ``drug`` value (an empty string when that is
    missing as well).
    """
    identifier = (row.get('id') or '').strip()
    # Fall back to the drug name only when no usable id is present.
    return identifier or (row.get('drug') or '').strip().lower()


def parse_bool(x: Optional[str]) -> Optional[bool]:
    """Interpret a CSV cell as a tri-state boolean.

    Returns:
        True for ``1/true/yes/y``, False for ``0/false/no/n`` (matching is
        case-insensitive and ignores surrounding whitespace), and None for
        ``None``, blank, or any other value.
    """
    if x is None:
        return None
    recognised = {
        '1': True, 'true': True, 'yes': True, 'y': True,
        '0': False, 'false': False, 'no': False, 'n': False,
    }
    # Blank and unrecognised tokens are absent from the table and map to None.
    return recognised.get(str(x).strip().lower())


def compute_confusion(pairs: List[Tuple[bool, bool]]) -> Dict[str, int]:
    """Tally a 2x2 agreement table from (annotator A, annotator B) label pairs.

    Returns:
        Counts keyed ``n<a><b>``, where each digit is 1 when that annotator's
        label is truthy and 0 otherwise (e.g. ``n10`` = A true, B false).
    """
    table = {'n00': 0, 'n01': 0, 'n10': 0, 'n11': 0}
    for label_a, label_b in pairs:
        cell = 'n' + ('1' if label_a else '0') + ('1' if label_b else '0')
        table[cell] += 1
    return table


def kappa_from_conf(conf: Dict[str, int]) -> Optional[float]:
    """Compute Cohen's kappa from a 2x2 confusion table.

    Args:
        conf: Counts keyed ``n00``, ``n01``, ``n10``, ``n11`` (first digit is
            annotator A's label, second is annotator B's).

    Returns:
        ``(Po - Pe) / (1 - Pe)``, or None when the table is empty or when the
        chance agreement ``Pe`` equals 1 (kappa is undefined in both cases).
    """
    neither, only_b, only_a, both = (
        conf['n00'], conf['n01'], conf['n10'], conf['n11']
    )
    total = neither + only_b + only_a + both
    if total == 0:
        return None

    observed = (neither + both) / total
    rate_a = (only_a + both) / total
    rate_b = (only_b + both) / total
    # Agreement expected by chance from each annotator's marginal "true" rate.
    expected = rate_a * rate_b + (1 - rate_a) * (1 - rate_b)

    headroom = 1 - expected
    if headroom == 0:
        return None
    return (observed - expected) / headroom


def bootstrap_kappa(pairs: List[Tuple[bool, bool]], n_boot: int = 1000, seed: int = 42) -> Tuple[float, float]:
    """Estimate a 95% percentile bootstrap interval for Cohen's kappa.

    Args:
        pairs: (annotator A, annotator B) label pairs to resample from.
        n_boot: Number of bootstrap resamples.
        seed: Seed for the private random generator, so results are
            reproducible.

    Returns:
        ``(lower, upper)`` bounds taken from the sorted resampled kappas.
        ``(0.0, 0.0)`` is returned when there are no pairs or ``n_boot`` is
        not positive.
    """
    n_items = len(pairs)
    if n_items == 0 or n_boot <= 0:
        # Nothing to resample; mirror the empty-input result instead of
        # indexing into an empty list.
        return (0.0, 0.0)

    # A dedicated generator yields the same draws as seeding the module-level
    # one, without resetting the global RNG state for every other caller.
    rng = random.Random(seed)

    estimates = []
    for _ in range(n_boot):
        resample = [pairs[rng.randrange(n_items)] for _ in range(n_items)]
        kappa = kappa_from_conf(compute_confusion(resample))
        # NOTE: resamples with undefined kappa (e.g. all labels identical) are
        # counted as 0.0, which pulls the interval towards zero.
        estimates.append(kappa if kappa is not None else 0.0)
    estimates.sort()

    lower = estimates[int(0.025 * n_boot)]
    # Clamp so that tiny n_boot values never rely on negative-index wraparound.
    upper = estimates[max(int(0.975 * n_boot) - 1, 0)]
    return (lower, upper)


def _resolve_label(a_val: Optional[bool], b_val: Optional[bool],
                   adj_row: Optional[Dict[str, str]], field: str) -> Tuple[str, bool]:
    """Return ``(final_label, annotators_agree)`` for one binary field.

    The annotators' shared label wins when both labelled the item identically.
    Otherwise the adjudicator's value is used, but only if it parses to a
    boolean; a blank or unrecognised adjudicator cell leaves the label empty
    rather than emitting the string ``'None'``.
    """
    if a_val is not None and b_val is not None and a_val == b_val:
        return str(a_val), True
    if adj_row:
        adj_val = parse_bool(adj_row.get(field))
        if adj_val is not None:
            return str(adj_val), False
    return '', False


def main(argv: Optional[List[str]] = None) -> None:
    """Compare two annotators' CSVs and write an agreement report plus gold CSV.

    Rows are matched on ``norm_key``; only keys present in both annotator
    files are considered. Two files are written into ``--out-dir``, both
    stamped with the current UTC time: ``adjudication_report_<ts>.json``
    (confusion tables, Cohen's kappa and bootstrap intervals for
    ``major_interaction`` and ``adverse_outcome``) and
    ``adjudicated_gold_<ts>.csv`` (per-item labels from A and B, the final
    label and an agreement flag).

    Args:
        argv: Command-line arguments to parse; defaults to ``sys.argv[1:]``
            when None, so the function can also be driven programmatically.
    """
    # Local import: the file header only imports `datetime` from this module.
    from datetime import timezone

    parser = argparse.ArgumentParser(description="Adjudicate pairwise annotations and compute Cohen's kappa")
    parser.add_argument('--a', required=True, help='Annotator A CSV')
    parser.add_argument('--b', required=True, help='Annotator B CSV')
    parser.add_argument('--adjudicator', help='Optional adjudicator CSV')
    parser.add_argument('--out-dir', default='evaluations', help='Output directory')
    parser.add_argument('--bootstrap', type=int, default=1000, help='Bootstrap iterations for CI')
    args = parser.parse_args(argv)

    os.makedirs(args.out_dir, exist_ok=True)
    a_rows = read_csv(args.a)
    b_rows = read_csv(args.b)
    adj_rows = read_csv(args.adjudicator) if args.adjudicator else []

    # Later rows overwrite earlier ones that share the same key.
    a_map = {norm_key(r): r for r in a_rows}
    b_map = {norm_key(r): r for r in b_rows}
    adj_map = {norm_key(r): r for r in adj_rows}

    shared_keys = sorted(set(a_map) & set(b_map))
    pairs_major: List[Tuple[bool, bool]] = []
    pairs_adverse: List[Tuple[bool, bool]] = []
    merged_output = []

    for key in shared_keys:
        ra = a_map[key]
        rb = b_map[key]
        drug = ra.get('drug') or rb.get('drug') or key
        a_major = parse_bool(ra.get('major_interaction'))
        b_major = parse_bool(rb.get('major_interaction'))
        a_adv = parse_bool(ra.get('adverse_outcome'))
        b_adv = parse_bool(rb.get('adverse_outcome'))

        # Only items labelled by both annotators contribute to kappa.
        if a_major is not None and b_major is not None:
            pairs_major.append((a_major, b_major))
        if a_adv is not None and b_adv is not None:
            pairs_adverse.append((a_adv, b_adv))

        adj = adj_map.get(key)
        final_major, major_agree = _resolve_label(a_major, b_major, adj, 'major_interaction')
        final_adv, adv_agree = _resolve_label(a_adv, b_adv, adj, 'adverse_outcome')

        merged_output.append({
            'id': ra.get('id') or rb.get('id') or '',
            'drug': drug,
            'major_interaction_A': '' if a_major is None else str(a_major),
            'major_interaction_B': '' if b_major is None else str(b_major),
            'major_interaction_final': final_major,
            'major_interaction_agree': major_agree,
            'adverse_outcome_A': '' if a_adv is None else str(a_adv),
            'adverse_outcome_B': '' if b_adv is None else str(b_adv),
            'adverse_outcome_final': final_adv,
            'adverse_outcome_agree': adv_agree,
            'notes_A': ra.get('notes', ''),
            'notes_B': rb.get('notes', ''),
        })

    conf_major = compute_confusion(pairs_major)
    conf_adv = compute_confusion(pairs_adverse)
    k_major = kappa_from_conf(conf_major)
    k_adv = kappa_from_conf(conf_adv)
    ci_major = bootstrap_kappa(pairs_major, n_boot=args.bootstrap) if pairs_major else (0.0, 0.0)
    ci_adv = bootstrap_kappa(pairs_adverse, n_boot=args.bootstrap) if pairs_adverse else (0.0, 0.0)

    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted timestamp is unchanged.
    ts = datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')
    report = {
        'n_pairs': len(shared_keys),
        'major': {
            'confusion': conf_major,
            'kappa': k_major,
            'bootstrap_ci': {'2.5%': ci_major[0], '97.5%': ci_major[1]},
        },
        'adverse': {
            'confusion': conf_adv,
            'kappa': k_adv,
            'bootstrap_ci': {'2.5%': ci_adv[0], '97.5%': ci_adv[1]},
        },
        'annotator_a_file': args.a,
        'annotator_b_file': args.b,
        'adjudicator_file': args.adjudicator or None,
    }

    report_path = os.path.join(args.out_dir, f'adjudication_report_{ts}.json')
    with open(report_path, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2, ensure_ascii=False)

    out_csv = os.path.join(args.out_dir, f'adjudicated_gold_{ts}.csv')
    fieldnames = [
        'id', 'drug',
        'major_interaction_A', 'major_interaction_B',
        'major_interaction_final', 'major_interaction_agree',
        'adverse_outcome_A', 'adverse_outcome_B',
        'adverse_outcome_final', 'adverse_outcome_agree',
        'notes_A', 'notes_B',
    ]
    with open(out_csv, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(merged_output)

    print(f'Wrote adjudication report: {report_path}')
    print(f'Wrote adjudicated gold CSV: {out_csv}')


# Run the adjudication CLI only when this module is executed as a script.
if __name__ == "__main__":
    main()
27 changes: 27 additions & 0 deletions AI Drug safety/ai_drug_safety/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import Dict, Any
from . import api_clients, risk_scoring, llm_reasoner

def run_agent(drug_input: str, patient_info: Dict[str, Any] = None, use_mock: bool = False) -> Dict[str, Any]:
    """Run lookup, risk scoring and reasoning for one drug and patient.

    Args:
        drug_input: Drug identifier as supplied by the caller; it is passed
            unchanged to both the RXCUI lookup and the drug-data client.
        patient_info: Patient attributes handed to the scorer and reasoner;
            an empty dict is used when None.
        use_mock: Forwarded to ``api_clients.get_drug_data``.

    Returns:
        A dict with keys ``drug``, ``rxcui``, ``findings``, ``risk`` and
        ``analysis``. When an RXCUI was resolved it is also stored in
        ``findings["rxcui"]`` before scoring.
    """
    patient = {} if patient_info is None else patient_info

    # Imported lazily inside the function, as in the original layout.
    from .rxnorm import get_rxcui

    rxcui = get_rxcui(drug_input)
    drug_data = api_clients.get_drug_data(drug_input, use_mock=use_mock)
    if rxcui:
        drug_data["rxcui"] = rxcui

    risk_result = risk_scoring.score_risk(drug_data, patient)
    reasoning = llm_reasoner.analyze(drug_data, patient, risk_result)

    result: Dict[str, Any] = {}
    result["drug"] = drug_input
    result["rxcui"] = rxcui
    result["findings"] = drug_data
    result["risk"] = risk_result
    result["analysis"] = reasoning
    return result
Loading