Skip to content

Commit e43eb93

Browse files
author
zach
authored
fix: improve separation of judge and test profiles (#6)
1 parent 4c715ab commit e43eb93

File tree

4 files changed

+6
-15
lines changed

4 files changed

+6
-15
lines changed

mcpx_eval/__main__.py

Lines changed: 3 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -208,7 +208,7 @@ async def run():
208208
"--profile",
209209
"-p",
210210
default=None,
211-
help="Profile to use for all models",
211+
help="Profile to use for judge model",
212212
)
213213

214214
test_parser.add_argument("--prompt", help="Test prompt")
@@ -318,17 +318,8 @@ async def run():
318318

319319
if hasattr(args, "config") and args.config is not None:
320320
test = Test.load(args.config)
321-
if args.profile:
322-
test.profile = args.profile
323321
for model in args.model:
324-
if args.profile:
325-
if "/" in model:
326-
a, _ = model.split("/", maxsplit=1)
327-
test.models.append(f"{a}/{args.profile}")
328-
else:
329-
test.models.append(f"{model}/{args.profile}")
330-
else:
331-
test.models.append(model)
322+
test.models.append(model)
332323
if args.name is None or args.name == "":
333324
if test.name is not None:
334325
name = test.name
@@ -362,7 +353,7 @@ async def run():
362353

363354
judge = Judge(
364355
models=test.models,
365-
profile=test.profile,
356+
profile=args.profile,
366357
db=db,
367358
judge_model=args.judge_model,
368359
ignore_tools=test.ignore_tools,

mcpx_eval/judge.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -264,7 +264,7 @@ async def run(
264264
result["duration_in_seconds"] = f"{duration_seconds}s"
265265
result["number_of_tools_used"] = str(tool_analysis.total_tool_calls)
266266

267-
logger.info(f"Analyzing results of {model.slug}")
267+
logger.info(f"Analyzing results of {model.slug} with profile={self.profile}")
268268
agent = Chat(
269269
client=mcp_run.Client(
270270
config=mcp_run.ClientConfig(profile=self.profile)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "mcpx-eval"
3-
version = "0.1.3"
3+
version = "0.1.4"
44
description = "Open ended tool use evaluation framework"
55
readme = "README.md"
66
requires-python = ">=3.12"

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)