From 284c867713f16d3be78b3a928fde3e3834045cbd Mon Sep 17 00:00:00 2001 From: Wyatt Avilla Date: Sat, 4 Apr 2026 17:17:33 -0700 Subject: [PATCH 1/6] command arg enum --- src/cli/args.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/cli/args.rs b/src/cli/args.rs index 027ed6c..fa02659 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -43,7 +43,7 @@ pub struct Args { headers: Vec, #[command(subcommand)] - pub evaluation: EvaluationCommand, + pub command: Command, } impl Args { @@ -52,6 +52,15 @@ impl Args { } } +#[derive(Subcommand, Debug)] +pub enum Command { + /// Run evaluations + Eval { + #[command(subcommand)] + evaluation: EvaluationCommand, + }, +} + #[derive(Subcommand, Debug)] pub enum EvaluationCommand { /// Run single-turn evaluation From 908a382b2b25e8e5f23642e4a9ca6f97c3e4b752 Mon Sep 17 00:00:00 2001 From: Wyatt Avilla Date: Sat, 4 Apr 2026 17:17:49 -0700 Subject: [PATCH 2/6] tests --- src/cli/args.rs | 61 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/src/cli/args.rs b/src/cli/args.rs index fa02659..dea9d71 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -124,6 +124,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "single-turn", "--threshold", "0.5", @@ -140,22 +141,49 @@ mod tests { .expect("single-turn args should parse"); #[allow(clippy::match_wildcard_for_single_variants)] - match args.evaluation { - super::EvaluationCommand::SingleTurn { request, .. } => { - assert!((request.threshold - 0.5).abs() < f32::EPSILON); - assert_eq!(request.variations, 2); - assert_eq!(request.maximum_iteration_layers, 2); - } - _ => panic!("expected single-turn command"), + match args.command { + super::Command::Eval { evaluation } => match evaluation { + super::EvaluationCommand::SingleTurn { request, .. } => { + assert!((request.threshold - 0.5).abs() < f32::EPSILON); + assert_eq!(request.variations, 2); + assert_eq!(request.maximum_iteration_layers, 2); + } + _ => panic!("expected single-turn command"), + }, } } + #[test] + fn rejects_legacy_top_level_evaluation_commands() { + let err = Args::try_parse_from([ + "cbl", + "--cbl-api-key", + "cbl-key", + "single-turn", + "--threshold", + "0.5", + "--variations", + "2", + "--maximum-iteration-layers", + "2", + "openai", + "--api-key", + "openai-key", + "--model", + "gpt-4.1-nano", + ]) + .expect_err("legacy top-level evaluation command should be rejected"); + + assert_eq!(err.kind(), ErrorKind::InvalidSubcommand); + } + #[test] fn rejects_out_of_range_threshold() { let err = Args::try_parse_from([ "cbl", "--cbl-api-key", "cbl-key", + "eval", "single-turn", "--threshold", "1.5", @@ -184,6 +212,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "single-turn", "--threshold", "0.5", @@ -212,6 +241,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "single-turn", "--threshold", "0.5", @@ -240,6 +270,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "single-turn", "--threshold", "0.5", @@ -256,11 +287,13 @@ mod tests { .expect("zero iteration layers should parse"); #[allow(clippy::match_wildcard_for_single_variants)] - match args.evaluation { - super::EvaluationCommand::SingleTurn { request, .. } => { - assert_eq!(request.maximum_iteration_layers, 0); - } - _ => panic!("expected single-turn command"), + match args.command { + super::Command::Eval { evaluation } => match evaluation { + super::EvaluationCommand::SingleTurn { request, .. } => { + assert_eq!(request.maximum_iteration_layers, 0); + } + _ => panic!("expected single-turn command"), + }, } } @@ -270,6 +303,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "single-turn", "--threshold", "0.5", @@ -298,6 +332,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "multi-turn", "--threshold", "0.5", @@ -326,6 +361,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "multi-turn", "--threshold", "0.5", @@ -354,6 +390,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "multi-turn", "--threshold", "0.5", From 9de6c8e1a7f3dcd2bd54dd0c8d9f8f52aa2d8d34 Mon Sep 17 00:00:00 2001 From: Wyatt Avilla Date: Sat, 4 Apr 2026 17:18:02 -0700 Subject: [PATCH 3/6] exports --- src/cli/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 306ef36..aebdd9f 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -3,4 +3,4 @@ mod args; mod headers; mod version; -pub use args::{Args, EvaluationCommand, ProviderCommand}; +pub use args::{Args, Command, EvaluationCommand, ProviderCommand}; From ae6772a0284789301c536348687045f30f723e98 Mon Sep 17 00:00:00 2001 From: Wyatt Avilla Date: Sat, 4 Apr 2026 17:18:11 -0700 Subject: [PATCH 4/6] use new command enum in `main` --- src/main.rs | 52 +++++++++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/src/main.rs b/src/main.rs index 645bc23..4dba309 100644 --- a/src/main.rs +++ b/src/main.rs @@ -48,7 +48,11 @@ async fn main() -> Result<(), Box> { let headers = cli_args.headers(); - let provider_command = match &cli_args.evaluation { + let evaluation = match &cli_args.command { + cli::Command::Eval { evaluation } => evaluation, + }; + + let provider_command = match evaluation { cli::EvaluationCommand::SingleTurn { provider, .. } | cli::EvaluationCommand::MultiTurn { provider, .. } => provider, }; @@ -67,32 +71,34 @@ async fn main() -> Result<(), Box> { let websocket = websockets::connect( &cli_args.cbl_api_base_url, - (&cli_args.evaluation).into(), + evaluation.into(), &cli_args.cbl_api_key, ) .await?; - match cli_args.evaluation { - cli::EvaluationCommand::SingleTurn { request, .. } => { - run_single_turn_evaluation( - websocket, - provider, - request, - cli_args.log_mode, - cli_args.output_file, - ) - .await?; - } - cli::EvaluationCommand::MultiTurn { request, .. } => { - run_multi_turn_evaluation( - websocket, - provider, - request, - cli_args.log_mode, - cli_args.output_file, - ) - .await?; - } + match cli_args.command { + cli::Command::Eval { evaluation } => match evaluation { + cli::EvaluationCommand::SingleTurn { request, .. } => { + run_single_turn_evaluation( + websocket, + provider, + request, + cli_args.log_mode, + cli_args.output_file, + ) + .await?; + } + cli::EvaluationCommand::MultiTurn { request, .. } => { + run_multi_turn_evaluation( + websocket, + provider, + request, + cli_args.log_mode, + cli_args.output_file, + ) + .await?; + } + }, } Ok(()) From f0d3deb16a9f65415c596c4b2bdc07cae4027e3c Mon Sep 17 00:00:00 2001 From: Wyatt Avilla Date: Sat, 4 Apr 2026 17:18:47 -0700 Subject: [PATCH 5/6] update docs --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index fc65c37..aca170a 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Try a single-turn evaluation: ```sh -cbl single-turn \ +cbl eval single-turn \ --threshold 0.75 \ --variations 2 \ --maximum-iteration-layers 2 \ @@ -32,7 +32,7 @@ cbl single-turn \ Try a multi-turn evaluation: ```sh -cbl multi-turn \ +cbl eval multi-turn \ --threshold 0.95 \ --max-turns 8 \ --test-types user_persona,semantic_chunks \ @@ -49,17 +49,17 @@ Click [here](mailto:team@circuitbreakerlabs.ai?subject=Getting%20Set%20Up&body=I ### Flags and Options -You can see the available options and flags for `cbl` with `cbl help` or for a subcommand with `cbl help`. +You can see the available options and flags for `cbl` with `cbl help`, for evaluation commands with `cbl eval help`, or for a specific evaluation type with `cbl eval help`. ### Syntax The syntax for `cbl` is: ```sh -cbl --top-level-arg1 --evaluation-arg1 --provider-arg1 +cbl --top-level-arg1 eval --evaluation-arg1 --provider-arg1 ``` -where `` and `` are subcommands. +where `eval`, ``, and `` are subcommands. The available evaluation types are `single-turn` and `multi-turn`. The available providers are `ollama`, `openai`, and `custom`. @@ -70,6 +70,7 @@ The following would run a single-turn evaluation against a custom OpenAI finetun ```sh cbl \ --output-file result.json \ + eval \ single-turn \ # evaluation type --threshold 0.3 \ --variations 3 \ From 233ee628a6ab6baf1c84c0de6dfc0a097d2bbf7a Mon Sep 17 00:00:00 2001 From: Wyatt Avilla Date: Fri, 15 May 2026 13:23:57 -0700 Subject: [PATCH 6/6] fix tests --- src/cli/args.rs | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/src/cli/args.rs b/src/cli/args.rs index f9b0a03..ef1584d 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -143,14 +143,16 @@ mod tests { .expect("single-turn args should parse"); #[allow(clippy::match_wildcard_for_single_variants)] - match args.evaluation { - super::EvaluationCommand::SingleTurn { request, .. } => { - assert!((request.threshold - 0.5).abs() < f32::EPSILON); - assert_eq!(request.variations, 2); - assert_eq!(request.maximum_iteration_layers, 2); - assert_eq!(request.test_case_groups, vec!["suicidal_ideation"]); - } - _ => panic!("expected single-turn command"), + match args.command { + super::Command::Eval { evaluation } => match evaluation { + super::EvaluationCommand::SingleTurn { request, .. } => { + assert!((request.threshold - 0.5).abs() < f32::EPSILON); + assert_eq!(request.variations, 2); + assert_eq!(request.maximum_iteration_layers, 2); + assert_eq!(request.test_case_groups, vec!["suicidal_ideation"]); + } + _ => panic!("expected single-turn command"), + }, } } @@ -215,6 +217,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "single-turn", "--threshold", "-0.1", @@ -373,6 +376,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "single-turn", "--threshold", "0.5", @@ -403,6 +407,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "multi-turn", "--threshold", "0.5", @@ -419,13 +424,15 @@ mod tests { .expect("multi-turn args should parse"); #[allow(clippy::match_wildcard_for_single_variants)] - match args.evaluation { - super::EvaluationCommand::MultiTurn { request, .. } => { - assert!((request.threshold - 0.5).abs() < f32::EPSILON); - assert_eq!(request.max_turns, 4); - assert_eq!(request.test_case_groups, vec!["suicidal_ideation"]); - } - _ => panic!("expected multi-turn command"), + match args.command { + super::Command::Eval { evaluation } => match evaluation { + super::EvaluationCommand::MultiTurn { request, .. } => { + assert!((request.threshold - 0.5).abs() < f32::EPSILON); + assert_eq!(request.max_turns, 4); + assert_eq!(request.test_case_groups, vec!["suicidal_ideation"]); + } + _ => panic!("expected multi-turn command"), + }, } } @@ -493,6 +500,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "multi-turn", "--threshold", "0.5", @@ -521,6 +529,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "single-turn", "--threshold", "0.5", @@ -570,6 +579,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "single-turn", "--threshold", "0.5", @@ -597,6 +607,7 @@ mod tests { "cbl", "--cbl-api-key", "cbl-key", + "eval", "multi-turn", "--threshold", "0.5",