circuitbreakerlabs · wyatt-avilla · May 18, 2026 · Apr 5, 2026 · Apr 5, 2026 · Apr 5, 2026
diff --git a/README.md b/README.md
@@ -22,7 +22,7 @@
 Try a single-turn evaluation:
 
 ```sh
-cbl single-turn \
+cbl eval single-turn \
     --threshold 0.75 \
     --variations 2 \
     --maximum-iteration-layers 2 \
@@ -33,7 +33,7 @@ cbl single-turn \
 Try a multi-turn evaluation:
 
 ```sh
-cbl multi-turn \
+cbl eval multi-turn \
     --threshold 0.95 \
     --max-turns 8 \
     --test-case-groups suicidal_ideation \
@@ -50,17 +50,17 @@ Click [here](mailto:team@circuitbreakerlabs.ai?subject=Getting%20Set%20Up&body=I
 
 ### Flags and Options
 
-You can see the available options and flags for `cbl` with `cbl help` or for a subcommand with `cbl <subcommand> help`.
+You can see the available options and flags for `cbl` with `cbl help`, for evaluation commands with `cbl eval help`, or for a specific evaluation type with `cbl eval <evaluation_type> help`.
 
 ### Syntax
 
 The syntax for `cbl` is:
 
 ```sh
-cbl --top-level-arg1 <evaluation_type> --evaluation-arg1 <provider> --provider-arg1
+cbl --top-level-arg1 eval <evaluation_type> --evaluation-arg1 <provider> --provider-arg1
 ```
 
-where `<evaluation_type>` and `<provider>` are subcommands.
+where `eval` is a literal subcommand, and `<evaluation_type>` and `<provider>` are placeholders for the evaluation-type and provider subcommands.
 
 The available evaluation types are `single-turn` and `multi-turn`. The available providers are `ollama`, `openai`, and `custom`.
 
@@ -71,6 +71,7 @@ The following would run a single-turn evaluation against a custom OpenAI finetun
 ```sh
 cbl \
     --output-file result.json \
+    eval \
     single-turn \  # evaluation type
     --threshold 0.3 \
     --variations 3 \

diff --git a/src/cli/args.rs b/src/cli/args.rs
@@ -43,7 +43,7 @@ pub struct Args {
     headers: Vec<Headers>,
 
     #[command(subcommand)]
-    pub evaluation: EvaluationCommand,
+    pub command: Command,
 }
 
 impl Args {
@@ -52,6 +52,15 @@ impl Args {
     }
 }
 
+#[derive(Subcommand, Debug)]
+pub enum Command {
+    /// Run evaluations
+    Eval {
+        #[command(subcommand)]
+        evaluation: EvaluationCommand,
+    },
+}
+
 #[derive(Subcommand, Debug)]
 pub enum EvaluationCommand {
     /// Run single-turn evaluation
@@ -115,6 +124,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "single-turn",
             "--threshold",
             "0.5",
@@ -133,23 +143,50 @@ mod tests {
         .expect("single-turn args should parse");
 
         #[allow(clippy::match_wildcard_for_single_variants)]
-        match args.evaluation {
-            super::EvaluationCommand::SingleTurn { request, .. } => {
-                assert!((request.threshold - 0.5).abs() < f32::EPSILON);
-                assert_eq!(request.variations, 2);
-                assert_eq!(request.maximum_iteration_layers, 2);
-                assert_eq!(request.test_case_groups, vec!["suicidal_ideation"]);
-            }
-            _ => panic!("expected single-turn command"),
+        match args.command {
+            super::Command::Eval { evaluation } => match evaluation {
+                super::EvaluationCommand::SingleTurn { request, .. } => {
+                    assert!((request.threshold - 0.5).abs() < f32::EPSILON);
+                    assert_eq!(request.variations, 2);
+                    assert_eq!(request.maximum_iteration_layers, 2);
+                    assert_eq!(request.test_case_groups, vec!["suicidal_ideation"]);
+                }
+                _ => panic!("expected single-turn command"),
+            },
         }
     }
 
+    #[test]
+    fn rejects_legacy_top_level_evaluation_commands() {
+        let err = Args::try_parse_from([
+            "cbl",
+            "--cbl-api-key",
+            "cbl-key",
+            "single-turn",
+            "--threshold",
+            "0.5",
+            "--variations",
+            "2",
+            "--maximum-iteration-layers",
+            "2",
+            "openai",
+            "--api-key",
+            "openai-key",
+            "--model",
+            "gpt-4.1-nano",
+        ])
+        .expect_err("legacy top-level evaluation command should be rejected");
+
+        assert_eq!(err.kind(), ErrorKind::InvalidSubcommand);
+    }
+
     #[test]
     fn rejects_out_of_range_threshold() {
         let err = Args::try_parse_from([
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "single-turn",
             "--threshold",
             "1.5",
@@ -180,6 +217,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "single-turn",
             "--threshold",
             "-0.1",
@@ -210,6 +248,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "single-turn",
             "--threshold",
             "0.5",
@@ -240,6 +279,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "single-turn",
             "--threshold",
             "0.5",
@@ -270,6 +310,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "single-turn",
             "--threshold",
             "0.5",
@@ -288,11 +329,13 @@ mod tests {
         .expect("zero iteration layers should parse");
 
         #[allow(clippy::match_wildcard_for_single_variants)]
-        match args.evaluation {
-            super::EvaluationCommand::SingleTurn { request, .. } => {
-                assert_eq!(request.maximum_iteration_layers, 0);
-            }
-            _ => panic!("expected single-turn command"),
+        match args.command {
+            super::Command::Eval { evaluation } => match evaluation {
+                super::EvaluationCommand::SingleTurn { request, .. } => {
+                    assert_eq!(request.maximum_iteration_layers, 0);
+                }
+                _ => panic!("expected single-turn command"),
+            },
         }
     }
 
@@ -302,6 +345,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "single-turn",
             "--threshold",
             "0.5",
@@ -332,6 +376,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "single-turn",
             "--threshold",
             "0.5",
@@ -362,6 +407,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "multi-turn",
             "--threshold",
             "0.5",
@@ -378,13 +424,15 @@ mod tests {
         .expect("multi-turn args should parse");
 
         #[allow(clippy::match_wildcard_for_single_variants)]
-        match args.evaluation {
-            super::EvaluationCommand::MultiTurn { request, .. } => {
-                assert!((request.threshold - 0.5).abs() < f32::EPSILON);
-                assert_eq!(request.max_turns, 4);
-                assert_eq!(request.test_case_groups, vec!["suicidal_ideation"]);
-            }
-            _ => panic!("expected multi-turn command"),
+        match args.command {
+            super::Command::Eval { evaluation } => match evaluation {
+                super::EvaluationCommand::MultiTurn { request, .. } => {
+                    assert!((request.threshold - 0.5).abs() < f32::EPSILON);
+                    assert_eq!(request.max_turns, 4);
+                    assert_eq!(request.test_case_groups, vec!["suicidal_ideation"]);
+                }
+                _ => panic!("expected multi-turn command"),
+            },
         }
     }
 
@@ -394,6 +442,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "multi-turn",
             "--threshold",
             "0.5",
@@ -422,6 +471,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "multi-turn",
             "--threshold",
             "0.5",
@@ -450,6 +500,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "multi-turn",
             "--threshold",
             "0.5",
@@ -478,6 +529,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "single-turn",
             "--threshold",
             "0.5",
@@ -503,6 +555,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "multi-turn",
             "--threshold",
             "0.5",
@@ -526,6 +579,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "single-turn",
             "--threshold",
             "0.5",
@@ -553,6 +607,7 @@ mod tests {
             "cbl",
             "--cbl-api-key",
             "cbl-key",
+            "eval",
             "multi-turn",
             "--threshold",
             "0.5",

diff --git a/src/cli/mod.rs b/src/cli/mod.rs
@@ -3,4 +3,4 @@ mod args;
 mod headers;
 mod version;
 
-pub use args::{Args, EvaluationCommand, ProviderCommand};
+pub use args::{Args, Command, EvaluationCommand, ProviderCommand};
diff --git a/src/main.rs b/src/main.rs
@@ -48,7 +48,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     let headers = cli_args.headers();
 
-    let provider_command = match &cli_args.evaluation {
+    let evaluation = match &cli_args.command {
+        cli::Command::Eval { evaluation } => evaluation,
+    };
+
+    let provider_command = match evaluation {
         cli::EvaluationCommand::SingleTurn { provider, .. }
         | cli::EvaluationCommand::MultiTurn { provider, .. } => provider,
     };
@@ -67,32 +71,34 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     let websocket = websockets::connect(
         &cli_args.cbl_api_base_url,
-        (&cli_args.evaluation).into(),
+        evaluation.into(),
         &cli_args.cbl_api_key,
     )
     .await?;
 
-    match cli_args.evaluation {
-        cli::EvaluationCommand::SingleTurn { request, .. } => {
-            run_single_turn_evaluation(
-                websocket,
-                provider,
-                request,
-                cli_args.log_mode,
-                cli_args.output_file,
-            )
-            .await?;
-        }
-        cli::EvaluationCommand::MultiTurn { request, .. } => {
-            run_multi_turn_evaluation(
-                websocket,
-                provider,
-                request,
-                cli_args.log_mode,
-                cli_args.output_file,
-            )
-            .await?;
-        }
+    match cli_args.command {
+        cli::Command::Eval { evaluation } => match evaluation {
+            cli::EvaluationCommand::SingleTurn { request, .. } => {
+                run_single_turn_evaluation(
+                    websocket,
+                    provider,
+                    request,
+                    cli_args.log_mode,
+                    cli_args.output_file,
+                )
+                .await?;
+            }
+            cli::EvaluationCommand::MultiTurn { request, .. } => {
+                run_multi_turn_evaluation(
+                    websocket,
+                    provider,
+                    request,
+                    cli_args.log_mode,
+                    cli_args.output_file,
+                )
+                .await?;
+            }
+        },
     }
 
     Ok(())