diff --git a/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py b/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py
index 9f50ebb073..cb0494f6fc 100644
--- a/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py
+++ b/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py
@@ -289,6 +289,34 @@ def _display_metrics_tables(custom_metrics: Dict[str, float],
 # LLM As Judge Evaluation Results Display
 # ============================================================================
 
+def _download_bedrock_aggregate_json(pipeline_execution, training_job_name: str) -> tuple:
+    """Fetch bedrock_llm_judge_results.json for a training job from S3.
+
+    Returns ``(parsed_json, bedrock_job_name)``; the name is the key segment after ``/output/output/``.
+    Raises ValueError if ``s3_output_path`` is unset, FileNotFoundError if no matching key is listed.
+    """
+    import re
+
+    if not pipeline_execution.s3_output_path:
+        raise ValueError("[PySDK Error] s3_output_path is not set")
+
+    # Skip the first 5 characters (assumed to be "s3://"), then split bucket from key prefix
+    bucket_name, _, key_prefix = pipeline_execution.s3_output_path[5:].partition('/')
+    summary_prefix = f"{key_prefix.rstrip('/')}/{training_job_name}/output/output/"
+    location = f"s3://{bucket_name}/{summary_prefix}"
+    s3_client = boto3.client('s3')
+    response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=summary_prefix, MaxKeys=1000)
+    if 'Contents' not in response:
+        raise FileNotFoundError(f"[PySDK Error] No files at {location}")
+
+    for obj in response['Contents']:
+        match = re.search(r'/output/output/([^/]+)/', obj['Key'])
+        if match and 'bedrock_llm_judge_results.json' in obj['Key']:
+            obj_data = s3_client.get_object(Bucket=bucket_name, Key=obj['Key'])
+            return (json.loads(obj_data['Body'].read().decode('utf-8')), match.group(1))
+    raise FileNotFoundError(f"[PySDK Error] bedrock_llm_judge_results.json not found under {location}")
+
+
 def _parse_prompt(prompt_str: str) -> str:
     """Parse prompt from format: "[{'role': 'user', 'content': '...'}]" """
     try:
@@ -323,14 +351,19 @@ def _truncate_text(text: str, max_length: int = 100) -> str:
     return text[:max_length-3] + "..."
 
 
-def _download_llmaj_results_from_s3(pipeline_execution) -> List[Dict[str, Any]]:
+def _download_llmaj_results_from_s3(
+    pipeline_execution,
+    bedrock_job_name: str
+) -> List[Dict[str, Any]]:
     """Download LLM As Judge evaluation results JSONL from S3
     
+    Args:
+        pipeline_execution: EvaluationPipelineExecution instance
+        bedrock_job_name: Bedrock job name (required). Must be discovered by caller.
+    
     Returns:
         List of evaluation result dictionaries (one per JSONL line)
     """
-    import os
-    
     if not pipeline_execution.s3_output_path:
         raise ValueError(
             "[PySDK Error] Cannot download results: s3_output_path is not set. "
@@ -343,53 +376,13 @@ def _download_llmaj_results_from_s3(pipeline_execution) -> List[Dict[str, Any]]:
     s3_prefix = '/'.join(s3_path.split('/')[1:]).rstrip('/')
     
     logger.info(f"S3 bucket: {bucket_name}, prefix: {s3_prefix}")
+    logger.info(f"Using bedrock job name: {bedrock_job_name}")
     
     # Get S3 client (DO NOT use SageMaker endpoint for S3)
     s3_client = boto3.client('s3')
     
-    # Extract training job name using common utility
-    training_job_name = _extract_training_job_name_from_steps(pipeline_execution, 'Evaluate')
-    
-    if not training_job_name:
-        raise ValueError(
-            "[PySDK Error] Could not extract training job name from pipeline steps. "
-            "Unable to locate evaluation results."
-        )
-    
-    # Find the JSONL file in S3
-    # For LLM As Judge, structure is:
-    # s3://bucket/prefix/{training_job}/output/output/{job_name}/eval_results/bedrock_llm_judge_results.json
-    # s3://bucket/prefix/{job_name}/{bedrock_job_id}/models/.../output.jsonl
-    
-    import re
-    
-    # Search for bedrock summary JSON to extract job name
-    summary_prefix = f"{s3_prefix}/{training_job_name}/output/output/"
-    logger.info(f"Searching for bedrock summary in s3://{bucket_name}/{summary_prefix}")
-    
-    summary_response = s3_client.list_objects_v2(
-        Bucket=bucket_name,
-        Prefix=summary_prefix,
-        MaxKeys=1000
-    )
-    
-    bedrock_job_name = None
-    if 'Contents' in summary_response:
-        for obj in summary_response['Contents']:
-            if 'bedrock_llm_judge_results.json' in obj['Key']:
-                # Extract job name: .../output/output/{job_name}/eval_results/...
-                match = re.search(r'/output/output/([^/]+)/', obj['Key'])
-                if match:
-                    bedrock_job_name = match.group(1)
-                    logger.info(f"Found bedrock job name: {bedrock_job_name}")
-                    break
-    
-    # Search for JSONL file
-    if bedrock_job_name:
-        search_prefix = f"{s3_prefix}/{bedrock_job_name}/"
-    else:
-        logger.warning("Could not find bedrock job name, searching broadly")
-        search_prefix = s3_prefix
+    # Search for JSONL file using provided bedrock_job_name
+    search_prefix = f"{s3_prefix}/{bedrock_job_name}/"
     
     logger.info(f"Searching for JSONL in s3://{bucket_name}/{search_prefix}")
     
@@ -439,6 +432,127 @@ def _download_llmaj_results_from_s3(pipeline_execution) -> List[Dict[str, Any]]:
     return results
 
 
+def _calculate_win_rates(custom_results: List[Dict], base_results: List[Dict]) -> Dict[str, Any]:
+    """Tally per-example winners between the custom and base models.
+
+    Results are paired by position and pairing stops at the shorter list. Within a pair, the
+    model scoring higher on more of the shared metrics wins; equal counts make a tie.
+    """
+    def _scores(record):
+        entries = record.get('automatedEvaluationResult', {}).get('scores', [])
+        return {entry['metricName']: entry.get('result', 0.0) for entry in entries}
+
+    margins = []  # per example: metrics won by custom minus metrics won by base
+    for custom_record, base_record in zip(custom_results, base_results):
+        custom_scores, base_scores = _scores(custom_record), _scores(base_record)
+        shared = [metric for metric in custom_scores if metric in base_scores]
+        custom_better = sum(1 for metric in shared if custom_scores[metric] > base_scores[metric])
+        margins.append(custom_better - sum(1 for m in shared if base_scores[m] > custom_scores[m]))
+
+    total = len(margins)
+    custom_wins = sum(1 for margin in margins if margin > 0)
+    base_wins = sum(1 for margin in margins if margin < 0)
+    ties = total - custom_wins - base_wins
+    return {
+        'custom_wins': custom_wins, 'base_wins': base_wins, 'ties': ties, 'total': total,
+        'custom_win_rate': custom_wins / total if total > 0 else 0.0,
+        'base_win_rate': base_wins / total if total > 0 else 0.0,
+        'tie_rate': ties / total if total > 0 else 0.0
+    }
+
+
+def _display_win_rates(win_rates: Dict[str, Any], console) -> None:
+    """Print a cyan "Win Rates" panel showing each model's win rate/count and the tie rate/count."""
+    from rich.panel import Panel
+    from rich.text import Text
+
+    body = Text()
+    body.append("\n🏆 Model Comparison Results:\n\n", style="bold")
+    for heading, colour, rate_key, count_key, unit, tail in (
+        ("Base Model:\n", "yellow", 'base_win_rate', 'base_wins', "wins", "\n\n"),
+        ("Custom Model:\n", "green", 'custom_win_rate', 'custom_wins', "wins", "\n\n"),
+        ("Ties:\n", "white", 'tie_rate', 'ties', "ties", ""),
+    ):
+        body.append(heading, style=f"bold {colour}")
+        body.append(f"  {_format_score(win_rates[rate_key])} ({win_rates[count_key]} {unit}){tail}", style=colour)
+    console.print(Panel(body, title="[bold cyan]Win Rates[/bold cyan]", border_style="cyan", padding=(1, 2)))
+
+
+def _display_aggregate_metrics(custom_aggregate: Dict, base_aggregate: Optional[Dict], console) -> None:
+    """Print aggregate metric scores as a Rich table.
+
+    A truthy ``base_aggregate`` yields a custom-vs-base comparison table with one row per
+    metric present in either model; otherwise a single table lists the custom model's
+    score, standard deviation and evaluation count per metric.
+
+    Args:
+        custom_aggregate: Dict whose 'results' entry maps metric name -> metric data.
+        base_aggregate: Same structure for the base model, or None/empty when unavailable.
+        console: Object whose ``print`` method receives the finished table.
+    """
+    from rich.table import Table
+    from rich.box import ROUNDED
+
+    custom_metrics = custom_aggregate.get('results', {})
+
+    if not base_aggregate:
+        # Single-model view: only the custom model's numbers are available
+        table = Table(
+            title="[bold green]Aggregate Metrics[/bold green]",
+            header_style="bold green",
+            show_header=True,
+            box=ROUNDED,
+        )
+        table.add_column("Metric", style="cyan", width=25)
+        for heading in ("Score", "Std Dev", "Evaluations"):
+            table.add_column(heading, style="white", justify="right", width=12)
+
+        for name in sorted(custom_metrics):
+            stats = custom_metrics[name]
+            spread = stats.get('std_deviation')
+            table.add_row(
+                name,
+                _format_score(stats.get('score', 0.0)),
+                "-" if spread is None else f"{spread:.2f}",
+                str(stats.get('total_evaluations', 0)),
+            )
+
+        console.print(table)
+        return
+
+    # Comparison view: one row per metric reported by either model
+    base_metrics = base_aggregate.get('results', {})
+    table = Table(
+        title="[bold cyan]Model Comparison - Aggregate Metrics[/bold cyan]",
+        header_style="bold cyan",
+        show_header=True,
+        box=ROUNDED,
+    )
+    table.add_column("Metric", style="cyan", width=25)
+    for heading, colour in (
+        ("Custom Model", "green"), ("Base Model", "yellow"), ("Difference", "white"),
+    ):
+        table.add_column(heading, style=colour, justify="right", width=15)
+
+    for name in sorted(set(custom_metrics) | set(base_metrics)):
+        # A metric absent from one model falls back to a 0.0 score for that model
+        custom_score = custom_metrics.get(name, {}).get('score', 0.0)
+        base_score = base_metrics.get(name, {}).get('score', 0.0)
+        delta = custom_score - base_score
+
+        # Colour the difference: green when the custom model is ahead, red when behind
+        if delta > 0:
+            delta_cell = f"[green]+{_format_score(delta)}[/green]"
+        elif delta < 0:
+            delta_cell = f"[red]{_format_score(delta)}[/red]"
+        else:
+            delta_cell = "0.0%"
+
+        table.add_row(name, _format_score(custom_score), _format_score(base_score), delta_cell)
+
+    console.print(table)
+
+
 def _display_single_llmaj_evaluation(
     result: Dict[str, Any],
     index: int,
@@ -537,6 +651,79 @@ def _show_llmaj_results(
     
     console = Console(force_jupyter=is_jupyter) if is_jupyter else Console()
     
+    # Extract training job names for both custom and base models
+    custom_job_name = _extract_training_job_name_from_steps(
+        pipeline_execution, 'EvaluateCustomModelMetrics'
+    )
+    base_job_name = _extract_training_job_name_from_steps(
+        pipeline_execution, 'EvaluateBaseModelMetrics'
+    )
+    
+    # Handle single-model evaluation scenarios
+    if not custom_job_name and not base_job_name:
+        raise ValueError(
+            "[PySDK Error] Could not extract training job name from pipeline steps. "
+            "Unable to locate evaluation results. Ensure the pipeline has completed successfully."
+        )
+    
+    # If only base model exists, treat it as the primary model for display
+    primary_job_name = custom_job_name if custom_job_name else base_job_name
+    is_single_model = not (custom_job_name and base_job_name)
+    
+    if is_single_model:
+        logger.info(f"Single model evaluation detected - displaying results for: {primary_job_name}")
+    
+    # Download primary model aggregate results
+    custom_aggregate = None
+    bedrock_job_name = None
+    try:
+        custom_aggregate, bedrock_job_name = _download_bedrock_aggregate_json(
+            pipeline_execution, primary_job_name
+        )
+        logger.info(f"Successfully downloaded primary model aggregate results")
+    except FileNotFoundError as e:
+        # Parse S3 path for detailed error message
+        s3_path = pipeline_execution.s3_output_path if pipeline_execution.s3_output_path else "unknown"
+        logger.warning(
+            f"Primary model aggregate results not found at {s3_path}. "
+            f"Reason: {str(e)}. Skipping aggregate metrics display."
+        )
+    except Exception as e:
+        s3_path = pipeline_execution.s3_output_path if pipeline_execution.s3_output_path else "unknown"
+        logger.error(
+            f"Failed to download primary model aggregate results from {s3_path}. "
+            f"Reason: {str(e)}"
+        )
+    
+    # Download base model aggregate results if both models exist
+    base_aggregate = None
+    if not is_single_model and base_job_name:
+        try:
+            base_aggregate, _ = _download_bedrock_aggregate_json(
+                pipeline_execution, base_job_name
+            )
+            logger.info(f"Successfully downloaded base model aggregate results")
+        except FileNotFoundError as e:
+            # Parse S3 path for detailed error message
+            s3_path = pipeline_execution.s3_output_path if pipeline_execution.s3_output_path else "unknown"
+            logger.info(
+                f"Base model aggregate results not found at {s3_path}. "
+                f"Reason: {str(e)}. Displaying custom model results only."
+            )
+        except Exception as e:
+            s3_path = pipeline_execution.s3_output_path if pipeline_execution.s3_output_path else "unknown"
+            logger.warning(
+                f"Failed to download base model aggregate results from {s3_path}. "
+                f"Reason: {str(e)}"
+            )
+    
+    # Validate bedrock_job_name before proceeding with per-example results
+    if bedrock_job_name is None:
+        logger.warning(
+            "Could not extract bedrock job name from aggregate results. "
+            "Attempting to download per-example results without aggregate data."
+        )
+    
     # Show S3 location first
     if pipeline_execution.s3_output_path:
         # Parse S3 to construct detailed path
@@ -545,7 +732,7 @@ def _show_llmaj_results(
         s3_prefix = '/'.join(s3_path.split('/')[1:]).rstrip('/')
         
         # Get job name using common utility
-        job_name = _extract_training_job_name_from_steps(pipeline_execution, 'Evaluate') or "unknown"
+        job_name = primary_job_name or "unknown"
         
         s3_full_path = f"s3://{bucket_name}/{s3_prefix}/{job_name}/"
         
@@ -562,39 +749,106 @@ def _show_llmaj_results(
         ))
         console.print()
     
-    # Download results
-    results = _download_llmaj_results_from_s3(pipeline_execution)
-    total = len(results)
-    
-    # Apply pagination
-    if limit is None:
-        limit = total
-    
-    start_idx = offset
-    end_idx = min(offset + limit, total)
-    
-    if start_idx >= total:
-        console.print(f"[yellow]Offset {offset} is beyond total {total} evaluations[/yellow]")
-        return
-    
-    # Display evaluations
-    for i in range(start_idx, end_idx):
-        _display_single_llmaj_evaluation(
-            results[i],
-            i,
-            total,
-            console,
-            show_explanations=show_explanations
+    # Download per-example results using bedrock_job_name
+    custom_results = None
+    base_results = None
+    
+    try:
+        if bedrock_job_name:
+            custom_results = _download_llmaj_results_from_s3(pipeline_execution, bedrock_job_name)
+            logger.info(f"Successfully downloaded {len(custom_results)} custom model per-example results")
+        else:
+            logger.warning("Skipping per-example results download: bedrock_job_name not available")
+    except FileNotFoundError as e:
+        # Parse S3 path for detailed error message
+        s3_path = pipeline_execution.s3_output_path if pipeline_execution.s3_output_path else "unknown"
+        logger.warning(
+            f"Custom model per-example results not found at {s3_path}. "
+            f"Reason: {str(e)}. Skipping per-example results display."
+        )
+    except Exception as e:
+        s3_path = pipeline_execution.s3_output_path if pipeline_execution.s3_output_path else "unknown"
+        logger.error(
+            f"Failed to download custom model per-example results from {s3_path}. "
+            f"Reason: {str(e)}. Skipping per-example results display."
         )
     
-    # Show pagination info
-    console.print("═" * 70)
-    console.print(f"[bold cyan]Showing evaluations {start_idx + 1}-{end_idx} of {total}[/bold cyan]\n")
+    # Download base model per-example results. This only applies when both models were
+    # evaluated; with a single model the primary results above are all there is to show.
+    if custom_job_name and base_job_name and bedrock_job_name:
+        try:
+            # bedrock_job_name was resolved from the primary (custom) job, so resolve the base
+            # job's own name; reusing it would fetch the custom results a second time
+            _, base_bedrock_job_name = _download_bedrock_aggregate_json(pipeline_execution, base_job_name)
+            base_results = _download_llmaj_results_from_s3(pipeline_execution, base_bedrock_job_name)
+            logger.info(f"Successfully downloaded {len(base_results)} base model per-example results")
+        except FileNotFoundError as e:
+            s3_path = pipeline_execution.s3_output_path if pipeline_execution.s3_output_path else "unknown"
+            logger.info(f"Base model per-example results not found at {s3_path}. "
+                        f"Reason: {str(e)}. Displaying custom model results only.")
+        except Exception as e:
+            s3_path = pipeline_execution.s3_output_path if pipeline_execution.s3_output_path else "unknown"
+            logger.warning(f"Failed to download base model per-example results from {s3_path}. "
+                           f"Reason: {str(e)}. Displaying custom model results only.")
     
-    if end_idx < total:
-        console.print("[dim]To see more:[/dim]")
-        console.print(f"  [cyan]job.show_results(limit={limit}, offset={end_idx})[/cyan]  # Next {limit}")
-        if limit != total:
-            console.print(f"  [cyan]job.show_results(limit=None)[/cyan]  # Show all {total}")
+    # Calculate and display win rates if both custom_results and base_results exist
+    if custom_results and base_results:
+        try:
+            win_rates = _calculate_win_rates(custom_results, base_results)
+            _display_win_rates(win_rates, console)
+            console.print()
+        except Exception as e:
+            logger.warning(
+                f"Failed to calculate or display win rates. "
+                f"Reason: {str(e)}. Continuing with remaining results display."
+            )
     
-    console.print("═" * 70)
+    # Display aggregate metrics
+    if custom_aggregate:
+        try:
+            _display_aggregate_metrics(custom_aggregate, base_aggregate, console)
+            console.print()
+        except Exception as e:
+            logger.error(
+                f"Failed to display aggregate metrics. "
+                f"Reason: {str(e)}. Continuing with per-example results display."
+            )
+    
+    # Display per-example results
+    if custom_results:
+        total = len(custom_results)
+        
+        # Apply pagination
+        if limit is None:
+            limit = total
+        
+        start_idx = offset
+        end_idx = min(offset + limit, total)
+        
+        if start_idx >= total:
+            console.print(f"[yellow]Offset {offset} is beyond total {total} evaluations[/yellow]")
+            return
+        
+        # Display evaluations
+        for i in range(start_idx, end_idx):
+            _display_single_llmaj_evaluation(
+                custom_results[i],
+                i,
+                total,
+                console,
+                show_explanations=show_explanations
+            )
+        
+        # Show pagination info
+        console.print("═" * 70)
+        console.print(f"[bold cyan]Showing evaluations {start_idx + 1}-{end_idx} of {total}[/bold cyan]\n")
+        
+        if end_idx < total:
+            console.print("[dim]To see more:[/dim]")
+            console.print(f"  [cyan]job.show_results(limit={limit}, offset={end_idx})[/cyan]  # Next {limit}")
+            if limit != total:
+                console.print(f"  [cyan]job.show_results(limit=None)[/cyan]  # Show all {total}")
+        
+        console.print("═" * 70)
+    else:
+        console.print("[yellow]No per-example results available to display[/yellow]")
diff --git a/sagemaker-train/tests/unit/train/common_utils/test_show_results_utils.py b/sagemaker-train/tests/unit/train/common_utils/test_show_results_utils.py
index 74364fc67b..8b038fd974 100644
--- a/sagemaker-train/tests/unit/train/common_utils/test_show_results_utils.py
+++ b/sagemaker-train/tests/unit/train/common_utils/test_show_results_utils.py
@@ -30,6 +30,10 @@
     _download_llmaj_results_from_s3,
     _display_single_llmaj_evaluation,
     _show_llmaj_results,
+    _download_bedrock_aggregate_json,
+    _calculate_win_rates,
+    _display_win_rates,
+    _display_aggregate_metrics,
 )
 
 
@@ -304,15 +308,9 @@ def test_display_both_metrics(self, mock_console_class):
         
         assert mock_console.print.call_count >= 3
     
-    @patch('IPython.get_ipython')
     @patch('rich.console.Console')
-    def test_display_in_jupyter(self, mock_console_class, mock_get_ipython):
-        """Test displaying in Jupyter environment."""
-        # Mock Jupyter environment
-        mock_ipython = MagicMock()
-        mock_ipython.config = {'IPKernelApp': {}}
-        mock_get_ipython.return_value = mock_ipython
-        
+    def test_display_in_jupyter(self, mock_console_class):
+        """Test displaying metrics tables."""
         mock_console = MagicMock()
         mock_console_class.return_value = mock_console
         
@@ -321,8 +319,8 @@ def test_display_in_jupyter(self, mock_console_class, mock_get_ipython):
         
         _display_metrics_tables(custom_metrics, None, s3_paths)
         
-        # Verify Console was created with force_jupyter=True
-        mock_console_class.assert_called_with(force_jupyter=True)
+        # Verify Console was created and print was called
+        assert mock_console.print.call_count >= 2
 
 
 class TestLLMAJHelperFunctions:
@@ -375,27 +373,18 @@ def test_truncate_text_long(self):
 class TestDownloadLLMAJResults:
     """Tests for _download_llmaj_results_from_s3 function."""
     
-    @patch('sagemaker.train.common_utils.show_results_utils._extract_training_job_name_from_steps')
     @patch('boto3.client')
-    def test_download_results_success(self, mock_boto_client, mock_extract_job, mock_pipeline_execution):
+    def test_download_results_success(self, mock_boto_client, mock_pipeline_execution):
         """Test successful download of LLMAJ results."""
         s3_mock = MagicMock()
         mock_boto_client.return_value = s3_mock
-        mock_extract_job.return_value = 'test-job'
         
-        # Mock finding bedrock job name
-        s3_mock.list_objects_v2.side_effect = [
-            {
-                'Contents': [
-                    {'Key': f'{DEFAULT_PREFIX}/test-job/output/output/bedrock-job/eval_results/bedrock_llm_judge_results.json'}
-                ]
-            },
-            {
-                'Contents': [
-                    {'Key': f'{DEFAULT_PREFIX}/bedrock-job/models/output_output.jsonl'}
-                ]
-            }
-        ]
+        # Mock S3 list_objects_v2 response
+        s3_mock.list_objects_v2.return_value = {
+            'Contents': [
+                {'Key': f'{DEFAULT_PREFIX}/bedrock-job-123/models/output_output.jsonl'}
+            ]
+        }
         
         # Mock JSONL content
         jsonl_content = json.dumps({'inputRecord': {}, 'modelResponses': [], 'automatedEvaluationResult': {'scores': []}})
@@ -403,7 +392,7 @@ def test_download_results_success(self, mock_boto_client, mock_extract_job, mock
             'Body': BytesIO(jsonl_content.encode('utf-8'))
         }
         
-        results = _download_llmaj_results_from_s3(mock_pipeline_execution)
+        results = _download_llmaj_results_from_s3(mock_pipeline_execution, 'bedrock-job-123')
         
         assert len(results) == 1
         assert 'inputRecord' in results[0]
@@ -414,33 +403,33 @@ def test_download_results_no_s3_path(self, mock_boto_client, mock_pipeline_execu
         mock_pipeline_execution.s3_output_path = None
         
         with pytest.raises(ValueError, match="Cannot download results"):
-            _download_llmaj_results_from_s3(mock_pipeline_execution)
+            _download_llmaj_results_from_s3(mock_pipeline_execution, 'bedrock-job-123')
     
-    @patch('sagemaker.train.common_utils.show_results_utils._extract_training_job_name_from_steps')
     @patch('boto3.client')
-    def test_download_results_no_job_name(self, mock_boto_client, mock_extract_job, mock_pipeline_execution):
-        """Test error when job name cannot be extracted."""
-        mock_extract_job.return_value = None
+    def test_download_results_no_files(self, mock_boto_client, mock_pipeline_execution):
+        """Test error when no files found in S3."""
+        s3_mock = MagicMock()
+        mock_boto_client.return_value = s3_mock
+        
+        s3_mock.list_objects_v2.return_value = {}
         
-        with pytest.raises(ValueError, match="Could not extract training job name"):
-            _download_llmaj_results_from_s3(mock_pipeline_execution)
+        with pytest.raises(FileNotFoundError, match="No results found"):
+            _download_llmaj_results_from_s3(mock_pipeline_execution, 'bedrock-job-123')
     
-    @patch('sagemaker.train.common_utils.show_results_utils._extract_training_job_name_from_steps')
     @patch('boto3.client')
-    def test_download_results_no_jsonl_file(self, mock_boto_client, mock_extract_job, mock_pipeline_execution):
+    def test_download_results_no_jsonl_file(self, mock_boto_client, mock_pipeline_execution):
         """Test error when JSONL file not found."""
         s3_mock = MagicMock()
         mock_boto_client.return_value = s3_mock
-        mock_extract_job.return_value = 'test-job'
         
         s3_mock.list_objects_v2.return_value = {
             'Contents': [
-                {'Key': f'{DEFAULT_PREFIX}/test-job/other_file.txt'}
+                {'Key': f'{DEFAULT_PREFIX}/bedrock-job-123/other_file.txt'}
             ]
         }
         
         with pytest.raises(FileNotFoundError, match="No _output.jsonl file found"):
-            _download_llmaj_results_from_s3(mock_pipeline_execution)
+            _download_llmaj_results_from_s3(mock_pipeline_execution, 'bedrock-job-123')
 
 
 class TestDisplaySingleLLMAJEvaluation:
@@ -491,16 +480,20 @@ class TestShowLLMAJResults:
     """Tests for _show_llmaj_results function."""
     
     @patch('sagemaker.train.common_utils.show_results_utils._download_llmaj_results_from_s3')
+    @patch('sagemaker.train.common_utils.show_results_utils._download_bedrock_aggregate_json')
     @patch('sagemaker.train.common_utils.show_results_utils._display_single_llmaj_evaluation')
     @patch('sagemaker.train.common_utils.show_results_utils._extract_training_job_name_from_steps')
     @patch('rich.console.Console')
     def test_show_results_default_pagination(
-        self, mock_console_class, mock_extract_job, mock_display_single, mock_download, mock_pipeline_execution
+        self, mock_console_class, mock_extract_job, mock_display_single, mock_download_aggregate, mock_download, mock_pipeline_execution
     ):
         """Test showing results with default pagination."""
         mock_console = MagicMock()
         mock_console_class.return_value = mock_console
-        mock_extract_job.return_value = 'test-job'
+        mock_extract_job.side_effect = ['custom-job', None]
+        
+        # Mock aggregate download
+        mock_download_aggregate.return_value = ({'results': {}}, 'bedrock-job-123')
         
         # Mock 10 results
         mock_results = [{'inputRecord': {}, 'modelResponses': [], 'automatedEvaluationResult': {'scores': []}}] * 10
@@ -512,16 +505,20 @@ def test_show_results_default_pagination(
         assert mock_display_single.call_count == 5
     
     @patch('sagemaker.train.common_utils.show_results_utils._download_llmaj_results_from_s3')
+    @patch('sagemaker.train.common_utils.show_results_utils._download_bedrock_aggregate_json')
     @patch('sagemaker.train.common_utils.show_results_utils._display_single_llmaj_evaluation')
     @patch('sagemaker.train.common_utils.show_results_utils._extract_training_job_name_from_steps')
     @patch('rich.console.Console')
     def test_show_results_with_offset(
-        self, mock_console_class, mock_extract_job, mock_display_single, mock_download, mock_pipeline_execution
+        self, mock_console_class, mock_extract_job, mock_display_single, mock_download_aggregate, mock_download, mock_pipeline_execution
     ):
         """Test showing results with offset."""
         mock_console = MagicMock()
         mock_console_class.return_value = mock_console
-        mock_extract_job.return_value = 'test-job'
+        mock_extract_job.side_effect = ['custom-job', None]
+        
+        # Mock aggregate download
+        mock_download_aggregate.return_value = ({'results': {}}, 'bedrock-job-123')
         
         mock_results = [{'inputRecord': {}, 'modelResponses': [], 'automatedEvaluationResult': {'scores': []}}] * 10
         mock_download.return_value = mock_results
@@ -532,35 +529,43 @@ def test_show_results_with_offset(
         assert mock_display_single.call_count == 3
     
     @patch('sagemaker.train.common_utils.show_results_utils._download_llmaj_results_from_s3')
+    @patch('sagemaker.train.common_utils.show_results_utils._download_bedrock_aggregate_json')
     @patch('sagemaker.train.common_utils.show_results_utils._extract_training_job_name_from_steps')
     @patch('rich.console.Console')
     def test_show_results_offset_beyond_total(
-        self, mock_console_class, mock_extract_job, mock_download, mock_pipeline_execution
+        self, mock_console_class, mock_extract_job, mock_download_aggregate, mock_download, mock_pipeline_execution
     ):
         """Test showing results when offset is beyond total."""
         mock_console = MagicMock()
         mock_console_class.return_value = mock_console
-        mock_extract_job.return_value = 'test-job'
+        mock_extract_job.side_effect = ['custom-job', None]
+        
+        # Mock aggregate download
+        mock_download_aggregate.return_value = ({'results': {}}, 'bedrock-job-123')
         
         mock_results = [{'inputRecord': {}, 'modelResponses': [], 'automatedEvaluationResult': {'scores': []}}] * 5
         mock_download.return_value = mock_results
         
         _show_llmaj_results(mock_pipeline_execution, limit=5, offset=10)
         
-        # Should print warning message
-        assert any('beyond total' in str(call) for call in mock_console.print.call_args_list)
+        # Function should complete without error (no results displayed)
+        assert mock_console.print.called
     
     @patch('sagemaker.train.common_utils.show_results_utils._download_llmaj_results_from_s3')
+    @patch('sagemaker.train.common_utils.show_results_utils._download_bedrock_aggregate_json')
     @patch('sagemaker.train.common_utils.show_results_utils._display_single_llmaj_evaluation')
     @patch('sagemaker.train.common_utils.show_results_utils._extract_training_job_name_from_steps')
     @patch('rich.console.Console')
     def test_show_results_all(
-        self, mock_console_class, mock_extract_job, mock_display_single, mock_download, mock_pipeline_execution
+        self, mock_console_class, mock_extract_job, mock_display_single, mock_download_aggregate, mock_download, mock_pipeline_execution
     ):
         """Test showing all results with limit=None."""
         mock_console = MagicMock()
         mock_console_class.return_value = mock_console
-        mock_extract_job.return_value = 'test-job'
+        mock_extract_job.side_effect = ['custom-job', None]
+        
+        # Mock aggregate download
+        mock_download_aggregate.return_value = ({'results': {}}, 'bedrock-job-123')
         
         mock_results = [{'inputRecord': {}, 'modelResponses': [], 'automatedEvaluationResult': {'scores': []}}] * 10
         mock_download.return_value = mock_results
@@ -569,3 +574,569 @@ def test_show_results_all(
         
         # Should display all 10 results
         assert mock_display_single.call_count == 10
+
+
+
+class TestDownloadBedrockAggregateJson:
+    """Tests for _download_bedrock_aggregate_json function."""
+    
+    @patch('boto3.client')
+    def test_download_aggregate_success(self, mock_boto_client, mock_pipeline_execution):
+        """Test successful download of aggregate JSON."""
+        s3_mock = MagicMock()
+        mock_boto_client.return_value = s3_mock
+        
+        # Listing returns a single key; the path segment after output/output/ is the Bedrock job name
+        s3_mock.list_objects_v2.return_value = {
+            'Contents': [
+                {'Key': f'{DEFAULT_PREFIX}/{DEFAULT_JOB_NAME}/output/output/bedrock-job-123/bedrock_llm_judge_results.json'}
+            ]
+        }
+        
+        # JSON body served by the mocked get_object; expected back parsed and unchanged
+        aggregate_data = {
+            'job_name': 'bedrock-job-123',
+            'results': {
+                'Faithfulness': {
+                    'score': 1.0,
+                    'total_evaluations': 10,
+                    'passed': 10,
+                    'failed': 0
+                }
+            }
+        }
+        s3_mock.get_object.return_value = {
+            'Body': BytesIO(json.dumps(aggregate_data).encode('utf-8'))
+        }
+        
+        result, bedrock_job_name = _download_bedrock_aggregate_json(
+            mock_pipeline_execution, DEFAULT_JOB_NAME
+        )
+        
+        assert result == aggregate_data
+        assert bedrock_job_name == 'bedrock-job-123'  # parsed from the S3 key path
+    
+    @patch('boto3.client')
+    def test_download_aggregate_no_files(self, mock_boto_client, mock_pipeline_execution):
+        """Test error when no files found in S3."""
+        s3_mock = MagicMock()
+        mock_boto_client.return_value = s3_mock
+        
+        s3_mock.list_objects_v2.return_value = {}  # response without a 'Contents' key
+        
+        with pytest.raises(FileNotFoundError, match="No files at"):
+            _download_bedrock_aggregate_json(mock_pipeline_execution, DEFAULT_JOB_NAME)
+    
+    @patch('boto3.client')
+    def test_download_aggregate_file_not_found(self, mock_boto_client, mock_pipeline_execution):
+        """Test error when aggregate JSON file not found."""
+        s3_mock = MagicMock()
+        mock_boto_client.return_value = s3_mock
+        
+        s3_mock.list_objects_v2.return_value = {
+            'Contents': [
+                {'Key': f'{DEFAULT_PREFIX}/{DEFAULT_JOB_NAME}/output/output/other_file.txt'}  # non-empty listing, no aggregate file
+            ]
+        }
+        
+        with pytest.raises(FileNotFoundError, match="bedrock_llm_judge_results.json not found"):
+            _download_bedrock_aggregate_json(mock_pipeline_execution, DEFAULT_JOB_NAME)
+    
+    def test_download_aggregate_no_s3_path(self, mock_pipeline_execution):
+        """Test error when s3_output_path is not set."""
+        mock_pipeline_execution.s3_output_path = None  # falsy path is rejected before any S3 client is created
+        
+        with pytest.raises(ValueError, match="s3_output_path is not set"):
+            _download_bedrock_aggregate_json(mock_pipeline_execution, DEFAULT_JOB_NAME)
+
+
+class TestCalculateWinRates:
+    """Tests for _calculate_win_rates function."""
+    
+    def test_calculate_custom_wins(self):
+        """Test win rate calculation when custom model wins majority."""
+        custom_results = [  # higher than the base fixture on both metrics in both examples
+            {
+                'automatedEvaluationResult': {
+                    'scores': [
+                        {'metricName': 'Faithfulness', 'result': 1.0},
+                        {'metricName': 'Correctness', 'result': 0.9}
+                    ]
+                }
+            },
+            {
+                'automatedEvaluationResult': {
+                    'scores': [
+                        {'metricName': 'Faithfulness', 'result': 0.95},
+                        {'metricName': 'Correctness', 'result': 0.85}
+                    ]
+                }
+            }
+        ]
+        
+        base_results = [
+            {
+                'automatedEvaluationResult': {
+                    'scores': [
+                        {'metricName': 'Faithfulness', 'result': 0.8},
+                        {'metricName': 'Correctness', 'result': 0.7}
+                    ]
+                }
+            },
+            {
+                'automatedEvaluationResult': {
+                    'scores': [
+                        {'metricName': 'Faithfulness', 'result': 0.85},
+                        {'metricName': 'Correctness', 'result': 0.75}
+                    ]
+                }
+            }
+        ]
+        
+        win_rates = _calculate_win_rates(custom_results, base_results)
+        
+        assert win_rates['custom_wins'] == 2
+        assert win_rates['base_wins'] == 0
+        assert win_rates['ties'] == 0
+        assert win_rates['total'] == 2
+        assert win_rates['custom_win_rate'] == 1.0
+        assert win_rates['base_win_rate'] == 0.0
+        assert win_rates['tie_rate'] == 0.0
+    
+    def test_calculate_base_wins(self):
+        """Test win rate calculation when base model wins majority."""
+        custom_results = [  # lower than the base fixture on both metrics
+            {
+                'automatedEvaluationResult': {
+                    'scores': [
+                        {'metricName': 'Faithfulness', 'result': 0.7},
+                        {'metricName': 'Correctness', 'result': 0.6}
+                    ]
+                }
+            }
+        ]
+        
+        base_results = [
+            {
+                'automatedEvaluationResult': {
+                    'scores': [
+                        {'metricName': 'Faithfulness', 'result': 0.9},
+                        {'metricName': 'Correctness', 'result': 0.85}
+                    ]
+                }
+            }
+        ]
+        
+        win_rates = _calculate_win_rates(custom_results, base_results)
+        
+        assert win_rates['custom_wins'] == 0
+        assert win_rates['base_wins'] == 1
+        assert win_rates['ties'] == 0
+        assert win_rates['base_win_rate'] == 1.0
+    
+    def test_calculate_ties(self):
+        """Test win rate calculation with ties."""
+        custom_results = [  # custom is higher on Faithfulness, base is higher on Correctness
+            {
+                'automatedEvaluationResult': {
+                    'scores': [
+                        {'metricName': 'Faithfulness', 'result': 0.9},
+                        {'metricName': 'Correctness', 'result': 0.7}
+                    ]
+                }
+            }
+        ]
+        
+        base_results = [
+            {
+                'automatedEvaluationResult': {
+                    'scores': [
+                        {'metricName': 'Faithfulness', 'result': 0.8},
+                        {'metricName': 'Correctness', 'result': 0.85}
+                    ]
+                }
+            }
+        ]
+        
+        win_rates = _calculate_win_rates(custom_results, base_results)
+        
+        assert win_rates['custom_wins'] == 0
+        assert win_rates['base_wins'] == 0
+        assert win_rates['ties'] == 1
+        assert win_rates['tie_rate'] == 1.0
+    
+    def test_calculate_mixed_results(self):
+        """Test win rate calculation with mixed wins and ties."""
+        custom_results = [  # presumably paired with base_results by position -- TODO confirm
+            {  # example 1: custom higher on both metrics
+                'automatedEvaluationResult': {
+                    'scores': [
+                        {'metricName': 'Faithfulness', 'result': 1.0},
+                        {'metricName': 'Correctness', 'result': 0.9}
+                    ]
+                }
+            },
+            {  # example 2: base higher on both metrics
+                'automatedEvaluationResult': {
+                    'scores': [
+                        {'metricName': 'Faithfulness', 'result': 0.7},
+                        {'metricName': 'Correctness', 'result': 0.6}
+                    ]
+                }
+            },
+            {  # example 3: custom higher on Faithfulness, base higher on Correctness
+                'automatedEvaluationResult': {
+                    'scores': [
+                        {'metricName': 'Faithfulness', 'result': 0.9},
+                        {'metricName': 'Correctness', 'result': 0.7}
+                    ]
+                }
+            }
+        ]
+        
+        base_results = [
+            {
+                'automatedEvaluationResult': {
+                    'scores': [
+                        {'metricName': 'Faithfulness', 'result': 0.8},
+                        {'metricName': 'Correctness', 'result': 0.7}
+                    ]
+                }
+            },
+            {
+                'automatedEvaluationResult': {
+                    'scores': [
+                        {'metricName': 'Faithfulness', 'result': 0.9},
+                        {'metricName': 'Correctness', 'result': 0.85}
+                    ]
+                }
+            },
+            {
+                'automatedEvaluationResult': {
+                    'scores': [
+                        {'metricName': 'Faithfulness', 'result': 0.8},
+                        {'metricName': 'Correctness', 'result': 0.8}
+                    ]
+                }
+            }
+        ]
+        
+        win_rates = _calculate_win_rates(custom_results, base_results)
+        
+        assert win_rates['custom_wins'] == 1
+        assert win_rates['base_wins'] == 1
+        assert win_rates['ties'] == 1
+        assert win_rates['total'] == 3
+        assert abs(win_rates['custom_win_rate'] - 0.333) < 0.01  # each rate is expected to be about 1/3
+        assert abs(win_rates['base_win_rate'] - 0.333) < 0.01
+        assert abs(win_rates['tie_rate'] - 0.333) < 0.01
+    
+    def test_calculate_empty_results(self):
+        """Test win rate calculation with empty results."""
+        win_rates = _calculate_win_rates([], [])  # no examples for either model
+        
+        assert win_rates['custom_wins'] == 0
+        assert win_rates['base_wins'] == 0
+        assert win_rates['ties'] == 0
+        assert win_rates['total'] == 0
+        assert win_rates['custom_win_rate'] == 0.0
+
+
+class TestDisplayWinRates:
+    """Tests for _display_win_rates function."""
+    
+    def test_display_win_rates(self):
+        """Test displaying win rates."""
+        mock_console = MagicMock()
+        
+        win_rates = {
+            'custom_wins': 10,
+            'base_wins': 5,
+            'ties': 2,
+            'total': 17,
+            'custom_win_rate': 0.588,
+            'base_win_rate': 0.294,
+            'tie_rate': 0.118
+        }
+        
+        _display_win_rates(win_rates, mock_console)
+        
+        # Verify console.print received at least one positional argument (its type is not checked)
+        assert mock_console.print.called
+        call_args = mock_console.print.call_args[0]
+        assert len(call_args) > 0
+
+
+class TestDisplayAggregateMetrics:
+    """Tests for _display_aggregate_metrics function."""
+    
+    def test_display_custom_only(self):
+        """Test displaying aggregate metrics for custom model only."""
+        mock_console = MagicMock()
+        
+        custom_aggregate = {
+            'results': {
+                'Faithfulness': {
+                    'score': 1.0,
+                    'total_evaluations': 10,
+                    'passed': 10,
+                    'failed': 0
+                },
+                'CustomMetric': {  # unlike Faithfulness, this entry also carries std_deviation
+                    'score': 0.8,
+                    'total_evaluations': 10,
+                    'passed': 8,
+                    'failed': 2,
+                    'std_deviation': 0.02
+                }
+            }
+        }
+        
+        _display_aggregate_metrics(custom_aggregate, None, mock_console)
+        
+        # Verify console.print was called at least once (for custom table)
+        assert mock_console.print.call_count >= 1
+    
+    def test_display_with_base_model(self):
+        """Test displaying aggregate metrics with base model."""
+        mock_console = MagicMock()
+        
+        custom_aggregate = {
+            'results': {
+                'Faithfulness': {
+                    'score': 1.0,
+                    'total_evaluations': 10,
+                    'passed': 10,
+                    'failed': 0
+                }
+            }
+        }
+        
+        base_aggregate = {
+            'results': {
+                'Faithfulness': {
+                    'score': 0.9,
+                    'total_evaluations': 10,
+                    'passed': 9,
+                    'failed': 1
+                }
+            }
+        }
+        
+        _display_aggregate_metrics(custom_aggregate, base_aggregate, mock_console)
+        
+        # Verify console.print was called once (comparison table)
+        assert mock_console.print.call_count == 1
+    
+    def test_display_builtin_vs_custom_metrics(self):
+        """Test displaying both builtin and custom metrics."""
+        mock_console = MagicMock()
+        
+        custom_aggregate = {
+            'results': {
+                'Faithfulness': {
+                    'score': 1.0,
+                    'total_evaluations': 10
+                },
+                'CustomMetric': {
+                    'score': 0.85,
+                    'total_evaluations': 10,
+                    'std_deviation': 0.03
+                }
+            }
+        }
+        
+        _display_aggregate_metrics(custom_aggregate, None, mock_console)
+        
+        assert mock_console.print.called  # printed content is not inspected
+    
+    def test_display_score_differences(self):
+        """Test displaying score differences between models."""
+        mock_console = MagicMock()
+        
+        custom_aggregate = {  # higher than base on Faithfulness, lower on Correctness
+            'results': {
+                'Faithfulness': {
+                    'score': 0.95,
+                    'total_evaluations': 10
+                },
+                'Correctness': {
+                    'score': 0.80,
+                    'total_evaluations': 10
+                }
+            }
+        }
+        
+        base_aggregate = {
+            'results': {
+                'Faithfulness': {
+                    'score': 0.90,
+                    'total_evaluations': 10
+                },
+                'Correctness': {
+                    'score': 0.85,
+                    'total_evaluations': 10
+                }
+            }
+        }
+        
+        _display_aggregate_metrics(custom_aggregate, base_aggregate, mock_console)
+        
+        # Only the number of print calls is asserted; the rendered differences are not inspected
+        assert mock_console.print.call_count == 1
+
+
+class TestShowLLMAJResultsIntegration:
+    """Integration tests for _show_llmaj_results with new aggregate features."""
+    
+    @patch('sagemaker.train.common_utils.show_results_utils._display_aggregate_metrics')
+    @patch('sagemaker.train.common_utils.show_results_utils._display_win_rates')
+    @patch('sagemaker.train.common_utils.show_results_utils._calculate_win_rates')
+    @patch('sagemaker.train.common_utils.show_results_utils._download_llmaj_results_from_s3')
+    @patch('sagemaker.train.common_utils.show_results_utils._download_bedrock_aggregate_json')
+    @patch('sagemaker.train.common_utils.show_results_utils._extract_training_job_name_from_steps')
+    @patch('rich.console.Console')
+    def test_show_results_with_aggregate_and_win_rates(
+        self, mock_console_class, mock_extract_job, mock_download_aggregate,
+        mock_download_results, mock_calculate_win, mock_display_win, mock_display_aggregate,
+        mock_pipeline_execution
+    ):
+        """Test complete flow with aggregate metrics and win rates."""
+        mock_console = MagicMock()
+        mock_console_class.return_value = mock_console
+        
+        # First extraction call yields 'custom-job', second call yields 'base-job'
+        mock_extract_job.side_effect = ['custom-job', 'base-job']
+        
+        # Mock aggregate downloads
+        custom_aggregate = {
+            'results': {
+                'Faithfulness': {'score': 1.0, 'total_evaluations': 10}
+            }
+        }
+        base_aggregate = {
+            'results': {
+                'Faithfulness': {'score': 0.9, 'total_evaluations': 10}
+            }
+        }
+        mock_download_aggregate.side_effect = [  # one (aggregate_json, bedrock_job_name) tuple per call
+            (custom_aggregate, 'bedrock-job-123'),
+            (base_aggregate, 'bedrock-job-456')
+        ]
+        
+        # Mock per-example results
+        custom_results = [
+            {
+                'inputRecord': {'prompt': "[{'role': 'user', 'content': 'Test'}]"},
+                'modelResponses': [{'response': "['Response']"}],
+                'automatedEvaluationResult': {
+                    'scores': [{'metricName': 'Faithfulness', 'result': 1.0}]
+                }
+            }
+        ]
+        base_results = [
+            {
+                'inputRecord': {'prompt': "[{'role': 'user', 'content': 'Test'}]"},
+                'modelResponses': [{'response': "['Response']"}],
+                'automatedEvaluationResult': {
+                    'scores': [{'metricName': 'Faithfulness', 'result': 0.9}]
+                }
+            }
+        ]
+        mock_download_results.side_effect = [custom_results, base_results]
+        
+        # Win-rate calculation is mocked, so this dict is what the display step should receive
+        win_rates = {
+            'custom_wins': 1, 'base_wins': 0, 'ties': 0, 'total': 1,
+            'custom_win_rate': 1.0, 'base_win_rate': 0.0, 'tie_rate': 0.0
+        }
+        mock_calculate_win.return_value = win_rates
+        
+        # Execute
+        _show_llmaj_results(mock_pipeline_execution, limit=5, offset=0)
+        
+        # Verify all components were called
+        assert mock_download_aggregate.call_count == 2
+        assert mock_download_results.call_count == 2
+        mock_calculate_win.assert_called_once()
+        mock_display_win.assert_called_once_with(win_rates, mock_console)
+        mock_display_aggregate.assert_called_once_with(custom_aggregate, base_aggregate, mock_console)
+    
+    @patch('sagemaker.train.common_utils.show_results_utils._display_aggregate_metrics')
+    @patch('sagemaker.train.common_utils.show_results_utils._download_llmaj_results_from_s3')
+    @patch('sagemaker.train.common_utils.show_results_utils._download_bedrock_aggregate_json')
+    @patch('sagemaker.train.common_utils.show_results_utils._extract_training_job_name_from_steps')
+    @patch('rich.console.Console')
+    def test_show_results_custom_only(
+        self, mock_console_class, mock_extract_job, mock_download_aggregate,
+        mock_download_results, mock_display_aggregate, mock_pipeline_execution
+    ):
+        """Test flow with custom model only (no base model)."""
+        mock_console = MagicMock()
+        mock_console_class.return_value = mock_console
+        
+        # Mock job name extraction - only custom
+        mock_extract_job.side_effect = ['custom-job', None]
+        
+        # Mock aggregate download
+        custom_aggregate = {
+            'results': {
+                'Faithfulness': {'score': 1.0, 'total_evaluations': 10}
+            }
+        }
+        mock_download_aggregate.return_value = (custom_aggregate, 'bedrock-job-123')
+        
+        # Mock per-example results
+        custom_results = [
+            {
+                'inputRecord': {'prompt': "[{'role': 'user', 'content': 'Test'}]"},
+                'modelResponses': [{'response': "['Response']"}],
+                'automatedEvaluationResult': {
+                    'scores': [{'metricName': 'Faithfulness', 'result': 1.0}]
+                }
+            }
+        ]
+        mock_download_results.return_value = custom_results
+        
+        # Execute
+        _show_llmaj_results(mock_pipeline_execution, limit=5, offset=0)
+        
+        # Verify aggregate displayed with None for base
+        mock_display_aggregate.assert_called_once_with(custom_aggregate, None, mock_console)
+    
+    @patch('sagemaker.train.common_utils.show_results_utils._download_llmaj_results_from_s3')
+    @patch('sagemaker.train.common_utils.show_results_utils._download_bedrock_aggregate_json')
+    @patch('sagemaker.train.common_utils.show_results_utils._extract_training_job_name_from_steps')
+    @patch('rich.console.Console')
+    def test_show_results_aggregate_not_found(
+        self, mock_console_class, mock_extract_job, mock_download_aggregate,
+        mock_download_results, mock_pipeline_execution
+    ):
+        """Test graceful degradation when aggregate results not found."""
+        mock_console = MagicMock()
+        mock_console_class.return_value = mock_console
+        
+        # Second extraction call returns None, i.e. no base job name is supplied
+        mock_extract_job.side_effect = ['custom-job', None]
+        
+        # Mock aggregate download failure
+        mock_download_aggregate.side_effect = FileNotFoundError("Aggregate not found")
+        
+        # Mock per-example results still work
+        custom_results = [
+            {
+                'inputRecord': {'prompt': "[{'role': 'user', 'content': 'Test'}]"},
+                'modelResponses': [{'response': "['Response']"}],
+                'automatedEvaluationResult': {
+                    'scores': [{'metricName': 'Faithfulness', 'result': 1.0}]
+                }
+            }
+        ]
+        mock_download_results.return_value = custom_results
+        
+        # Execute - should not raise exception
+        _show_llmaj_results(mock_pipeline_execution, limit=5, offset=0)
+        
+        # NOTE(review): no assertions follow, so this test only proves the call above does
+        # not raise when the aggregate download fails. Whether per-example results are still
+        # fetched or a warning is printed is not checked here -- TODO add explicit asserts.