# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0


import contextlib
import json
from dataclasses import dataclass

from rich.console import Console
from rich.panel import Panel

from aiperf.common.decorators import implements_protocol
from aiperf.common.enums import ConsoleExporterType
from aiperf.common.factories import ConsoleExporterFactory
from aiperf.common.mixins import AIPerfLoggerMixin
from aiperf.common.protocols import ConsoleExporterProtocol
from aiperf.exporters.exporter_config import ExporterConfig


@dataclass(frozen=True)
class ErrorInsight:
    """Model to describe a detected API error insight."""

    title: str
    problem: str
    causes: list[str]
    investigation: list[str]
    fixes: list[str]


class MaxCompletionTokensDetector:
    @staticmethod
    def detect(error_summary) -> ErrorInsight | None:
        """Return an ErrorInsight when the backend rejected 'max_completion_tokens'."""
        if not error_summary or not isinstance(error_summary, list):
            return None

        for item in error_summary:
            err = getattr(item, "error_details", None)
            if err is None:
                continue

            # Backend error messages are often JSON-encoded; fall back to the raw
            # string when the message body is not valid JSON.
            raw_msg = err.message or ""
            parsed = None
            with contextlib.suppress(Exception):
                parsed = json.loads(raw_msg)

            backend_msg = None
            if isinstance(parsed, dict):
                backend_msg = parsed.get("message")

            error_blob = str(backend_msg or raw_msg)

            if (
                "extra_forbidden" in error_blob
                and "max_completion_tokens" in error_blob
                and "Extra inputs are not permitted" in error_blob
            ):
                return ErrorInsight(
                    title="Unsupported Parameter: max_completion_tokens",
                    problem=(
                        "The backend rejected 'max_completion_tokens'. "
                        "This backend only supports 'max_tokens'."
                    ),
                    causes=[
                        "AIPerf generated 'max_completion_tokens' due to --output-tokens-mean.",
                        "TRT-LLM rejects this field.",
                    ],
                    investigation=[
                        "Inspect request payloads in profile_export.jsonl.",
                        "Check TRT-LLM supported parameters.",
                    ],
                    fixes=[
                        "Remove --output-tokens-mean.",
                        'Or use --extra-inputs "max_tokens:<value>".',
                    ],
                )

        return None


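# Illustrative only: the kind of backend error body that triggers the detector above.
# Exact field names and wording vary by backend; the detector only matches on the
# 'extra_forbidden', 'max_completion_tokens', and 'Extra inputs are not permitted'
# substrings, e.g.:
#
#   {"message": "max_completion_tokens: Extra inputs are not permitted [type=extra_forbidden]"}
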
@implements_protocol(ConsoleExporterProtocol)
@ConsoleExporterFactory.register(ConsoleExporterType.API_ERRORS)
class ConsoleApiErrorInsightExporter(AIPerfLoggerMixin):
    """Displays helpful diagnostic panels for known API error patterns."""

    DETECTORS = [
        MaxCompletionTokensDetector(),
    ]
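    # To cover more error patterns, append detector instances to DETECTORS above.
    # Each entry is assumed to expose detect(error_summary) returning an ErrorInsight
    # or None; this is an informal convention rather than an enforced protocol.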

    def __init__(self, exporter_config: ExporterConfig, **kwargs):
        super().__init__(**kwargs)
        self._results = exporter_config.results

    async def export(self, console: Console) -> None:
        error_summary = getattr(self._results, "error_summary", None)

        for detector in self.DETECTORS:
            insight = detector.detect(error_summary)
            if insight:
                panel = Panel(
                    self._format_text(insight),
                    title=insight.title,
                    border_style="bold yellow",
                    title_align="center",
                    padding=(0, 2),
                    expand=False,
                )
                console.print()
                console.print(panel)
                console.file.flush()

    def _format_text(self, insight: ErrorInsight) -> str:
        causes = "\n".join(f"  • {cause}" for cause in insight.causes)
        steps = "\n".join(
            f"  {i}. {step}" for i, step in enumerate(insight.investigation, start=1)
        )
        fixes = "\n".join(f"  • {fix}" for fix in insight.fixes)
        return (
            f"[bold]{insight.problem}[/bold]\n"
            f"\n[bold]Possible Causes:[/bold]\n{causes}\n"
            f"\n[bold]Investigation Steps:[/bold]\n{steps}\n"
            f"\n[bold]Suggested Fixes:[/bold]\n{fixes}"
        )
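

if __name__ == "__main__":
    # Minimal manual check (a sketch, not part of the exporter API): build a fake
    # record with the shape detect() expects -- an object exposing
    # error_details.message -- and confirm the detector returns an insight.
    # SimpleNamespace is a hypothetical stand-in for the real record/error models.
    from types import SimpleNamespace

    fake_message = json.dumps(
        {
            "message": (
                "max_completion_tokens: Extra inputs are not permitted "
                "[type=extra_forbidden]"
            )
        }
    )
    record = SimpleNamespace(error_details=SimpleNamespace(message=fake_message))
    detected = MaxCompletionTokensDetector.detect([record])
    print(detected.title if detected else "no insight detected")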