# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0


import contextlib
import json
from dataclasses import dataclass

from rich.console import Console
from rich.panel import Panel

from aiperf.common.decorators import implements_protocol
from aiperf.common.enums import ConsoleExporterType
from aiperf.common.factories import ConsoleExporterFactory
from aiperf.common.mixins import AIPerfLoggerMixin
from aiperf.common.protocols import ConsoleExporterProtocol
from aiperf.exporters.exporter_config import ExporterConfig


@dataclass(frozen=True)
class ErrorInsight:
    """Model to describe a detected API error insight."""

    title: str
    problem: str
    causes: list[str]
    investigation: list[str]
    fixes: list[str]


class MaxCompletionTokensDetector:
    @staticmethod
    def detect(error_summary) -> ErrorInsight | None:
        """Return an ErrorInsight when the backend rejected 'max_completion_tokens'."""
        if not error_summary or not isinstance(error_summary, list):
            return None

        for item in error_summary:
            err = getattr(item, "error_details", None)
            if err is None:
                continue

            # Backend error messages are often JSON-encoded; fall back to the raw
            # string when the message body is not valid JSON.
            raw_msg = err.message or ""
            parsed = None
            with contextlib.suppress(Exception):
                parsed = json.loads(raw_msg)

            backend_msg = None
            if isinstance(parsed, dict):
                backend_msg = parsed.get("message")

            error_blob = str(backend_msg or raw_msg)

            if (
                "extra_forbidden" in error_blob
                and "max_completion_tokens" in error_blob
                and "Extra inputs are not permitted" in error_blob
            ):
                return ErrorInsight(
                    title="Unsupported Parameter: max_completion_tokens",
                    problem=(
                        "The backend rejected 'max_completion_tokens'. "
                        "This backend only supports 'max_tokens'."
                    ),
                    causes=[
                        "AIPerf generated 'max_completion_tokens' due to --output-tokens-mean.",
                        "TRT-LLM rejects this field.",
                    ],
                    investigation=[
                        "Inspect request payloads in profile_export.jsonl.",
                        "Check TRT-LLM supported parameters.",
                    ],
                    fixes=[
                        "Remove --output-tokens-mean.",
                        'Or use --extra-inputs "max_tokens:<value>".',
                    ],
                )

        return None


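# Illustrative only: the kind of backend error body that triggers the detector above.
# Exact field names and wording vary by backend; the detector only matches on the
# 'extra_forbidden', 'max_completion_tokens', and 'Extra inputs are not permitted'
# substrings, e.g.:
#
#   {"message": "max_completion_tokens: Extra inputs are not permitted [type=extra_forbidden]"}
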
@implements_protocol(ConsoleExporterProtocol)
@ConsoleExporterFactory.register(ConsoleExporterType.API_ERRORS)
class ConsoleApiErrorInsightExporter(AIPerfLoggerMixin):
    """Displays helpful diagnostic panels for known API error patterns."""

    DETECTORS = [
        MaxCompletionTokensDetector(),
    ]
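    # To cover more error patterns, append detector instances to DETECTORS above.
    # Each entry is assumed to expose detect(error_summary) returning an ErrorInsight
    # or None; this is an informal convention rather than an enforced protocol.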

    def __init__(self, exporter_config: ExporterConfig, **kwargs):
        super().__init__(**kwargs)
        self._results = exporter_config.results

    async def export(self, console: Console) -> None:
        error_summary = getattr(self._results, "error_summary", None)

        for detector in self.DETECTORS:
            insight = detector.detect(error_summary)
            if insight:
                panel = Panel(
                    self._format_text(insight),
                    title=insight.title,
                    border_style="bold yellow",
                    title_align="center",
                    padding=(0, 2),
                    expand=False,
                )
                console.print()
                console.print(panel)
                console.file.flush()

    def _format_text(self, insight: ErrorInsight) -> str:
        causes = "\n".join(f"  • {cause}" for cause in insight.causes)
        steps = "\n".join(
            f"  {i}. {step}" for i, step in enumerate(insight.investigation, start=1)
        )
        fixes = "\n".join(f"  • {fix}" for fix in insight.fixes)
        return (
            f"[bold]{insight.problem}[/bold]\n"
            f"\n[bold]Possible Causes:[/bold]\n{causes}\n"
            f"\n[bold]Investigation Steps:[/bold]\n{steps}\n"
            f"\n[bold]Suggested Fixes:[/bold]\n{fixes}"
        )
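

if __name__ == "__main__":
    # Minimal manual check (a sketch, not part of the exporter API): build a fake
    # record with the shape detect() expects -- an object exposing
    # error_details.message -- and confirm the detector returns an insight.
    # SimpleNamespace is a hypothetical stand-in for the real record/error models.
    from types import SimpleNamespace

    fake_message = json.dumps(
        {
            "message": (
                "max_completion_tokens: Extra inputs are not permitted "
                "[type=extra_forbidden]"
            )
        }
    )
    record = SimpleNamespace(error_details=SimpleNamespace(message=fake_message))
    detected = MaxCompletionTokensDetector.detect([record])
    print(detected.title if detected else "no insight detected")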