NVIDIA
diff --git a/‎docs/source/_static/custom.css‎
Lines changed: 25 additions & 0 deletions b/‎docs/source/_static/custom.css‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎docs/source/conf.py‎
Lines changed: 23 additions & 0 deletions b/‎docs/source/conf.py‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎docs/source/helper.py‎
Lines changed: 22 additions & 6 deletions b/‎docs/source/helper.py‎
Lines changed: 22 additions & 6 deletions
diff --git a/‎tensorrt_llm/llmapi/llm.py‎
Lines changed: 8 additions & 4 deletions b/‎tensorrt_llm/llmapi/llm.py‎
Lines changed: 8 additions & 4 deletions
@@ -0,0 +1,25 @@
+/* Inline status badges for API docs; conf.py's `tag` role emits a literal
+   with the classes `tag` and the lower-cased role text (e.g. `beta`). */
+.tag {
+  padding: 2px 5px;
+  border-radius: 4px;
+  font-size: 0.8em;
+  margin-right: 5px;
+  color: #000;
+}
+
+code.beta {
+  display: inline-block;
+  background-color: #6c757d;
+  /* White like the sibling badges: #999 on #6c757d is only ~1.6:1 contrast,
+     #fff is ~4.7:1. */
+  color: #fff;
+}
+
+code.prototype {
+  display: inline-block;
+  background-color: #fd7e14;
+  color: #fff;
+}
+
+code.deprecated {
+  display: inline-block;
+  background-color: red;
+  color: #fff;
+}
@@ -12,6 +12,7 @@
 import sys
 
 import pygit2
+from docutils import nodes
 
 sys.path.insert(0, os.path.abspath('.'))
 
@@ -60,10 +61,16 @@
     'sphinx_togglebutton',
 ]
 
+autodoc_member_order = 'bysource'
 autodoc_pydantic_model_show_json = True
 autodoc_pydantic_model_show_config_summary = True
 autodoc_pydantic_field_doc_policy = "description"
 autodoc_pydantic_model_show_field_list = True  # Display field list with descriptions
+autodoc_pydantic_model_member_order = "groupwise"
+autodoc_pydantic_model_hide_pydantic_methods = True
+autodoc_pydantic_field_list_validators = False
+autodoc_pydantic_settings_signature_prefix = ""  # remove any prefix
+autodoc_pydantic_settings_hide_reused_validator = True  # hide reused validators; hiding all validators would be even better
 
 myst_url_schemes = {
     "http":
@@ -143,10 +150,26 @@
 print('CPP_INCLUDE_DIR', CPP_INCLUDE_DIR)
 print('CPP_GEN_DIR', CPP_GEN_DIR)
 
+html_css_files = [
+    'custom.css',
+]
+
+
+def tag_role(name, rawtext, text, lineno, inliner, options=None, content=None):
+    """Docutils role that renders its text as an inline status tag.
+
+    The role text becomes a literal node carrying the classes ``tag`` and
+    the lower-cased text (for example ``beta``), which custom.css styles.
+
+    Args:
+        name: Role name as used in the document (unused).
+        rawtext: The complete role markup as written in the source.
+        text: The content between the role's backquotes.
+        lineno: Line number of the role in the source (unused).
+        inliner: The docutils inliner that invoked the role (unused).
+        options: Optional role options (unused). Defaults to ``None`` rather
+            than a shared mutable ``{}``.
+        content: Optional role content (unused). Defaults to ``None`` rather
+            than a shared mutable ``[]``.
+
+    Returns:
+        A ``([node], [])`` pair: the nodes to insert and an empty list of
+        system messages.
+    """
+    tag_name = text.lower()
+    # The first positional argument of a docutils text element is its
+    # rawsource, i.e. the original markup, so pass rawtext rather than text.
+    node = nodes.literal(rawtext, text, classes=['tag', tag_name])
+    return [node], []
+
 
 def setup(app):
+    """Register the ``tag`` role on ``app`` and generate the docs sources.
+
+    Steps visible here, in order: call ``tag_llm_params()``, register
+    ``tag_role`` under the role name ``tag``, then run
+    ``generate_examples()`` and ``generate_llmapi()`` from ``helper``.
+
+    NOTE(review): ``tag_llm_params`` presumably has to run before
+    ``generate_llmapi`` so the generated pages pick up the status tags --
+    confirm against ``tensorrt_llm.llmapi.utils``.
+    """
     from helper import generate_examples, generate_llmapi
 
+    from tensorrt_llm.llmapi.utils import tag_llm_params
+    tag_llm_params()
+
+    app.add_role('tag', tag_role)
+
     generate_examples()
     generate_llmapi()
 
 
@@ -286,6 +286,18 @@ def extract_all_and_eval(file_path):
     return local_vars
 
 
+def get_pydantic_methods() -> list[str]:
+    """Return the public attribute names of an empty Pydantic model.
+
+    ``generate_llmapi`` joins the result into an ``:exclude-members:``
+    option so the names Pydantic adds to every model are left out of the
+    generated pages for the args classes.
+
+    Returns:
+        Alphabetically sorted names from ``dir()`` of an empty ``BaseModel``
+        subclass that do not start with an underscore. ``__init__`` and all
+        other private/dunder names are therefore never part of the result.
+    """
+    from pydantic import BaseModel
+
+    class Dummy(BaseModel):
+        pass
+
+    # dir() already yields unique, alphabetically sorted names; keeping that
+    # order (instead of round-tripping through a set) makes the generated
+    # directive identical from one docs build to the next.
+    return [name for name in dir(Dummy) if not name.startswith('_')]
+
+
 def generate_llmapi():
     root_dir = Path(__file__).parent.parent.parent.resolve()
 
@@ -301,14 +313,18 @@ def generate_llmapi():
     for cls_name in public_classes_names:
         cls_name = cls_name.strip()
         options = [
-            "    :members:", "    :undoc-members:", "    :show-inheritance:"
+            "    :members:",
+            "    :undoc-members:",
+            "    :show-inheritance:",
+            "    :special-members: __init__",
+            "    :member-order: groupwise",
         ]
 
-        if cls_name != 'LLM':  # Conditionally add :special-members: __init__
-            options.append("    :special-members: __init__")
-
-        if cls_name in ['TrtLLM', 'TorchLLM', 'LLM']:
-            options.append("    :inherited-members:")
+        options.append("    :inherited-members:")
+        if cls_name in ["TorchLlmArgs", "TrtLlmArgs"]:
+            # exclude tons of methods from Pydantic
+            options.append(
+                f"    :exclude-members: {','.join(get_pydantic_methods())}")
 
         content += f".. autoclass:: tensorrt_llm.llmapi.{cls_name}\n"
         content += "\n".join(options) + "\n\n"
 
@@ -40,7 +40,7 @@
                         _xgrammar_tokenizer_info)
 # TODO[chunweiy]: move the following symbols back to utils scope, and remove the following import
 from .utils import (append_docstring, exception_handler, get_device_count,
-                    print_colored_debug)
+                    print_colored_debug, set_api_status)
 
 
 class RequestOutput(DetokenizedGenerationResultBase, GenerationResult):
@@ -212,6 +212,7 @@ def __init__(self,
         atexit.register(LLM._shutdown_wrapper, weakref.ref(self))
 
     @property
+    @set_api_status("beta")
     def llm_id(self) -> str:
         if self._llm_id is None:
             hostname = socket.gethostname()
@@ -421,6 +422,7 @@ def generate_async(
         return RequestOutput._from_generation_result(result, prompt,
                                                      self.tokenizer)
 
+    @set_api_status("beta")
     def get_stats(self, timeout: Optional[float] = 2) -> List[dict]:
         '''Get iteration statistics from the runtime.
         To collect statistics, call this function after prompts have been submitted with LLM().generate().
@@ -434,6 +436,7 @@ def get_stats(self, timeout: Optional[float] = 2) -> List[dict]:
         '''
         return self._executor.get_stats(timeout=timeout)
 
+    @set_api_status("beta")
     def get_stats_async(self, timeout: Optional[float] = 2) -> IterationResult:
         '''Get iteration statistics from the runtime.
         To collect statistics, you can call this function in an async coroutine or the /metrics endpoint (if you're using trtllm-serve)
@@ -447,6 +450,7 @@ def get_stats_async(self, timeout: Optional[float] = 2) -> IterationResult:
         '''
         return self._executor.aget_stats(timeout=timeout)
 
+    @set_api_status("beta")
     def get_kv_cache_events(self, timeout: Optional[float] = 2) -> List[dict]:
         '''Get iteration KV events from the runtime.
 
@@ -468,6 +472,7 @@ def get_kv_cache_events(self, timeout: Optional[float] = 2) -> List[dict]:
         '''
         return self._executor.get_kv_events(timeout=timeout)
 
+    @set_api_status("beta")
     def get_kv_cache_events_async(self,
                                   timeout: Optional[float] = 2
                                   ) -> IterationResult:
@@ -664,6 +669,7 @@ def tokenizer(self) -> Optional[TokenizerBase]:
     def tokenizer(self, tokenizer: TokenizerBase):
         self._tokenizer = tokenizer
 
+    @set_api_status("beta")
     def shutdown(self) -> None:
         if hasattr(self, "_executor") and self._executor is not None:
             self._executor.shutdown()
@@ -1037,13 +1043,11 @@ def __init__(self,
                          revision, tokenizer_revision, **kwargs)
 
 
-_LLM_REPR = "TorchLLM"
-
 # sphinx will ignore the LLM's docstring if it is not explicitly set
 LLM.__doc__ = \
-    f"""LLM class is the main class for running a LLM model.
+    """LLM class is the main class for running a LLM model.
 
-    This class is an alias of {_LLM_REPR}.
+    For more details about the arguments, please refer to :class:`TorchLlmArgs`.
 
     Parameters:
 """ + TORCH_LLM_DOCSTRING