Commit 7d34d45

Superjomn and nv-guomingz authored and committed

chore: add tags to API reference

Signed-off-by: Superjomn <[email protected]>
Signed-off-by: nv-guomingz <[email protected]>
1 parent 37d0b68 commit 7d34d45

File tree

11 files changed: +464 −68 lines

docs/source/_static/custom.css

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+.tag {
+    padding: 2px 5px;
+    border-radius: 4px;
+    font-size: 0.8em;
+    margin-right: 5px;
+    color: #000;
+}
+
+code.beta {
+    display: inline-block;
+    background-color: #6c757d;
+    color: #999;
+}
+
+code.prototype {
+    display: inline-block;
+    background-color: #fd7e14;
+    color: #fff;
+}
+
+code.deprecated {
+    display: inline-block;
+    background-color: red;
+    color: #fff;
+}

docs/source/conf.py

Lines changed: 23 additions & 0 deletions
@@ -12,6 +12,7 @@
 import sys
 
 import pygit2
+from docutils import nodes
 
 sys.path.insert(0, os.path.abspath('.'))
 
@@ -60,10 +61,16 @@
     'sphinx_togglebutton',
 ]
 
+autodoc_member_order = 'bysource'
 autodoc_pydantic_model_show_json = True
 autodoc_pydantic_model_show_config_summary = True
 autodoc_pydantic_field_doc_policy = "description"
 autodoc_pydantic_model_show_field_list = True  # Display field list with descriptions
+autodoc_pydantic_model_member_order = "groupwise"
+autodoc_pydantic_model_hide_pydantic_methods = True
+autodoc_pydantic_field_list_validators = False
+autodoc_pydantic_settings_signature_prefix = ""  # remove any prefix
+autodoc_pydantic_settings_hide_reused_validator = True  # hide reused validators for cleaner pages
 
 myst_url_schemes = {
     "http":
@@ -143,10 +150,26 @@
 print('CPP_INCLUDE_DIR', CPP_INCLUDE_DIR)
 print('CPP_GEN_DIR', CPP_GEN_DIR)
 
+html_css_files = [
+    'custom.css',
+]
+
+
+def tag_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
+    """A custom role for displaying tags."""
+    tag_name = text.lower()
+    node = nodes.literal(text, text, classes=['tag', tag_name])
+    return [node], []
+
 
 def setup(app):
     from helper import generate_examples, generate_llmapi
 
+    from tensorrt_llm.llmapi.utils import tag_llm_params
+    tag_llm_params()
+
+    app.add_role('tag', tag_role)
+
     generate_examples()
     generate_llmapi()
 
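How the pieces fit together: the `tag_role` registered here turns `:tag:`beta`` in any docstring or `.rst` source into an inline `<code class="tag beta">` element, which the rules in `custom.css` above style as a small colored badge (beta, prototype, deprecated). Below is a minimal sketch of a docstring using the role; the docstring text is illustrative, not from this commit, and in practice `tag_llm_params()` injects the tags into the LLM-argument docs automatically:

```python
# Hypothetical docstring showing the role in use; Sphinx expands :tag:`beta`
# via tag_role() into nodes.literal(..., classes=['tag', 'beta']), and the
# custom.css rules above render it as a small rounded badge.
def get_stats(self, timeout: float = 2) -> list:
    """Get iteration statistics from the runtime. :tag:`beta`"""
    ...
```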

docs/source/helper.py

Lines changed: 22 additions & 6 deletions
@@ -286,6 +286,18 @@ def extract_all_and_eval(file_path):
     return local_vars
 
 
+def get_pydantic_methods() -> list[str]:
+    from pydantic import BaseModel
+
+    class Dummy(BaseModel):
+        pass
+
+    methods = set(
+        [method for method in dir(Dummy) if not method.startswith('_')])
+    methods.discard("__init__")
+    return list(methods)
+
+
 def generate_llmapi():
     root_dir = Path(__file__).parent.parent.parent.resolve()
 
@@ -301,14 +313,18 @@ def generate_llmapi():
     for cls_name in public_classes_names:
         cls_name = cls_name.strip()
         options = [
-            " :members:", " :undoc-members:", " :show-inheritance:"
+            " :members:",
+            " :undoc-members:",
+            " :show-inheritance:",
+            " :special-members: __init__",
+            " :member-order: groupwise",
         ]
 
-        if cls_name != 'LLM':  # Conditionally add :special-members: __init__
-            options.append(" :special-members: __init__")
-
-        if cls_name in ['TrtLLM', 'TorchLLM', 'LLM']:
-            options.append(" :inherited-members:")
+        options.append(" :inherited-members:")
+        if cls_name in ["TorchLlmArgs", "TrtLlmArgs"]:
+            # exclude the many methods Pydantic adds to every model
+            options.append(
+                f" :exclude-members: {','.join(get_pydantic_methods())}")
 
         content += f".. autoclass:: tensorrt_llm.llmapi.{cls_name}\n"
         content += "\n".join(options) + "\n\n"
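A quick way to see what `get_pydantic_methods()` actually excludes (a standalone check, not part of the commit): every public method that `pydantic.BaseModel` contributes to a model's `dir()` listing ends up in the generated `:exclude-members:` option, so the `TorchLlmArgs`/`TrtLlmArgs` pages show only their own fields and methods.

```python
# Standalone check (not part of the commit) of what the helper collects:
# the public methods pydantic.BaseModel contributes to every model.
from pydantic import BaseModel

class Dummy(BaseModel):
    pass

# On Pydantic v2 this prints names such as 'construct', 'copy', 'dict',
# 'json', 'model_dump', 'model_validate', ... — the members stripped
# from the generated autodoc pages.
print(sorted(m for m in dir(Dummy) if not m.startswith('_')))
```

Note that the `methods.discard("__init__")` call in the helper is purely defensive: `__init__` starts with an underscore, so the comprehension has already filtered it out.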

tensorrt_llm/bench/benchmark/throughput.py

Lines changed: 4 additions & 0 deletions
@@ -382,12 +382,16 @@ def throughput_command(
             logger.warning(
                 "Ignore extended_runtime_perf_knob_config for pytorch backend."
             )
+        if kwargs.pop("batching_type", None):
+            logger.warning("Ignore batching_type for pytorch backend.")
         llm = PyTorchLLM(**kwargs)
     elif runtime_config.backend == "_autodeploy":
         if kwargs.pop("extended_runtime_perf_knob_config", None):
             logger.warning(
                 "Ignore extended_runtime_perf_knob_config for _autodeploy backend."
             )
+        if kwargs.pop("batching_type", None):
+            logger.warning("Ignore batching_type for _autodeploy backend.")
         llm = AutoDeployLLM(**kwargs)
     else:
         llm = LLM(**kwargs)
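The new guard mirrors the existing `extended_runtime_perf_knob_config` check: pop the unsupported key out of `kwargs` so it never reaches the backend constructor, and warn only when the caller actually set it. A minimal, self-contained sketch of the pattern (names are illustrative, not from the codebase):

```python
import logging

logger = logging.getLogger(__name__)

def drop_unsupported(kwargs: dict, key: str, backend: str) -> None:
    # Pop the key unconditionally so the constructor never sees it;
    # warn only if the caller supplied a truthy value for it.
    if kwargs.pop(key, None):
        logger.warning("Ignore %s for %s backend.", key, backend)

opts = {"max_batch_size": 8, "batching_type": "STATIC"}
drop_unsupported(opts, "batching_type", "pytorch")
assert "batching_type" not in opts
```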

tensorrt_llm/llmapi/llm.py

Lines changed: 15 additions & 7 deletions
@@ -40,7 +40,7 @@
                              _xgrammar_tokenizer_info)
 # TODO[chunweiy]: move the following symbols back to utils scope, and remove the following import
 from .utils import (append_docstring, exception_handler, get_device_count,
-                    print_colored_debug)
+                    print_colored_debug, set_api_status)
 
 
 class RequestOutput(DetokenizedGenerationResultBase, GenerationResult):
@@ -212,6 +212,7 @@ def __init__(self,
         atexit.register(LLM._shutdown_wrapper, weakref.ref(self))
 
     @property
+    @set_api_status("beta")
     def llm_id(self) -> str:
         if self._llm_id is None:
             hostname = socket.gethostname()
@@ -422,6 +423,7 @@ def generate_async(
         return RequestOutput._from_generation_result(result, prompt,
                                                      self.tokenizer)
 
+    @set_api_status("beta")
     def get_stats(self, timeout: Optional[float] = 2) -> List[dict]:
         '''Get iteration statistics from the runtime.
         To collect statistics, call this function after prompts have been submitted with LLM().generate().
@@ -435,6 +437,7 @@ def get_stats(self, timeout: Optional[float] = 2) -> List[dict]:
         '''
         return self._executor.get_stats(timeout=timeout)
 
+    @set_api_status("beta")
     def get_stats_async(self, timeout: Optional[float] = 2) -> IterationResult:
         '''Get iteration statistics from the runtime.
         To collect statistics, you can call this function in an async coroutine or the /metrics endpoint (if you're using trtllm-serve)
@@ -448,6 +451,7 @@ def get_stats_async(self, timeout: Optional[float] = 2) -> IterationResult:
         '''
         return self._executor.aget_stats(timeout=timeout)
 
+    @set_api_status("beta")
     def get_kv_cache_events(self, timeout: Optional[float] = 2) -> List[dict]:
         '''Get iteration KV events from the runtime.
 
@@ -469,6 +473,7 @@ def get_kv_cache_events(self, timeout: Optional[float] = 2) -> List[dict]:
         '''
         return self._executor.get_kv_events(timeout=timeout)
 
+    @set_api_status("beta")
     def get_kv_cache_events_async(self,
                                   timeout: Optional[float] = 2
                                   ) -> IterationResult:
@@ -667,6 +672,7 @@ def tokenizer(self) -> Optional[TokenizerBase]:
     def tokenizer(self, tokenizer: TokenizerBase):
         self._tokenizer = tokenizer
 
+    @set_api_status("beta")
     def shutdown(self) -> None:
         if hasattr(self, "_executor") and self._executor is not None:
             self._executor.shutdown()
@@ -924,12 +930,17 @@ def _build_model(self):
             max_beam_width=self.args.max_beam_width,
             scheduler_config=PybindMirror.maybe_to_pybind(
                 self.args.scheduler_config),
-            batching_type=PybindMirror.maybe_to_pybind(self.args.batching_type)
-            or tllm.BatchingType.INFLIGHT,
             max_batch_size=max_batch_size,
             max_num_tokens=max_num_tokens,
             gather_generation_logits=self.args.gather_generation_logits)
 
+        if hasattr(self.args,
+                   "batching_type") and self.args.batching_type is not None:
+            self._executor_config.batching_type = PybindMirror.maybe_to_pybind(
+                self.args.batching_type)
+        else:
+            self._executor_config.batching_type = tllm.BatchingType.INFLIGHT
+
         if self.args.kv_cache_config is not None:
             self._executor_config.kv_cache_config = PybindMirror.maybe_to_pybind(
                 self.args.kv_cache_config)
@@ -957,7 +968,6 @@ def _build_model(self):
                 f"Unsupported guided decoding backend {self.args.guided_decoding_backend}"
             )
 
-        self._executor_config.normalize_log_probs = self.args.normalize_log_probs
         self._executor_config.enable_chunked_context = self.args.enable_chunked_prefill
         self._executor_config.max_beam_width = self.args.max_beam_width
         if self.args.cache_transceiver_config is not None:
@@ -1040,13 +1050,11 @@ def __init__(self,
                          revision, tokenizer_revision, **kwargs)
 
 
-_LLM_REPR = "TorchLLM"
-
 # sphinx will ignore the LLM's docstring if it is not explicitly set
 LLM.__doc__ = \
     f"""LLM class is the main class for running a LLM model.
 
-This class is an alias of {_LLM_REPR}.
+For more details about the arguments, please refer to :class:`TorchLlmArgs`.
 
 Parameters:
 """ + TORCH_LLM_DOCSTRING
