We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
lmdeploy serve api_server
1 parent 9901f76 commit 6f65b74 · Copy full SHA for 6f65b74
lmdeploy/cli/serve.py
@@ -268,6 +268,8 @@ def gradio(args):
268
cache_block_seq_len=args.cache_block_seq_len,
269
enable_prefix_caching=args.enable_prefix_caching,
270
max_prefill_token_num=args.max_prefill_token_num,
271
+ num_tokens_per_iter=args.num_tokens_per_iter,
272
+ max_prefill_iters=args.max_prefill_iters,
273
communicator=args.communicator)
274
chat_template_config = get_chat_template(args.chat_template)
275
run(args.model_path_or_server,
0 commit comments