Commit b057894

Support min_p in openai completions_v1 (#3506)

* Support min_p in openai completions_v1
* Support min_p in CompletionRequest protocol

1 parent 2d46035 commit b057894

File tree

2 files changed: +7 −0 lines changed


lmdeploy/serve/openai/api_server.py

Lines changed: 6 additions & 0 deletions
@@ -596,6 +596,11 @@ async def completions_v1(request: CompletionRequest, raw_request: Request = None
             this to False. This is setup to True in slow tokenizers.
         - top_k (int): The number of the highest probability vocabulary
             tokens to keep for top-k-filtering
+        - min_p (float): Minimum token probability, which will be scaled by the
+            probability of the most likely token. It must be a value between
+            0 and 1. Typical values are in the 0.01-0.2 range, comparably
+            selective as setting `top_p` in the 0.99-0.8 range (use the
+            opposite of normal `top_p` values)

         Currently we do not support the following features:
         - logprobs (not supported yet)
@@ -633,6 +638,7 @@ async def completions_v1(request: CompletionRequest, raw_request: Request = None
             ignore_eos=request.ignore_eos,
             stop_words=request.stop,
             skip_special_tokens=request.skip_special_tokens,
+            min_p=request.min_p,
             random_seed=random_seed,
             spaces_between_special_tokens=request.spaces_between_special_tokens)
     generators = []
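
The docstring added above describes min_p as a probability floor scaled by the probability of the most likely token: any token whose probability falls below `min_p * max_prob` is excluded before sampling. A minimal NumPy sketch of that filtering rule (an illustration only, not lmdeploy's actual sampling kernel; the helper name `min_p_filter` is hypothetical):

```python
import numpy as np

def min_p_filter(logits: np.ndarray, min_p: float) -> np.ndarray:
    """Zero out tokens whose probability is below min_p * max_prob,
    then renormalize. min_p must be in [0, 1]."""
    # Numerically stable softmax over the logits.
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    # The threshold scales with the most likely token's probability,
    # so the filter adapts to how peaked the distribution is.
    threshold = min_p * probs.max()
    filtered = np.where(probs >= threshold, probs, 0.0)
    return filtered / filtered.sum()

# With min_p=0.3, a token far below 30% of the top probability is dropped.
result = min_p_filter(np.array([2.0, 1.0, -5.0]), min_p=0.3)
```

With a confident distribution (one dominant token) the threshold is high and few alternatives survive; with a flat distribution the threshold drops and more tokens remain, which is why typical min_p values (0.01-0.2) behave comparably to high top_p values.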

lmdeploy/serve/openai/protocol.py

Lines changed: 1 addition & 0 deletions
@@ -276,6 +276,7 @@ class CompletionRequest(BaseModel):
     spaces_between_special_tokens: Optional[bool] = True
     top_k: Optional[int] = 40  # for opencompass
     seed: Optional[int] = None
+    min_p: float = 0.0


 class CompletionResponseChoice(BaseModel):

0 commit comments
