31 changes: 30 additions & 1 deletion .github/workflows/tests-integration.yaml
@@ -27,7 +27,7 @@ jobs:

- name: Set expected providers for local integration tests
id: set_local_providers
run: echo "expected_providers=ollama,llamacpp,llamafile" >> $GITHUB_OUTPUT
run: echo "expected_providers=ollama,llamacpp,llamafile,vllm" >> $GITHUB_OUTPUT

determine-jobs-to-run:
needs: expected-providers
@@ -144,6 +144,14 @@ jobs:
restore-keys: |
${{ runner.os }}-llamafile-

- uses: actions/cache@v4
if: github.event.inputs.filter == '' || contains(github.event.inputs.filter, 'vllm')
with:
path: ~/.cache/huggingface
key: ${{ runner.os }}-vllm-models-${{ hashFiles('tests/conftest.py') }}
restore-keys: |
${{ runner.os }}-vllm-models-

- name: Setup Ollama
if: github.event.inputs.filter == '' || contains(github.event.inputs.filter, 'ollama')
uses: ai-action/setup-ollama@v1
@@ -192,6 +200,19 @@ jobs:
run: |
timeout 60 bash -c 'until curl -s http://localhost:8090/health >/dev/null; do sleep 1; done'

- name: Install and Run vLLM (CPU)
if: github.event.inputs.filter == '' || contains(github.event.inputs.filter, 'vllm')
run: |
uv pip install vllm --extra-index-url https://wheels.vllm.ai/nightly/cpu --prerelease=allow --index-strategy unsafe-best-match --torch-backend cpu
export VLLM_CPU_KVCACHE_SPACE=4
vllm serve Qwen/Qwen2.5-0.5B-Instruct --dtype bfloat16 --max-model-len 2048 --port 8080 > vllm.log 2>&1 &
echo $! > vllm.pid

- name: Wait for vLLM to be ready
if: github.event.inputs.filter == '' || contains(github.event.inputs.filter, 'vllm')
run: |
timeout 600 bash -c 'until curl -s http://localhost:8080/v1/models >/dev/null; do echo "Waiting for vLLM..."; sleep 5; done' || (echo "=== vLLM logs ===" && cat vllm.log && exit 1)

- name: Run Local Provider Integration tests
env:
INCLUDE_LOCAL_PROVIDERS: "true"
@@ -211,6 +232,14 @@ jobs:
rm llamafile.pid
fi

- name: Cleanup vLLM process
if: always()
run: |
if [ -f vllm.pid ]; then
kill $(cat vllm.pid) || true
rm vllm.pid
fi

- name: Upload coverage reports to Codecov
if: always()
uses: codecov/codecov-action@v5
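The new workflow steps above start a CPU-only vLLM server in the background and then poll its OpenAI-compatible /v1/models endpoint until it responds. For reproducing that readiness check locally outside CI, here is a minimal Python sketch; the URL, timeout, and polling interval mirror the workflow step, and the script itself is illustrative rather than part of this PR.

import time
import urllib.error
import urllib.request

VLLM_MODELS_URL = "http://localhost:8080/v1/models"  # same port as the CI step above


def wait_for_vllm(url: str = VLLM_MODELS_URL, timeout_s: int = 600, poll_s: int = 5) -> None:
    """Block until the vLLM OpenAI-compatible endpoint responds, or raise TimeoutError."""
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5):
                return  # server is up and serving /v1/models
        except (urllib.error.URLError, OSError):
            print("Waiting for vLLM...")
            time.sleep(poll_s)
    raise TimeoutError(f"vLLM did not become ready within {timeout_s} seconds")


if __name__ == "__main__":
    wait_for_vllm()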
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -18,7 +18,7 @@ dependencies = [
[project.optional-dependencies]

all = [
"any-llm-sdk[mistral,anthropic,huggingface,gemini,vertexai,cohere,cerebras,fireworks,groq,bedrock,azure,azureopenai,watsonx,together,sambanova,ollama,moonshot,nebius,xai,databricks,deepseek,inception,openai,openrouter,portkey,lmstudio,llama,voyage,perplexity,platform,llamafile,llamacpp,sagemaker,gateway,zai,minimax]"
"any-llm-sdk[mistral,anthropic,huggingface,gemini,vertexai,cohere,cerebras,fireworks,groq,bedrock,azure,azureopenai,watsonx,together,sambanova,ollama,moonshot,nebius,xai,databricks,deepseek,inception,openai,openrouter,portkey,lmstudio,llama,voyage,perplexity,platform,llamafile,llamacpp,sagemaker,gateway,zai,minimax,vllm]"
]

platform = [
@@ -109,6 +109,7 @@ openrouter = []
portkey = []
sambanova = []
minimax = []
vllm = []
gateway = [
"fastapi>=0.115.0",
"uvicorn[standard]>=0.30.0",
1 change: 1 addition & 0 deletions src/any_llm/constants.py
@@ -45,6 +45,7 @@ class LLMProvider(StrEnum):
SAGEMAKER = "sagemaker"
TOGETHER = "together"
VERTEXAI = "vertexai"
VLLM = "vllm"
VOYAGE = "voyage"
WATSONX = "watsonx"
XAI = "xai"
3 changes: 3 additions & 0 deletions src/any_llm/providers/vllm/__init__.py
@@ -0,0 +1,3 @@
from .vllm import VllmProvider

__all__ = ["VllmProvider"]
18 changes: 18 additions & 0 deletions src/any_llm/providers/vllm/vllm.py
@@ -0,0 +1,18 @@
from any_llm.providers.openai.base import BaseOpenAIProvider


class VllmProvider(BaseOpenAIProvider):
    API_BASE = "http://localhost:8000/v1"
    ENV_API_KEY_NAME = "VLLM_API_KEY"
    PROVIDER_NAME = "vllm"
    PROVIDER_DOCUMENTATION_URL = "https://docs.vllm.ai/"

    SUPPORTS_EMBEDDING = True
    SUPPORTS_COMPLETION_REASONING = True
    SUPPORTS_COMPLETION_STREAMING = True
    SUPPORTS_COMPLETION_PDF = False

    def _verify_and_set_api_key(self, api_key: str | None = None) -> str | None:
        # vLLM server by default doesn't require an API key
        # but can be configured to use one via --api-key flag
        return api_key or ""
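Since VllmProvider only overrides API-key verification on top of BaseOpenAIProvider, a stock local vLLM server is reachable without credentials: a missing key is resolved to an empty string. A minimal usage sketch, restricted to the constructor calls exercised by the unit tests further down (the import path follows the new __init__.py above); it is illustrative and not part of the PR.

from any_llm.providers.vllm import VllmProvider

# Default: targets API_BASE (http://localhost:8000/v1); the missing key is
# resolved to "" by _verify_and_set_api_key, matching a server started
# without --api-key.
local = VllmProvider()
assert local.PROVIDER_NAME == "vllm"

# If the server was launched with `vllm serve ... --api-key <token>`,
# pass the same token when constructing the provider.
secured = VllmProvider(api_key="test-api-key")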
3 changes: 3 additions & 0 deletions tests/conftest.py
@@ -23,6 +23,7 @@ def provider_reasoning_model_map() -> dict[LLMProvider, str]:
LLMProvider.OPENROUTER: "google/gemini-2.5-flash-lite",
LLMProvider.LLAMAFILE: "N/A",
LLMProvider.LLAMACPP: "N/A",
LLMProvider.VLLM: "N/A",
LLMProvider.LMSTUDIO: "openai/gpt-oss-20b", # You must have LM Studio running and the server enabled
LLMProvider.AZUREOPENAI: "azure/<your_deployment_name>",
LLMProvider.CEREBRAS: "gpt-oss-120b",
@@ -60,6 +61,7 @@ def provider_model_map() -> dict[LLMProvider, str]:
LLMProvider.OLLAMA: "llama3.2:1b",
LLMProvider.LLAMAFILE: "N/A",
LLMProvider.LMSTUDIO: "google/gemma-3n-e4b", # You must have LM Studio running and the server enabled
LLMProvider.VLLM: "Qwen/Qwen2.5-0.5B-Instruct",
LLMProvider.COHERE: "command-a-03-2025",
LLMProvider.CEREBRAS: "llama-3.3-70b",
LLMProvider.HUGGINGFACE: "huggingface/tgi", # This is the syntax used in `litellm` when using HF Inference Endpoints (https://docs.litellm.ai/docs/providers/huggingface#dedicated-inference-endpoints)
@@ -131,6 +133,7 @@ def provider_client_config() -> dict[LLMProvider, dict[str, Any]]:
LLMProvider.OPENAI: {"timeout": 100},
LLMProvider.HUGGINGFACE: {"api_base": "https://oze7k8n86bjfzgjk.us-east-1.aws.endpoints.huggingface.cloud/v1"},
LLMProvider.LLAMACPP: {"api_base": "http://127.0.0.1:8090/v1"},
LLMProvider.VLLM: {"api_base": "http://127.0.0.1:8080/v1"},
LLMProvider.MISTRAL: {"timeout_ms": 100000},
LLMProvider.NEBIUS: {"api_base": "https://api.studio.nebius.com/v1/"},
LLMProvider.OPENAI: {"timeout": 10},
1 change: 1 addition & 0 deletions tests/constants.py
@@ -8,6 +8,7 @@
LLMProvider.LMSTUDIO,
LLMProvider.LLAMAFILE,
LLMProvider.GATEWAY,
LLMProvider.VLLM,
]

EXPECTED_PROVIDERS = os.environ.get("EXPECTED_PROVIDERS", "").split(",")
13 changes: 13 additions & 0 deletions tests/unit/providers/test_vllm_provider.py
@@ -0,0 +1,13 @@
from any_llm.providers.vllm.vllm import VllmProvider


def test_provider_without_api_key() -> None:
    provider = VllmProvider()
    assert provider.PROVIDER_NAME == "vllm"
    assert provider.API_BASE == "http://localhost:8000/v1"
    assert provider.ENV_API_KEY_NAME == "VLLM_API_KEY"


def test_provider_with_api_key() -> None:
    provider = VllmProvider(api_key="test-api-key")
    assert provider.PROVIDER_NAME == "vllm"
1 change: 1 addition & 0 deletions tests/unit/test_provider.py
@@ -147,6 +147,7 @@ def test_providers_raise_MissingApiKeyError(provider: LLMProvider) -> None:
LLMProvider.OLLAMA,
LLMProvider.SAGEMAKER,
LLMProvider.VERTEXAI,
LLMProvider.VLLM,
LLMProvider.GATEWAY,
):
pytest.skip("This provider handles `api_key` differently.")