|
21 | 21 | from typing import Dict, Union |
22 | 22 | from fmbench.scripts import constants |
23 | 23 | from ec2_metadata import ec2_metadata |
24 | | -from fmbench.scripts.inference_containers import (djl, vllm, triton, ollama) |
| 24 | +from fmbench.scripts.inference_containers import (djl, vllm, vllm_gpu, triton, ollama) |
25 | 25 | from fmbench.scripts.constants import (IS_NEURON_INSTANCE, LISTEN_PORT) |
26 | 26 | from fmbench.scripts.prepare_for_multi_model_containers import prepare_docker_compose_yml |
27 | 27 |
|
@@ -75,6 +75,8 @@ def _create_deployment_script(image_uri, |
75 | 75 | deploy_script_content = djl.create_script(region, image_uri, model_id, model_name, env_str, privileged_str, hf_token, directory) |
76 | 76 | case constants.CONTAINER_TYPE_VLLM: |
77 | 77 | deploy_script_content = vllm.create_script(region, image_uri, model_id, model_name, env_str, privileged_str, hf_token, directory) |
| 78 | + case constants.CONTAINER_TYPE_VLLM_GPU: |
| 79 | + deploy_script_content = vllm_gpu.create_script(region, image_uri, model_id, model_name, env_str, privileged_str, hf_token, directory) |
78 | 80 | case constants.CONTAINER_TYPE_TRITON: |
79 | 81 | deploy_script_content = triton.create_script(region, image_uri, model_id, model_name, env_str, privileged_str, hf_token, directory) |
80 | 82 | case constants.CONTAINER_TYPE_OLLAMA: |
@@ -115,7 +117,9 @@ def _check_model_deployment(endpoint, model_id, container_type, model_loading_ti |
115 | 117 | logger.info(f"Checking deployment status at {endpoint}") |
116 | 118 | if container_type == constants.CONTAINER_TYPE_DJL: |
117 | 119 | data = {"inputs": "tell me a story of the little red riding hood"} |
118 | | - elif container_type == constants.CONTAINER_TYPE_VLLM or container_type == constants.CONTAINER_TYPE_OLLAMA : |
| 120 | + elif container_type == constants.CONTAINER_TYPE_VLLM \ |
| 121 | + or container_type == constants.CONTAINER_TYPE_OLLAMA \ |
| 122 | + or container_type == constants.CONTAINER_TYPE_VLLM_GPU: |
119 | 123 | data = {"model": model_id, # Specify the model to use |
120 | 124 | "prompt": "tell me a story of the little red riding hood",} |
121 | 125 | elif container_type == constants.CONTAINER_TYPE_TRITON: |
|
0 commit comments