Merged

28 commits
a708b0e
enable remote endpoints
alexsin368 Jun 3, 2025
9380bc1
add align_outputs and align_generator
alexsin368 Jun 5, 2025
d226760
fix next_inputs[text when aligning inputs
alexsin368 Jun 5, 2025
bc2fe15
Merge branch 'opea-project:main' into audioqna-remote-endpoint
alexsin368 Jun 11, 2025
88f28a0
enable remote endpoints
alexsin368 Jun 3, 2025
b8c8519
add align_outputs and align_generator
alexsin368 Jun 5, 2025
5807ff5
fix next_inputs[text when aligning inputs
alexsin368 Jun 5, 2025
c21f034
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 27, 2025
bd16ac5
merge
alexsin368 Jun 27, 2025
b2eb382
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 27, 2025
6a9f1d7
import json library
alexsin368 Jun 27, 2025
2d073dc
Merge branch 'audioqna-remote-endpoint' of https://github.com/alexsin…
alexsin368 Jun 27, 2025
de6e8c6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 27, 2025
cad98eb
Merge branch 'opea-project:main' into audioqna-remote-endpoint
alexsin368 Jun 30, 2025
6e056f1
update instructions and name for API_KEY
alexsin368 Jun 30, 2025
0ae2897
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 30, 2025
ea99757
Merge branch 'main' into audioqna-remote-endpoint
alexsin368 Jul 2, 2025
9c0d201
Merge branch 'main' into audioqna-remote-endpoint
alexsin368 Jul 3, 2025
6f0dd43
add test for remote endpoints, update Enterprise Inference link
alexsin368 Jul 3, 2025
011c7c2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 3, 2025
e16f279
remove CI for remote endpoints
alexsin368 Jul 3, 2025
8c3e3f7
Merge branch 'main' into audioqna-remote-endpoint
alexsin368 Aug 5, 2025
ef8337b
remove reference to Denvr
alexsin368 Aug 5, 2025
1a18c6b
Merge branch 'audioqna-remote-endpoint' of https://github.com/alexsin…
alexsin368 Aug 5, 2025
f10eb2d
Merge branch 'main' into audioqna-remote-endpoint
chensuyue Aug 11, 2025
9d47534
Merge branch 'main' into audioqna-remote-endpoint
alexsin368 Aug 11, 2025
fe5f5f4
Merge branch 'main' into audioqna-remote-endpoint
alexsin368 Aug 15, 2025
71b3a6f
remove faqgen
alexsin368 Aug 15, 2025
46 changes: 42 additions & 4 deletions AudioQnA/audioqna.py
@@ -9,14 +9,14 @@
from fastapi import Request

MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))

WHISPER_SERVER_HOST_IP = os.getenv("WHISPER_SERVER_HOST_IP", "0.0.0.0")
WHISPER_SERVER_PORT = int(os.getenv("WHISPER_SERVER_PORT", 7066))
SPEECHT5_SERVER_HOST_IP = os.getenv("SPEECHT5_SERVER_HOST_IP", "0.0.0.0")
SPEECHT5_SERVER_PORT = int(os.getenv("SPEECHT5_SERVER_PORT", 7055))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 3006))
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", None)


def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
@@ -29,23 +29,60 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
next_inputs["top_p"] = llm_parameters_dict["top_p"]
next_inputs["stream"] = inputs["stream"] # False as default
next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
# next_inputs["presence_penalty"] = inputs["presence_penalty"]
# next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
next_inputs["temperature"] = inputs["temperature"]
inputs = next_inputs
elif self.services[cur_node].service_type == ServiceType.TTS:
next_inputs = {}
next_inputs["text"] = inputs["choices"][0]["message"]["content"]
next_inputs["text"] = inputs["text"]
next_inputs["voice"] = kwargs["voice"]
inputs = next_inputs
return inputs


def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
    next_data = {}
    if self.services[cur_node].service_type == ServiceType.LLM and not llm_parameters_dict["stream"]:
        if "faqgen" in self.services[cur_node].endpoint:
            next_data = data
        else:
            # Flatten the OpenAI chat-completions envelope so downstream nodes see {"text": ...}.
            next_data["text"] = data["choices"][0]["message"]["content"]
    else:
        next_data = data

    return next_data
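Taken together, `align_outputs` and the TTS branch of `align_inputs` form a simple handoff: the OpenAI-style chat-completions envelope is flattened to a plain `text` field, which the TTS node then consumes. A minimal standalone sketch of that handoff, using a made-up response payload and an illustrative voice name:

```python
# Hypothetical non-streaming LLM response in OpenAI chat-completions shape.
data = {"choices": [{"message": {"role": "assistant", "content": "Paris is the capital of France."}}]}

# What align_outputs produces for the LLM node when stream is False...
llm_output = {"text": data["choices"][0]["message"]["content"]}

# ...and what align_inputs then builds for the TTS node.
tts_input = {"text": llm_output["text"], "voice": "default"}
print(tts_input)  # {'text': 'Paris is the capital of France.', 'voice': 'default'}
```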


def align_generator(self, gen, **kwargs):
    # OpenAI streaming response format, e.g.:
    # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
    for line in gen:
        line = line.decode("utf-8")
        start = line.find("{")
        end = line.rfind("}") + 1

        json_str = line[start:end]
        try:
            # The stream sometimes yields an empty or partial chunk; fall back to the raw string below.
            json_data = json.loads(json_str)
            if "ops" in json_data and "op" in json_data["ops"][0]:
                if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str):
                    yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n"
                else:
                    pass
            elif "content" in json_data["choices"][0]["delta"]:
                yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
        except Exception:
            yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
    yield "data: [DONE]\n\n"


class AudioQnAService:
    def __init__(self, host="0.0.0.0", port=8000):
        self.host = host
        self.port = port
        # Install the custom alignment hooks before the orchestrator is instantiated.
        ServiceOrchestrator.align_inputs = align_inputs
        ServiceOrchestrator.align_outputs = align_outputs
        ServiceOrchestrator.align_generator = align_generator
        self.megaservice = ServiceOrchestrator()

        self.endpoint = str(MegaServiceEndpoint.AUDIO_QNA)
@@ -63,6 +100,7 @@ def add_remote_service(self):
name="llm",
host=LLM_SERVER_HOST_IP,
port=LLM_SERVER_PORT,
api_key=OPENAI_API_KEY,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
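Pieced together from the fragment above, the remote LLM registration presumably looks like the following. This is a sketch, not the verbatim diff: the `comps` import is assumed from the standard OPEA megaservice pattern, and the module-level constants are the ones defined at the top of `audioqna.py`.

```python
from comps import MicroService, ServiceType  # assumed import, as used by OPEA megaservices

llm = MicroService(
    name="llm",
    host=LLM_SERVER_HOST_IP,  # compose_remote.yaml points this at ${REMOTE_ENDPOINT}
    port=LLM_SERVER_PORT,
    api_key=OPENAI_API_KEY,  # new in this PR: credential for the remote endpoint
    endpoint="/v1/chat/completions",
    use_remote_service=True,
    service_type=ServiceType.LLM,
)
```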
13 changes: 13 additions & 0 deletions AudioQnA/docker_compose/intel/cpu/xeon/README.md
@@ -139,6 +139,19 @@ In the context of deploying an AudioQnA pipeline on an Intel® Xeon® platform,
| [compose.yaml](./compose.yaml) | Default compose file using vLLM as the serving framework and Redis as the vector database |
| [compose_tgi.yaml](./compose_tgi.yaml) | The LLM serving framework is TGI. All other configurations remain the same as the default |
| [compose_multilang.yaml](./compose_multilang.yaml) | The TTS component is GPT-SoVITS. All other configurations remain the same as the default |
| [compose_remote.yaml](./compose_remote.yaml) | The LLM is hosted on a remote server and accessed through an endpoint. Additional environment variables must be set before running. See the [instructions](#running-llm-models-deployed-on-remote-servers-with-compose_remoteyaml) below. |

### Running LLM models deployed on remote servers with `compose_remote.yaml`

To use an LLM hosted on a remote server, the environment variable `LLM_MODEL_ID` may need to be overridden, and two additional environment variables, `REMOTE_ENDPOINT` and `OPENAI_API_KEY`, must be set. An example endpoint is https://api.inference.example.com; the actual value depends on how the remote server is set up. The API key authenticates requests to that server.

```bash
export LLM_MODEL_ID=<name-of-llm-model-card>
export REMOTE_ENDPOINT=<https-endpoint-of-remote-server>
export OPENAI_API_KEY=<your-openai-api-key>
```

After setting these environment variables, bring the services up with `docker compose -f compose_remote.yaml up -d`.

## Validate MicroServices

Expand Down
67 changes: 67 additions & 0 deletions AudioQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml
@@ -0,0 +1,67 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  whisper-service:
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
    container_name: whisper-service
    ports:
      - ${WHISPER_SERVER_PORT:-7066}:7066
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
  speecht5-service:
    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
    container_name: speecht5-service
    ports:
      - ${SPEECHT5_SERVER_PORT:-7055}:7055
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
  audioqna-xeon-backend-server:
    image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
    container_name: audioqna-xeon-backend-server
    depends_on:
      - whisper-service
      - speecht5-service
    ports:
      - "3008:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
      - LLM_SERVER_HOST_IP=${REMOTE_ENDPOINT}
      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
      - LLM_MODEL_ID=${LLM_MODEL_ID}
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
      - SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
    ipc: host
    restart: always
  audioqna-xeon-ui-server:
    image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}
    container_name: audioqna-xeon-ui-server
    depends_on:
      - audioqna-xeon-backend-server
    ports:
      - "5173:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - CHAT_URL=${BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
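Once the stack is up, a quick smoke test against the backend (published on host port 3008 above) might look like the sketch below. The `/v1/audioqna` route and base64 audio payload follow the AudioQnA validation examples; the audio string here is a truncated placeholder, not a real recording.

```python
import requests  # assumes the requests package is installed

# Hypothetical smoke test: POST base64-encoded audio, expect base64-encoded audio back.
url = "http://localhost:3008/v1/audioqna"
payload = {"audio": "UklGRigAAABXQVZF...", "max_tokens": 64}  # placeholder WAV bytes
resp = requests.post(url, json=payload, timeout=120)
print(resp.status_code, resp.text[:80])
```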