From 985cddd2c5e3091ed74f4ab9758eed58acc2c146 Mon Sep 17 00:00:00 2001 From: Ed Lee <16417837+edlee123@users.noreply.github.com> Date: Tue, 10 Jun 2025 11:29:26 -0500 Subject: [PATCH 01/44] Compose file for ChatQnA example with openai-like endpoint Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .../cpu/xeon/compose_endpoint_openai.yaml | 176 ++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 ChatQnA/docker_compose/intel/cpu/xeon/compose_endpoint_openai.yaml diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_endpoint_openai.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_endpoint_openai.yaml new file mode 100644 index 0000000000..802d2020ec --- /dev/null +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_endpoint_openai.yaml @@ -0,0 +1,176 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "6379:6379" + - "8001:8001" + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 3s + retries: 10 + dataprep-redis-service: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + redis-vector-db: + condition: service_healthy + tei-embedding-service: + condition: service_started + ports: + - "6007:5000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: redis://redis-vector-db:6379 + REDIS_HOST: redis-vector-db + INDEX_NAME: ${INDEX_NAME} + TEI_ENDPOINT: http://tei-embedding-service:80 + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] + interval: 10s + timeout: 5s + retries: 50 + restart: unless-stopped + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "${MODEL_CACHE:-./data}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate + retriever: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + container_name: retriever-redis-server + depends_on: + - redis-vector-db + ports: + - "7000:7000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: redis://redis-vector-db:6379 + REDIS_HOST: redis-vector-db + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG} + RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" + restart: unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-reranking-server + ports: + - "8808:80" + volumes: + - "${MODEL_CACHE:-./data}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + command: --model-id ${RERANK_MODEL_ID} --auto-truncate + # Substitute vllm with OpeaTextGenService + textgen-service-endpoint-openai: # Used in stead of vllm + image: opea/llm-textgen:${TAG:-latest} # Changed image + container_name: textgen-service-endpoint-openai # Updated container name + ipc: host + ports: + - "9000:9000" # Changed port mapping + 
environment: + LLM_COMPONENT_NAME: OpeaTextGenService + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_MODEL_ID: ${LLM_MODEL_ID} # Set to model ID + LLM_ENDPOINT: ${LLM_ENDPOINT} # An openai compatible endpoint, e.g. Hugging Face, OpenRouter, OpenAI + OPENAI_API_KEY: ${OPENAI_API_KEY} # Add OpenRouter API Key + chatqna-xeon-backend-server: + image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} + container_name: chatqna-xeon-backend-server + depends_on: + redis-vector-db: + condition: service_started + dataprep-redis-service: + condition: service_healthy + tei-embedding-service: + condition: service_started + retriever: + condition: service_started + tei-reranking-service: + condition: service_started + textgen-service-endpoint-openai: + condition: service_started + ports: + - "8888:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server + - EMBEDDING_SERVER_HOST_IP=tei-embedding-service + - EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80} + - RETRIEVER_SERVICE_HOST_IP=retriever + - RERANK_SERVER_HOST_IP=tei-reranking-service + - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80} + - LLM_SERVER_HOST_IP=textgen-service-endpoint-openai # Updated host IP + - LLM_SERVER_PORT=${LLM_SERVER_PORT:-9000} + - LLM_MODEL=${LLM_MODEL_ID} + - LOGFLAG=${LOGFLAG} + ipc: host + restart: always + chatqna-xeon-ui-server: + image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest} + container_name: chatqna-xeon-ui-server + depends_on: + - chatqna-xeon-backend-server + ports: + - "5173:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + ipc: host + restart: always + chatqna-xeon-nginx-server: + image: ${REGISTRY:-opea}/nginx:${TAG:-latest} + container_name: chatqna-xeon-nginx-server + depends_on: + - chatqna-xeon-backend-server + - chatqna-xeon-ui-server + ports: + - "${NGINX_PORT:-80}:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - FRONTEND_SERVICE_IP=chatqna-xeon-ui-server + - FRONTEND_SERVICE_PORT=5173 + - BACKEND_SERVICE_NAME=chatqna + - BACKEND_SERVICE_IP=chatqna-xeon-backend-server + - BACKEND_SERVICE_PORT=8888 + - DATAPREP_SERVICE_IP=dataprep-redis-service + - DATAPREP_SERVICE_PORT=5000 + ipc: host + restart: always + +networks: + default: + driver: bridge From eec42f2841b65c31363f607368f53a76456d7eb3 Mon Sep 17 00:00:00 2001 From: Ed Lee <16417837+edlee123@users.noreply.github.com> Date: Tue, 10 Jun 2025 11:30:13 -0500 Subject: [PATCH 02/44] Adding README.md for ChatQnA + endpoint Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .../intel/cpu/xeon/README_endpoint_openai.md | 453 ++++++++++++++++++ 1 file changed, 453 insertions(+) create mode 100644 ChatQnA/docker_compose/intel/cpu/xeon/README_endpoint_openai.md diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_endpoint_openai.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_endpoint_openai.md new file mode 100644 index 0000000000..c7e6b3bc31 --- /dev/null +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_endpoint_openai.md @@ -0,0 +1,453 @@ +# Build Mega Service of ChatQnA on Xeon with an LLM Endpoint + +This document outlines the single node deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservices on Intel Xeon server. 
The steps include pulling Docker images, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank` and `llm`.
+
+## Table of contents
+
+1. [ChatQnA Quick Start Deployment](#chatqna-quick-start-deployment)
+2. [ChatQnA Docker Compose Files](#chatqna-docker-compose-files)
+3. [ChatQnA with Conversational UI](#chatqna-with-conversational-ui-optional)
+
+## ChatQnA Quick Start Deployment
+
+This section describes how to quickly deploy and test the ChatQnA service manually on an Intel® Xeon® processor. The basic steps are:
+
+1. [Access the Code](#access-the-code)
+2. [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token)
+3. [Configure the Deployment Environment](#configure-the-deployment-environment)
+4. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose)
+5. [Check the Deployment Status](#check-the-deployment-status)
+6. [Test the Pipeline](#test-the-pipeline)
+7. [Cleanup the Deployment](#cleanup-the-deployment)
+
+### Access the Code
+
+Clone the GenAIComps repository to build the `opea/llm-textgen` image, then clone the GenAIExamples repository and access the ChatQnA Intel® Xeon® platform Docker Compose files and supporting scripts:
+
+```
+git clone https://github.com/opea-project/GenAIComps
+cd GenAIComps
+
+# Build the opea/llm-textgen image.
+
+docker build \
+  --no-cache \
+  --build-arg https_proxy=$https_proxy \
+  --build-arg http_proxy=$http_proxy \
+  -t opea/llm-textgen:latest \
+  -f comps/llms/src/text-generation/Dockerfile .
+
+
+cd ../
+git clone https://github.com/opea-project/GenAIExamples.git
+cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
+```
+
+### Generate a HuggingFace Access Token
+
+Some HuggingFace resources, such as some models, are only accessible if the developer has an access token. If you do not already have a HuggingFace access token, create one by first creating an account by following the steps provided at [HuggingFace](https://huggingface.co/) and then generating a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token).
+
+### Endpoint Access
+
+An OpenAI-compatible endpoint (e.g., OpenRouter.ai) is required. Please obtain a valid API key for it.
+
+### Configure the Deployment Environment
+
+To set up environment variables for deploying the ChatQnA services, set the parameters specific to the deployment environment and source the _set_env.sh_ script in this directory:
+
+```bash
+cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon
+source set_env.sh # source environment variables then override below.
+
+export host_ip="External_Public_IP" # e.g. export host_ip=$(hostname -I | awk '{print $1}')
+export HF_TOKEN="Your_Huggingface_API_Token"
+export OPENAI_API_KEY="key for the OpenAI-like endpoint"
+
+export LLM_MODEL_ID="" # e.g. "google/gemma-3-1b-it:free"
+export LLM_ENDPOINT="" # e.g. "https://openrouter.ai/api" (please make sure to omit the /v1 suffix)
+export no_proxy="" # Set if any no_proxy variables are needed. See set_env.sh
+```
+
+Consult the section on [ChatQnA Service configuration](#chatqna-configuration) for information on how service specific configuration parameters affect deployments.
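+
+Before deploying, you can optionally confirm that the endpoint and API key are usable. As a quick sanity check (assuming the endpoint exposes the standard OpenAI-compatible `/v1/models` route, as OpenRouter.ai does), list the models it serves:
+
+```bash
+# Should return a JSON list of available models if LLM_ENDPOINT and OPENAI_API_KEY are valid
+curl ${LLM_ENDPOINT}/v1/models \
+  -H "Authorization: Bearer ${OPENAI_API_KEY}"
+```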
+
+### Deploy the Services Using Docker Compose
+
+To deploy the ChatQnA services, execute the `docker compose up` command with the appropriate arguments. For a default deployment of this example, execute the command below. It uses the `compose_endpoint_openai.yaml` file.
+
+```bash
+NGINX_PORT=8080 docker compose -f compose_endpoint_openai.yaml up -d
+```
+
+Setting NGINX_PORT=8080 lets you access the chat console at localhost:8080, since port 80 may already be in use on the host.
+
+To enable OpenTelemetry tracing, the compose.telemetry.yaml file needs to be merged with the compose file used for deployment.
+CPU example with the OpenTelemetry feature:
+
+> NOTE : To get the supported Grafana dashboards, please run download_opea_dashboard.sh as in the commands below.
+
+```bash
+./grafana/dashboards/download_opea_dashboard.sh
+docker compose -f compose_endpoint_openai.yaml -f compose.telemetry.yaml up -d
+```
+
+**Note**: developers should build the Docker images from source when:
+
+- Developing off the git main branch (as the container's ports in the repo may be different from the published docker image).
+- The published Docker image cannot be downloaded.
+- A specific version of the Docker image is needed.
+
+Please refer to the table below to build different microservices from source:
+
+| Microservice | Deployment Guide |
+| ------------ | --------------------------------------------------------------------------------------------- |
+| Dataprep | https://github.com/opea-project/GenAIComps/tree/main/comps/dataprep |
+| Embedding | https://github.com/opea-project/GenAIComps/tree/main/comps/embeddings |
+| Retriever | https://github.com/opea-project/GenAIComps/tree/main/comps/retrievers |
+| Reranker | https://github.com/opea-project/GenAIComps/tree/main/comps/rerankings |
+| LLM | https://github.com/opea-project/GenAIComps/tree/main/comps/llms |
+| Megaservice | [Megaservice build guide](../../../../README_miscellaneous.md#build-megaservice-docker-image) |
+| UI | [Basic UI build guide](../../../../README_miscellaneous.md#build-ui-docker-image) |
+
+### Check the Deployment Status
+
+After running docker compose, check if all the containers launched via docker compose have started:
+
+```
+docker ps -a
+```
+
+For the endpoint-based deployment, the following 9 containers should be running:
+
+```bash
+CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
+04f0e3607457 opea/nginx:${RELEASE_VERSION} "/docker-entrypoint.…" 17 minutes ago Up 16 minutes 0.0.0.0:8080->80/tcp, [::]:8080->80/tcp chatqna-xeon-nginx-server
+6d7fe1bfd0a5 opea/chatqna-ui:${RELEASE_VERSION} "docker-entrypoint.s…" 17 minutes ago Up 16 minutes 0.0.0.0:5173->5173/tcp, :::5173->5173/tcp chatqna-xeon-ui-server
+71d01fe8bc94 opea/chatqna:${RELEASE_VERSION} "python chatqna.py" 17 minutes ago Up 16 minutes 0.0.0.0:8888->8888/tcp, :::8888->8888/tcp chatqna-xeon-backend-server
+ea12fab1c70e opea/retriever:${RELEASE_VERSION} "python opea_retriev…" 17 minutes ago Up 17 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server
+253622403ed6 opea/dataprep:${RELEASE_VERSION} "sh -c 'python $( [ …" 17 minutes ago Up 17 minutes (healthy) 0.0.0.0:6007->5000/tcp, [::]:6007->5000/tcp dataprep-redis-server
+a552cf4f0dd0 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 17 minutes ago Up 17 minutes (healthy) 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db
+6795a52137f7 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 17 minutes ago Up 17 minutes 0.0.0.0:6006->80/tcp, [::]:6006->80/tcp tei-embedding-server
+3e55313e714b opea/llm-textgen:${RELEASE_VERSION} "bash entrypoint.sh" 17 minutes ago Up 17 minutes 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp textgen-service-endpoint-openai
+10318f82c943 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 17 minutes ago Up 17 minutes 0.0.0.0:8808->80/tcp, [::]:8808->80/tcp tei-reranking-server
+```
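+
+If a container is missing from this list or keeps restarting, its logs are usually the fastest way to find the cause. For example, to follow the logs of the endpoint-backed text generation service (the container name comes from the compose file above):
+
+```bash
+docker logs -f textgen-service-endpoint-openai
+```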
+
+If any issues are encountered during deployment, refer to the [troubleshooting](../../../../README_miscellaneous.md#troubleshooting) section.
+
+### Test the Pipeline
+
+Once the ChatQnA services are running, test the pipeline using the following command. This will send a sample query to the ChatQnA service and return a response.
+
+```bash
+curl http://${host_ip}:8888/v1/chatqna \
+  -H "Content-Type: application/json" \
+  -d '{
+    "messages": "What is the revenue of Nike in 2023?"
+  }'
+```
+
+**Note** : Access the ChatQnA UI by web browser through this URL: `http://${host_ip}:8080`. Please confirm that port `8080` is open in the firewall. To validate each microservice used in the pipeline, refer to the [Validate microservices](#validate-microservices) section.
+
+### Cleanup the Deployment
+
+To stop the containers associated with the deployment, execute the following command:
+
+```
+docker compose -f compose_endpoint_openai.yaml down
+```
+
+## ChatQnA Docker Compose Files
+
+In the context of deploying a ChatQnA pipeline on an Intel® Xeon® platform, we can pick and choose among different vector databases and large language model serving frameworks, and remove pieces of the pipeline such as the reranker. The table below outlines the various configurations that are available as part of the application. These configurations can be used as templates and can be extended to different components available in [GenAIComps](https://github.com/opea-project/GenAIComps.git).
+
+| File | Description |
+| -------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| [compose.yaml](./compose.yaml) | Default compose file using vllm as serving framework and redis as vector database |
+| [compose_endpoint_openai.yaml](./compose_endpoint_openai.yaml) | Uses an OpenAI-compatible endpoint (remote or local) as the LLM serving framework with redis as vector database. |
+| [compose_milvus.yaml](./compose_milvus.yaml) | Uses Milvus as the vector database. All other configurations remain the same as the default |
+| [compose_pinecone.yaml](./compose_pinecone.yaml) | Uses Pinecone as the vector database. All other configurations remain the same as the default. For more details, refer to [README_pinecone.md](./README_pinecone.md). |
+| [compose_qdrant.yaml](./compose_qdrant.yaml) | Uses Qdrant as the vector database. All other configurations remain the same as the default. For more details, refer to [README_qdrant.md](./README_qdrant.md). |
+| [compose_tgi.yaml](./compose_tgi.yaml) | Uses TGI as the LLM serving framework. All other configurations remain the same as the default |
+| [compose_without_rerank.yaml](./compose_without_rerank.yaml) | Default configuration without the reranker |
+| [compose_faqgen.yaml](./compose_faqgen.yaml) | Enables FAQ generation using vLLM as the LLM serving framework. For more details, refer to [README_faqgen.md](./README_faqgen.md). |
+| [compose_faqgen_tgi.yaml](./compose_faqgen_tgi.yaml) | Enables FAQ generation using TGI as the LLM serving framework. For more details, refer to [README_faqgen.md](./README_faqgen.md). |
+| [compose.telemetry.yaml](./compose.telemetry.yaml) | Helper file for telemetry features for vllm. Can be used along with any compose files that serve vllm |
+| [compose_tgi.telemetry.yaml](./compose_tgi.telemetry.yaml) | Helper file for telemetry features for tgi. Can be used along with any compose files that serve tgi |
+| [compose_mariadb.yaml](./compose_mariadb.yaml) | Uses MariaDB Server as the vector database. All other configurations remain the same as the default |
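+
+Any of these variants is deployed the same way as above by pointing `docker compose` at the corresponding file. For example, a sketch of deploying the default pipeline without the reranker (assuming the same environment variables have been sourced):
+
+```bash
+docker compose -f compose_without_rerank.yaml up -d
+```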
+
+## ChatQnA with Conversational UI (Optional)
+
+To access the Conversational UI (React-based) frontend, modify the UI service in the compose file used to deploy. Replace the `chatqna-xeon-ui-server` service with the `chatqna-xeon-conversation-ui-server` service as per the config below:
+
+```yaml
+chatqna-xeon-conversation-ui-server:
+  image: opea/chatqna-conversation-ui:latest
+  container_name: chatqna-xeon-conversation-ui-server
+  environment:
+    - APP_BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT}
+    - APP_DATA_PREP_SERVICE_URL=${DATAPREP_SERVICE_ENDPOINT}
+  ports:
+    - "5174:80"
+  depends_on:
+    - chatqna-xeon-backend-server
+  ipc: host
+  restart: always
+```
+
+Once the services are up, open the following URL in the browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If the developer prefers to use a different host port to access the frontend, it can be modified by changing the port mapping in the compose file as shown below:
+
+```yaml
+  chatqna-xeon-conversation-ui-server:
+    image: opea/chatqna-conversation-ui:latest
+    ...
+    ports:
+      - "80:80"
+```
+
+Here is an example of running ChatQnA (default UI):
+
+![project-screenshot](../../../../assets/img/chat_ui_response.png)
+
+Here is an example of running ChatQnA with Conversational UI (React):
+
+![project-screenshot](../../../../assets/img/conversation_ui_response.png)
+
+### Validate Microservices
+
+Note: when verifying the microservices by curl or API from a remote client, please make sure the **ports** of the microservices are opened in the firewall of the cloud node.
+Follow the instructions below to validate the microservices.
+For details on how to verify the correctness of the response, refer to [how-to-validate_service](../../hpu/gaudi/how_to_validate_service.md).
+
+1. **TEI Embedding Service**
+   Send a test request to the TEI Embedding Service to ensure it is running correctly:
+
+   ```bash
+   curl http://${host_ip}:6006/embed \
+     -X POST \
+     -d '{"inputs":"What is Deep Learning?"}' \
+     -H 'Content-Type: application/json'
+   ```
+
+   If you receive a connection error, ensure that the service is running and that port 6006 is open in the firewall.
+
+2. **Retriever Microservice**
+
+   To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector
+   is determined by the embedding model.
+   Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, whose vector size is 768.
+
+   Check the vector dimension of your embedding model and set the `your_embedding` dimension equal to it.
+
+   ```bash
+   export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
+   curl http://${host_ip}:7000/v1/retrieval \
+     -X POST \
+     -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
+     -H 'Content-Type: application/json'
+   ```
+
+   If the response indicates an invalid embedding vector, verify that the vector size matches the model's expected dimension.
+
+3. **TEI Reranking Service**
+
+   To test the TEI Reranking Service, use the following `curl` command:
+
+   > Skip for ChatQnA without Rerank pipeline
+
+   ```bash
+   curl http://${host_ip}:8808/rerank \
+     -X POST \
+     -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
+     -H 'Content-Type: application/json'
+   ```
+
+4. **LLM Backend Service**
+
+   On the first startup, this service may take some time to initialize. After it is finished, the service will be ready.
+
+   Try the command below to check whether the LLM serving is ready.
+
+   ```bash
+   docker logs textgen-service-endpoint-openai 2>&1 | grep complete
+   # If the service is ready, you will get a response like below.
+   INFO: Application startup complete.
+   ```
+
+   Then try the `curl` commands below to validate the service.
+
+You may also test your underlying LLM endpoint directly. E.g., for OpenRouter.ai:
+
+```bash
+curl https://openrouter.ai/api/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $OPENAI_API_KEY" \
+  -d "{
+    \"model\": \"${LLM_MODEL_ID}\",
+    \"messages\": [
+      {
+        \"role\": \"user\",
+        \"content\": \"What is the meaning of life?\"
+      }
+    ]
+}"
+```
+
+To test the OPEA LLM microservice that wraps the endpoint above:
+
+```bash
+curl http://${host_ip}:9000/v1/chat/completions \
+  -X POST \
+  -d "{\"model\": \"${LLM_MODEL_ID}\", \"messages\": [{\"role\": \"user\", \"content\": \"What is Deep Learning?\"}], \"max_tokens\":17}" \
+  -H 'Content-Type: application/json'
+```
+
+5. **MegaService**
+
+   Use the following `curl` command to test the MegaService:
+
+   ```bash
+   curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
+     "messages": "What is the revenue of Nike in 2023?"
+     }'
+   ```
+
+6. **Nginx Service**
+
+   Use the following curl command to test the Nginx Service:
+
+   ```bash
+   curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
+     -H "Content-Type: application/json" \
+     -d '{"messages": "What is the revenue of Nike in 2023?"}'
+   ```
+
+7. **Dataprep Microservice (Optional)**
+
+   If you want to update the default knowledge base, you can use the following commands:
+
+   Update Knowledge Base via Local File [nke-10k-2023.pdf](https://github.com/opea-project/GenAIComps/blob/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf). Or
+   click [here](https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf) to download the file via any web browser.
+   Or run this command to get the file on a terminal.
+
+   ```bash
+   wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf
+   ```
+
+   Upload:
+
+   ```bash
+   curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
+     -H "Content-Type: multipart/form-data" \
+     -F "files=@./nke-10k-2023.pdf"
+   ```
+
+   This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
+
+   Add Knowledge Base via HTTP Links:
+
+   ```bash
+   curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
+     -H "Content-Type: multipart/form-data" \
+     -F 'link_list=["https://opea.dev"]'
+   ```
+
+   This command updates a knowledge base by submitting a list of HTTP links for processing.
+
+   Also, you are able to get the list of files that you uploaded:
+
+   ```bash
+   curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \
+     -H "Content-Type: application/json"
+   ```
+
+   Then you will get a response JSON like this. Notice that the returned `name`/`id` of an uploaded link is `https://xxx.txt`.
+
+   ```json
+   [
+     {
+       "name": "nke-10k-2023.pdf",
+       "id": "nke-10k-2023.pdf",
+       "type": "File",
+       "parent": ""
+     },
+     {
+       "name": "https://opea.dev.txt",
+       "id": "https://opea.dev.txt",
+       "type": "File",
+       "parent": ""
+     }
+   ]
+   ```
+
+   To delete the file/link you uploaded:
+
+   The `file_path` here should be the `id` obtained from the `/v1/dataprep/get` API.
+
+   ```bash
+   # delete link
+   curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
+     -d '{"file_path": "https://opea.dev.txt"}' \
+     -H "Content-Type: application/json"
+
+   # delete file
+   curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
+     -d '{"file_path": "nke-10k-2023.pdf"}' \
+     -H "Content-Type: application/json"
+
+   # delete all uploaded files and links
+   curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
+     -d '{"file_path": "all"}' \
+     -H "Content-Type: application/json"
+   ```
+
+### Profile Microservices
+
+To further analyze microservice performance, users can follow the instructions below to profile the microservices.
+
+#### 1. LLM Endpoint Service
+
+Users can profile the performance of the endpoint service using standard HTTP/network profiling tools such as:
+
+- cURL timing statistics
+- Browser developer tools
+- Network monitoring tools
+
+Example using cURL with timing data:
+
+```bash
+curl -w "\nTime Statistics:\n-----------------\n\
+DNS Lookup: %{time_namelookup}s\n\
+TCP Connect: %{time_connect}s\n\
+TLS Handshake: %{time_appconnect}s\n\
+First Byte: %{time_starttransfer}s\n\
+Total Time: %{time_total}s\n" \
+-H "Content-Type: application/json" \
+-H "Authorization: Bearer $OPENAI_API_KEY" \
+-d "{
+  \"model\": \"${LLM_MODEL_ID}\",
+  \"messages\": [
+    {
+      \"role\": \"user\",
+      \"content\": \"What is machine learning?\"
+    }
+  ]
+}" \
+${LLM_ENDPOINT}/v1/chat/completions
+```
+
+You can also use tools like `ab` (Apache Benchmark) for load testing:
+
+```bash
+ab -n 100 -c 10 -p payload.json -T 'application/json' \
+  -H "Authorization: Bearer $OPENAI_API_KEY" \
+  ${LLM_ENDPOINT}/v1/chat/completions
+```
+
+For detailed API latency monitoring, consider using:
+
+- Grafana for visualization
+- Prometheus for metrics collection
+- OpenTelemetry for distributed tracing
+
+## Conclusion
+
+This guide should enable developers to deploy the default configuration or any of the other compose YAML files for different configurations. It also highlights the configurable parameters that can be set before deployment.
From 39962f641e0a3ed95952b130f79af8124f88be96 Mon Sep 17 00:00:00 2001
From: Ed Lee <16417837+edlee123@users.noreply.github.com>
Date: Tue, 10 Jun 2025 11:36:17 -0500
Subject: [PATCH 03/44] In chatqna.py handle null openai api response since UI would show null json.
Also improved exception handling and logging Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- ChatQnA/chatqna.py | 87 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 18 deletions(-) diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py index d802bf3a51..6266dace8f 100644 --- a/ChatQnA/chatqna.py +++ b/ChatQnA/chatqna.py @@ -3,9 +3,15 @@ import argparse import json +import logging import os import re +# Configure logging +logger = logging.getLogger(__name__) +log_level = logging.DEBUG if os.getenv("LOGFLAG", "").lower() == "true" else logging.INFO +logging.basicConfig(level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") + from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType from comps.cores.mega.utils import handle_message from comps.cores.proto.api_protocol import ( @@ -62,6 +68,10 @@ def generate_rag_prompt(question, documents): def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs): + logger.debug( + f"Aligning inputs for service: {self.services[cur_node].name}, type: {self.services[cur_node].service_type}" + ) + if self.services[cur_node].service_type == ServiceType.EMBEDDING: inputs["inputs"] = inputs["text"] del inputs["text"] @@ -83,6 +93,9 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k # next_inputs["repetition_penalty"] = inputs["repetition_penalty"] next_inputs["temperature"] = inputs["temperature"] inputs = next_inputs + + # Log the aligned inputs (be careful with sensitive data) + logger.debug(f"Aligned inputs for {self.services[cur_node].name}: {type(inputs)}") return inputs @@ -123,7 +136,9 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di elif input_variables == ["question"]: prompt = prompt_template.format(question=data["initial_query"]) else: - print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']") + logger.warning( + f"{prompt_template} not used, we only support 2 input variables ['question', 'context']" + ) prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs) else: prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs) @@ -152,7 +167,7 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di elif input_variables == ["question"]: prompt = prompt_template.format(question=prompt) else: - print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']") + logger.warning(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']") prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs) else: prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs) @@ -171,29 +186,65 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di def align_generator(self, gen, **kwargs): - # OpenAI response format - # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n' - for line in gen: - line = line.decode("utf-8") - start = line.find("{") - end = line.rfind("}") + 1 + """Aligns the generator output to match ChatQnA's format of sending bytes. 
+ + Handles different LLM output formats (TGI, OpenAI) and properly filters + empty or null content chunks to avoid UI display issues. + """ + # OpenAI response format example: + # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct", + # "system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"}, + # "logprobs":null,"finish_reason":null}]}\n\n' - json_str = line[start:end] + for line in gen: try: - # sometimes yield empty chunk, do a fallback here + line = line.decode("utf-8") + start = line.find("{") + end = line.rfind("}") + 1 + + # Skip lines with invalid JSON structure + if start == -1 or end <= start: + logger.debug("Skipping line with invalid JSON structure") + continue + + json_str = line[start:end] + + # Parse the JSON data json_data = json.loads(json_str) + + # Handle TGI format responses if "ops" in json_data and "op" in json_data["ops"][0]: if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str): yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n" - else: - pass - elif ( - json_data["choices"][0]["finish_reason"] != "eos_token" - and "content" in json_data["choices"][0]["delta"] - ): - yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n" + # Empty value chunks are silently skipped + + # Handle OpenAI format responses + elif "choices" in json_data and len(json_data["choices"]) > 0: + # Only yield content if it exists and is not null + if ( + "delta" in json_data["choices"][0] + and "content" in json_data["choices"][0]["delta"] + and json_data["choices"][0]["delta"]["content"] is not None + ): + content = json_data["choices"][0]["delta"]["content"] + yield f"data: {repr(content.encode('utf-8'))}\n\n" + # Null content chunks are silently skipped + elif ( + "delta" in json_data["choices"][0] + and "content" in json_data["choices"][0]["delta"] + and json_data["choices"][0]["delta"]["content"] is None + ): + logger.debug("Skipping null content chunk") + + except json.JSONDecodeError as e: + # Log the error with the problematic JSON string for better debugging + logger.error(f"JSON parsing error in align_generator: {e}\nProblematic JSON: {json_str[:200]}") + # Skip sending invalid JSON to avoid UI issues + continue except Exception as e: - yield f"data: {repr(json_str.encode('utf-8'))}\n\n" + logger.error(f"Unexpected error in align_generator: {e}, line snippet: {line[:100]}...") + # Skip sending to avoid UI issues + continue yield "data: [DONE]\n\n" From d227878e3de764c0bfc1f24af8e212c5faa0617b Mon Sep 17 00:00:00 2001 From: "Ed Lee @ Intel" <16417837+edlee123@users.noreply.github.com> Date: Tue, 10 Jun 2025 11:55:33 -0500 Subject: [PATCH 04/44] Update ChatQnA/docker_compose/intel/cpu/xeon/compose_endpoint_openai.yaml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .../docker_compose/intel/cpu/xeon/compose_endpoint_openai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_endpoint_openai.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_endpoint_openai.yaml index 802d2020ec..6b5a6fad07 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_endpoint_openai.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_endpoint_openai.yaml @@ -88,7 +88,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate # 
Substitute vllm with OpeaTextGenService - textgen-service-endpoint-openai: # Used in stead of vllm + textgen-service-endpoint-openai: # Used instead of vllm image: opea/llm-textgen:${TAG:-latest} # Changed image container_name: textgen-service-endpoint-openai # Updated container name ipc: host From 139972cd6cadb0689b004f085e12e268aa1a56d6 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Thu, 29 May 2025 11:01:34 +0800 Subject: [PATCH 05/44] Add tests for different input formats (#2006) Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- CodeGen/tests/test_compose_on_gaudi.sh | 7 +++++++ CodeGen/tests/test_compose_on_xeon.sh | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index 87acfbaa5a..38354233d9 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -143,6 +143,13 @@ function validate_megaservice() { "codegen-gaudi-backend-server" \ '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}' + validate_services \ + "${ip_address}:7778/v1/codegen" \ + "class" \ + "mega-codegen" \ + "codegen-xeon-backend-server" \ + '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}' + } function validate_frontend() { diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index a50e5f0a7e..4d7267d615 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -146,6 +146,13 @@ function validate_megaservice() { "codegen-xeon-backend-server" \ '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}' + validate_services \ + "${ip_address}:7778/v1/codegen" \ + "class" \ + "mega-codegen" \ + "codegen-xeon-backend-server" \ + '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}' + } function validate_frontend() { From 9fc12351ca475cc8cc9a3e3ea12cc1072da189d6 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Thu, 29 May 2025 11:02:09 +0800 Subject: [PATCH 06/44] Fix security issues in workflows (#1977) Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .github/workflows/_build_comps_base_image.yml | 7 +++- .github/workflows/_build_image.yml | 17 +++++++- .github/workflows/_example-workflow.yml | 17 +++++++- .github/workflows/_get-image-list.yml | 3 +- .github/workflows/_gmc-e2e.yml | 3 +- .github/workflows/_gmc-workflow.yml | 3 +- .github/workflows/dockerhub-description.yml | 2 + .github/workflows/manual-docker-clean.yml | 2 + .github/workflows/manual-example-workflow.yml | 19 ++++++++- .github/workflows/manual-freeze-tag.yml | 3 +- .github/workflows/manual-image-build.yml | 16 ++++++++ .../workflows/manual-reset-local-registry.yml | 16 ++++++++ .github/workflows/mix-trellix.yml | 3 +- .../nightly-docker-build-publish.yml | 40 +++++++++++++++++++ .github/workflows/pr-chart-e2e.yml | 3 +- .../workflows/pr-check-duplicated-image.yml | 3 +- .github/workflows/pr-code-scan.yml | 4 +- .github/workflows/pr-docker-compose-e2e.yml | 3 ++ ...pr-dockerfile-path-and-build-yaml-scan.yml | 3 +- .github/workflows/pr-link-path-scan.yml | 3 ++ .github/workflows/push-image-build.yml | 17 ++++++++ .../workflows/push-images-path-detection.yml | 4 +- 
.../workflows/push-infra-issue-creation.yml | 4 ++ .github/workflows/weekly-example-test.yml | 24 +++++++++++ 24 files changed, 205 insertions(+), 14 deletions(-) diff --git a/.github/workflows/_build_comps_base_image.yml b/.github/workflows/_build_comps_base_image.yml index 2f3cb00312..04c8a55440 100644 --- a/.github/workflows/_build_comps_base_image.yml +++ b/.github/workflows/_build_comps_base_image.yml @@ -2,7 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 name: Build Comps Base Image -permissions: read-all + +permissions: + attestations: read + models: read + security-events: read + on: workflow_call: inputs: diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml index 79cab22216..a62686b3c5 100644 --- a/.github/workflows/_build_image.yml +++ b/.github/workflows/_build_image.yml @@ -2,7 +2,22 @@ # SPDX-License-Identifier: Apache-2.0 name: Build Images -permissions: read-all +permissions: + actions: read + contents: read + checks: read + deployments: read + discussions: read + issues: read + packages: read + pages: read + pull-requests: read + repository-projects: read + statuses: read + security-events: read + id-token: write + attestations: read + models: read on: workflow_call: inputs: diff --git a/.github/workflows/_example-workflow.yml b/.github/workflows/_example-workflow.yml index bc54f6e63b..35b1f16cd1 100644 --- a/.github/workflows/_example-workflow.yml +++ b/.github/workflows/_example-workflow.yml @@ -2,7 +2,22 @@ # SPDX-License-Identifier: Apache-2.0 name: Example jobs -permissions: read-all +permissions: + actions: read + contents: read + checks: read + deployments: read + discussions: read + issues: read + packages: read + pages: read + pull-requests: read + repository-projects: read + statuses: read + security-events: read + id-token: write + attestations: read + models: read on: workflow_call: inputs: diff --git a/.github/workflows/_get-image-list.yml b/.github/workflows/_get-image-list.yml index 9abd893ecc..7a790d764a 100644 --- a/.github/workflows/_get-image-list.yml +++ b/.github/workflows/_get-image-list.yml @@ -2,7 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 name: Get Image List -permissions: read-all +permissions: + contents: read on: workflow_call: inputs: diff --git a/.github/workflows/_gmc-e2e.yml b/.github/workflows/_gmc-e2e.yml index 331eea0c81..ba50e8b955 100644 --- a/.github/workflows/_gmc-e2e.yml +++ b/.github/workflows/_gmc-e2e.yml @@ -3,7 +3,8 @@ # This workflow will only test GMC pipeline and will not install GMC any more name: Single GMC E2e Test For CD Workflow Call - +permissions: + contents: read on: workflow_call: inputs: diff --git a/.github/workflows/_gmc-workflow.yml b/.github/workflows/_gmc-workflow.yml index 77c01177a5..32ff08266d 100644 --- a/.github/workflows/_gmc-workflow.yml +++ b/.github/workflows/_gmc-workflow.yml @@ -2,7 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 name: Build and deploy GMC system on call and manual - +permissions: + contents: read on: workflow_dispatch: inputs: diff --git a/.github/workflows/dockerhub-description.yml b/.github/workflows/dockerhub-description.yml index 4dcfee1f36..296f464f47 100644 --- a/.github/workflows/dockerhub-description.yml +++ b/.github/workflows/dockerhub-description.yml @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 name: Update Docker Hub Description +permissions: + contents: read on: schedule: - cron: "0 0 * * 0" diff --git a/.github/workflows/manual-docker-clean.yml b/.github/workflows/manual-docker-clean.yml index 25cf228721..886cf27234 100644 --- 
a/.github/workflows/manual-docker-clean.yml +++ b/.github/workflows/manual-docker-clean.yml @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 name: Clean up container on manual event +permissions: + contents: read on: workflow_dispatch: inputs: diff --git a/.github/workflows/manual-example-workflow.yml b/.github/workflows/manual-example-workflow.yml index 919ccdeae7..338454d9aa 100644 --- a/.github/workflows/manual-example-workflow.yml +++ b/.github/workflows/manual-example-workflow.yml @@ -2,6 +2,24 @@ # SPDX-License-Identifier: Apache-2.0 name: Examples CD workflow on manual event + +permissions: + actions: read + contents: read + checks: read + deployments: read + discussions: read + issues: read + packages: read + pages: read + pull-requests: read + repository-projects: read + statuses: read + security-events: read + id-token: write + attestations: read + models: read + on: workflow_dispatch: inputs: @@ -51,7 +69,6 @@ on: required: false type: boolean -permissions: read-all jobs: get-test-matrix: runs-on: ubuntu-latest diff --git a/.github/workflows/manual-freeze-tag.yml b/.github/workflows/manual-freeze-tag.yml index 6dd55c9032..88c1bb6c85 100644 --- a/.github/workflows/manual-freeze-tag.yml +++ b/.github/workflows/manual-freeze-tag.yml @@ -2,7 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 name: Freeze OPEA images release tag - +permissions: + contents: read on: workflow_dispatch: inputs: diff --git a/.github/workflows/manual-image-build.yml b/.github/workflows/manual-image-build.yml index 92da9c2231..fbfd2bef26 100644 --- a/.github/workflows/manual-image-build.yml +++ b/.github/workflows/manual-image-build.yml @@ -2,6 +2,22 @@ # SPDX-License-Identifier: Apache-2.0 name: Build specific images on manual event +permissions: + actions: read + contents: read + checks: read + deployments: read + discussions: read + issues: read + packages: read + pages: read + pull-requests: read + repository-projects: read + statuses: read + security-events: read + id-token: write + attestations: read + models: read on: workflow_dispatch: inputs: diff --git a/.github/workflows/manual-reset-local-registry.yml b/.github/workflows/manual-reset-local-registry.yml index de9cfd78e9..88c839ab82 100644 --- a/.github/workflows/manual-reset-local-registry.yml +++ b/.github/workflows/manual-reset-local-registry.yml @@ -2,6 +2,22 @@ # SPDX-License-Identifier: Apache-2.0 name: Clean up Local Registry on manual event +permissions: + actions: read + contents: read + checks: read + deployments: read + discussions: read + issues: read + packages: read + pages: read + pull-requests: read + repository-projects: read + statuses: read + security-events: read + id-token: write + attestations: read + models: read on: workflow_dispatch: inputs: diff --git a/.github/workflows/mix-trellix.yml b/.github/workflows/mix-trellix.yml index 8779f3b9ad..65f18e6dbc 100644 --- a/.github/workflows/mix-trellix.yml +++ b/.github/workflows/mix-trellix.yml @@ -2,7 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 name: Trellix Command Line Scanner - +permissions: + contents: read on: workflow_dispatch: schedule: diff --git a/.github/workflows/nightly-docker-build-publish.yml b/.github/workflows/nightly-docker-build-publish.yml index adac4b6d14..1d776c8433 100644 --- a/.github/workflows/nightly-docker-build-publish.yml +++ b/.github/workflows/nightly-docker-build-publish.yml @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 name: Nightly build/publish latest docker images +permissions: + security-events: read on: schedule: @@ -33,12 +35,32 @@ 
jobs: echo "PUBLISH_TAGS=$PUBLISH_TAGS" >> $GITHUB_OUTPUT build-comps-base: + permissions: + attestations: read + models: read + security-events: read needs: [get-build-matrix] uses: ./.github/workflows/_build_comps_base_image.yml with: node: gaudi build-images: + permissions: + actions: read + contents: read + checks: read + deployments: read + discussions: read + issues: read + packages: read + pages: read + pull-requests: read + repository-projects: read + statuses: read + security-events: read + id-token: write + attestations: read + models: read needs: [get-build-matrix, build-comps-base] strategy: matrix: @@ -53,6 +75,22 @@ jobs: test-example: needs: [get-build-matrix] + permissions: + actions: read + contents: read + checks: read + deployments: read + discussions: read + issues: read + packages: read + pages: read + pull-requests: read + repository-projects: read + statuses: read + security-events: read + id-token: write + attestations: read + models: read if: ${{ needs.get-build-matrix.outputs.examples_json != '' }} strategy: matrix: @@ -69,6 +107,8 @@ jobs: get-image-list: needs: [get-build-matrix] + permissions: + contents: read uses: ./.github/workflows/_get-image-list.yml with: examples: ${{ needs.get-build-matrix.outputs.EXAMPLES }} diff --git a/.github/workflows/pr-chart-e2e.yml b/.github/workflows/pr-chart-e2e.yml index 876960e7d9..3990e5fce9 100644 --- a/.github/workflows/pr-chart-e2e.yml +++ b/.github/workflows/pr-chart-e2e.yml @@ -2,7 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 name: E2E Test with Helm Charts - +permissions: + contents: read on: pull_request_target: branches: [main] diff --git a/.github/workflows/pr-check-duplicated-image.yml b/.github/workflows/pr-check-duplicated-image.yml index 0cdba415a2..2922b8f4fe 100644 --- a/.github/workflows/pr-check-duplicated-image.yml +++ b/.github/workflows/pr-check-duplicated-image.yml @@ -2,7 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 name: Check Duplicated Images - +permissions: + contents: read on: pull_request: branches: [main] diff --git a/.github/workflows/pr-code-scan.yml b/.github/workflows/pr-code-scan.yml index 7accb94ea2..10d3ea5fa4 100644 --- a/.github/workflows/pr-code-scan.yml +++ b/.github/workflows/pr-code-scan.yml @@ -2,7 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 name: Code Scan - +permissions: + contents: read + security-events: write on: pull_request: branches: [main] diff --git a/.github/workflows/pr-docker-compose-e2e.yml b/.github/workflows/pr-docker-compose-e2e.yml index a7604f29af..d7aba56bb5 100644 --- a/.github/workflows/pr-docker-compose-e2e.yml +++ b/.github/workflows/pr-docker-compose-e2e.yml @@ -3,6 +3,9 @@ name: E2E test with docker compose +permissions: + contents: read + on: pull_request_target: branches: ["main", "*rc"] diff --git a/.github/workflows/pr-dockerfile-path-and-build-yaml-scan.yml b/.github/workflows/pr-dockerfile-path-and-build-yaml-scan.yml index 3b8be26137..2775c2ae13 100644 --- a/.github/workflows/pr-dockerfile-path-and-build-yaml-scan.yml +++ b/.github/workflows/pr-dockerfile-path-and-build-yaml-scan.yml @@ -2,7 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 name: Compose file and dockerfile path checking - +permissions: + contents: read on: pull_request: branches: [main] diff --git a/.github/workflows/pr-link-path-scan.yml b/.github/workflows/pr-link-path-scan.yml index 1f389a7c69..d165b82ea1 100644 --- a/.github/workflows/pr-link-path-scan.yml +++ b/.github/workflows/pr-link-path-scan.yml @@ -3,6 +3,9 @@ name: Check hyperlinks and relative path validity 
+permissions: + contents: read + on: pull_request: branches: [main] diff --git a/.github/workflows/push-image-build.yml b/.github/workflows/push-image-build.yml index 0cbbb970db..9f551a67bc 100644 --- a/.github/workflows/push-image-build.yml +++ b/.github/workflows/push-image-build.yml @@ -3,6 +3,23 @@ # Test name: Build latest images on push event +permissions: + actions: read + contents: read + checks: read + deployments: read + discussions: read + issues: read + packages: read + pages: read + pull-requests: read + repository-projects: read + statuses: read + security-events: read + id-token: write + attestations: read + models: read + on: push: branches: [ 'main' ] diff --git a/.github/workflows/push-images-path-detection.yml b/.github/workflows/push-images-path-detection.yml index 299ee4d180..9edfad2678 100644 --- a/.github/workflows/push-images-path-detection.yml +++ b/.github/workflows/push-images-path-detection.yml @@ -3,10 +3,12 @@ name: Check the validity of links in docker_images_list. +permissions: + contents: read + on: push: branches: [main] - types: [opened, reopened, ready_for_review, synchronize] jobs: check-dockerfile-paths: diff --git a/.github/workflows/push-infra-issue-creation.yml b/.github/workflows/push-infra-issue-creation.yml index 132f64d1a8..2dd2de23c0 100644 --- a/.github/workflows/push-infra-issue-creation.yml +++ b/.github/workflows/push-infra-issue-creation.yml @@ -8,6 +8,10 @@ on: - "**/docker_compose/**/compose*.yaml" name: Create an issue to GenAIInfra on push + +permissions: + contents: read + jobs: job1: name: Create issue diff --git a/.github/workflows/weekly-example-test.yml b/.github/workflows/weekly-example-test.yml index 4b8391a1dd..832cc11681 100644 --- a/.github/workflows/weekly-example-test.yml +++ b/.github/workflows/weekly-example-test.yml @@ -3,6 +3,10 @@ name: Weekly test all examples on multiple HWs +permissions: + contents: read + id-token: write + on: schedule: - cron: "30 2 * * 6" # UTC time @@ -31,6 +35,10 @@ jobs: build-comps-base: needs: [get-test-matrix] + permissions: + attestations: read + models: read + security-events: read strategy: matrix: node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }} @@ -39,6 +47,22 @@ jobs: node: ${{ matrix.node }} run-examples: + permissions: + contents: read + id-token: write + actions: read + attestations: read + checks: read + deployments: read + discussions: read + issues: read + models: read + packages: read + pages: read + pull-requests: read + repository-projects: read + statuses: read + security-events: read needs: [get-test-matrix, build-comps-base] strategy: matrix: From 70db6c5f7a38188b57cd0d494d91de326d3cb970 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Thu, 29 May 2025 11:27:06 +0800 Subject: [PATCH 07/44] Integrate MultimodalQnA set_env to ut scripts. (#1965) Integrate MultimodalQnA set_env to ut scripts. Add README.md for UT scripts. 
Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .../docker_compose/amd/gpu/rocm/compose.yaml | 2 +- .../amd/gpu/rocm/compose_vllm.yaml | 2 +- .../docker_compose/amd/gpu/rocm/set_env.sh | 8 +-- .../amd/gpu/rocm/set_env_vllm.sh | 14 ++--- .../intel/cpu/xeon/compose.yaml | 2 +- .../intel/cpu/xeon/compose_milvus.yaml | 2 +- .../docker_compose/intel/cpu/xeon/set_env.sh | 54 ------------------- .../intel/hpu/gaudi/compose.yaml | 2 +- .../intel/hpu/gaudi/compose_milvus.yaml | 2 +- .../intel/{hpu/gaudi => }/set_env.sh | 4 +- MultimodalQnA/tests/README.md | 45 ++++++++++++++++ .../tests/test_compose_milvus_on_xeon.sh | 29 +--------- MultimodalQnA/tests/test_compose_on_gaudi.sh | 38 +------------ MultimodalQnA/tests/test_compose_on_rocm.sh | 32 +---------- MultimodalQnA/tests/test_compose_on_xeon.sh | 37 +------------ .../tests/test_compose_vllm_on_rocm.sh | 31 +---------- 16 files changed, 74 insertions(+), 230 deletions(-) delete mode 100755 MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh rename MultimodalQnA/docker_compose/intel/{hpu/gaudi => }/set_env.sh (96%) create mode 100644 MultimodalQnA/tests/README.md diff --git a/MultimodalQnA/docker_compose/amd/gpu/rocm/compose.yaml b/MultimodalQnA/docker_compose/amd/gpu/rocm/compose.yaml index 1691cbb33f..2688b1ed68 100644 --- a/MultimodalQnA/docker_compose/amd/gpu/rocm/compose.yaml +++ b/MultimodalQnA/docker_compose/amd/gpu/rocm/compose.yaml @@ -166,7 +166,7 @@ services: MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE} MM_RETRIEVER_SERVICE_HOST_IP: ${MM_RETRIEVER_SERVICE_HOST_IP} LVM_SERVICE_HOST_IP: ${LVM_SERVICE_HOST_IP} - WHISPER_SERVER_PORT: ${WHISPER_PORT} + WHISPER_SERVER_PORT: ${WHISPER_SERVER_PORT} WHISPER_SERVER_ENDPOINT: ${WHISPER_SERVER_ENDPOINT} ipc: host restart: always diff --git a/MultimodalQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/MultimodalQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml index 40166110ab..5575e7cdc8 100644 --- a/MultimodalQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/MultimodalQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -170,7 +170,7 @@ services: MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE} MM_RETRIEVER_SERVICE_HOST_IP: ${MM_RETRIEVER_SERVICE_HOST_IP} LVM_SERVICE_HOST_IP: ${LVM_SERVICE_HOST_IP} - WHISPER_SERVER_PORT: ${WHISPER_PORT} + WHISPER_SERVER_PORT: ${WHISPER_SERVER_PORT} WHISPER_SERVER_ENDPOINT: ${WHISPER_SERVER_ENDPOINT} ipc: host restart: always diff --git a/MultimodalQnA/docker_compose/amd/gpu/rocm/set_env.sh b/MultimodalQnA/docker_compose/amd/gpu/rocm/set_env.sh index 5c7516e7a4..31635d5768 100644 --- a/MultimodalQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ b/MultimodalQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -3,8 +3,8 @@ # Copyright (C) 2024 Advanced Micro Devices, Inc. 
# SPDX-License-Identifier: Apache-2.0 -export HOST_IP=${your_host_ip_address} -export MULTIMODAL_HUGGINGFACEHUB_API_TOKEN=${your_huggingfacehub_token} +export HOST_IP=${ip_address} +export MULTIMODAL_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export MULTIMODAL_TGI_SERVICE_PORT="8399" export no_proxy=${your_no_proxy} export http_proxy=${your_http_proxy} @@ -31,5 +31,5 @@ export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/datap export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/generate_captions" export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/get" export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/delete" -export WHISPER_PORT="7066" -export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_PORT}/v1/asr" +export WHISPER_SERVER_PORT=7066 +export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr" diff --git a/MultimodalQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/MultimodalQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh index 623d0c5272..6bf17d9b0b 100644 --- a/MultimodalQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/MultimodalQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -3,9 +3,9 @@ # Copyright (C) 2024 Advanced Micro Devices, Inc. # SPDX-License-Identifier: Apache-2.0 -export HOST_IP=${your_host_ip_address} -export MULTIMODAL_HUGGINGFACEHUB_API_TOKEN=${your_huggingfacehub_token} -export MULTIMODAL_TGI_SERVICE_PORT="8399" +export HOST_IP=${ip_address} +export MULTIMODAL_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} +export MULTIMODAL_VLLM_SERVICE_PORT="8399" export no_proxy=${your_no_proxy} export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} @@ -17,9 +17,9 @@ export REDIS_URL="redis://${HOST_IP}:6379" export REDIS_HOST=${HOST_IP} export INDEX_NAME="mm-rag-redis" export VLLM_SERVER_PORT=8081 -export LVM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVER_PORT}" +export LVM_ENDPOINT="http://${HOST_IP}:8399" export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc" -export LVM_MODEL_ID="Xkev/Llama-3.2V-11B-cot" +export MULTIMODAL_LLM_MODEL_ID="Xkev/Llama-3.2V-11B-cot" export WHISPER_MODEL="base" export MM_EMBEDDING_SERVICE_HOST_IP=${HOST_IP} export MM_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} @@ -31,5 +31,5 @@ export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/datap export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/generate_captions" export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/get" export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/delete" -export WHISPER_PORT="7066" -export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_PORT}/v1/asr" +export WHISPER_SERVER_PORT=7066 +export WHISPER_SERVER_ENDPOINT="http://${HOST_IP}:${WHISPER_SERVER_PORT}/v1/asr" diff --git a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml index bed2374fbd..2f2318de07 100644 --- a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -39,7 +39,7 @@ services: - redis-vector-db - lvm-llava ports: - - "${DATAPREP_MMR_PORT}:5000" + - "${DATAPREP_MMR_PORT:-6007}:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} diff --git a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml index 77a2e0bb01..250d2633a5 100644 --- 
a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml +++ b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml @@ -81,7 +81,7 @@ services: - milvus-standalone - lvm-llava ports: - - "${DATAPREP_MMR_PORT}:5000" + - "${DATAPREP_MMR_PORT:-6007}:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} diff --git a/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh b/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh deleted file mode 100755 index 4cb7b5ba92..0000000000 --- a/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -pushd "../../../../../" > /dev/null -source .set_env.sh -popd > /dev/null - -export host_ip=$(hostname -I | awk '{print $1}') - -export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip} -export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip} -export LVM_SERVICE_HOST_IP=${host_ip} -export MEGA_SERVICE_HOST_IP=${host_ip} - -export TTS_PORT=7055 -export TTS_ENDPOINT="http://${host_ip}:${TTS_PORT}/v1/tts" - -export WHISPER_PORT=7066 -export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_PORT}/v1/asr" -export WHISPER_MODEL="base" -export MAX_IMAGES=1 - -export REDIS_DB_PORT=6379 -export REDIS_INSIGHTS_PORT=8001 -export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}" -export REDIS_HOST=${host_ip} -export INDEX_NAME="mm-rag-redis" - -export DATAPREP_MMR_PORT=6007 -export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/ingest" -export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_transcripts" -export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_captions" -export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/get" -export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/delete" - -export EMM_BRIDGETOWER_PORT=6006 -export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc" -export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT" -export MM_EMBEDDING_PORT_MICROSERVICE=6000 -export BRIDGE_TOWER_EMBEDDING=true - -export REDIS_RETRIEVER_PORT=7000 - -export LVM_PORT=9399 -export LLAVA_SERVER_PORT=8399 -export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf" -export LVM_ENDPOINT="http://${host_ip}:${LLAVA_SERVER_PORT}" - -export MEGA_SERVICE_PORT=8888 -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${MEGA_SERVICE_PORT}/v1/multimodalqna" - -export UI_PORT=5173 -export UI_TIMEOUT=240 diff --git a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml index c5e1b29bda..c3dcc9f8cc 100644 --- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -41,7 +41,7 @@ services: - redis-vector-db - lvm ports: - - "${DATAPREP_MMR_PORT}:5000" + - "${DATAPREP_MMR_PORT:-6007}:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} diff --git a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose_milvus.yaml b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose_milvus.yaml index 98df452697..165760003c 100644 --- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose_milvus.yaml +++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose_milvus.yaml @@ -79,7 +79,7 @@ services: - "milvus-standalone" - "lvm" ports: - - "${DATAPREP_MMR_PORT}:5000" + - 
"${DATAPREP_MMR_PORT:-6007}:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} diff --git a/MultimodalQnA/docker_compose/intel/hpu/gaudi/set_env.sh b/MultimodalQnA/docker_compose/intel/set_env.sh similarity index 96% rename from MultimodalQnA/docker_compose/intel/hpu/gaudi/set_env.sh rename to MultimodalQnA/docker_compose/intel/set_env.sh index c92076253f..8d31674a29 100755 --- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/MultimodalQnA/docker_compose/intel/set_env.sh @@ -2,12 +2,12 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -pushd "../../../../../" > /dev/null +pushd "../../../" > /dev/null source .set_env.sh popd > /dev/null export host_ip=$(hostname -I | awk '{print $1}') - +export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip} export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip} export LVM_SERVICE_HOST_IP=${host_ip} diff --git a/MultimodalQnA/tests/README.md b/MultimodalQnA/tests/README.md new file mode 100644 index 0000000000..279576500f --- /dev/null +++ b/MultimodalQnA/tests/README.md @@ -0,0 +1,45 @@ +# MultimodalQnA E2E test scripts + +## Set the required environment variable + +```bash +export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +``` + +## Run test + +On Intel Xeon with vLLM: + +```bash +bash test_compose_on_xeon.sh +``` + +On Intel Xeon with TGI: + +```bash +bash test_compose_tgi_on_xeon.sh +``` + +On Intel Gaudi with vLLM: + +```bash +bash test_compose_on_gaudi.sh +``` + +On Intel Gaudi with TGI: + +```bash +bash test_compose_tgi_on_gaudi.sh +``` + +On AMD ROCm with TGI: + +```bash +bash test_compose_on_rocm.sh +``` + +On AMD ROCm with vLLM: + +```bash +bash test_compose_vllm_on_rocm.sh +``` diff --git a/MultimodalQnA/tests/test_compose_milvus_on_xeon.sh b/MultimodalQnA/tests/test_compose_milvus_on_xeon.sh index 60ea474f33..c82e0a7c62 100644 --- a/MultimodalQnA/tests/test_compose_milvus_on_xeon.sh +++ b/MultimodalQnA/tests/test_compose_milvus_on_xeon.sh @@ -65,37 +65,12 @@ function build_docker_images() { } function setup_env() { - export host_ip=${ip_address} - export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip} - export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip} - export LVM_SERVICE_HOST_IP=${host_ip} - export MEGA_SERVICE_HOST_IP=${host_ip} - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export WHISPER_PORT=7066 - export MAX_IMAGES=1 - export WHISPER_MODEL="base" - export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_PORT}/v1/asr" export COLLECTION_NAME="LangChainCollection" export MILVUS_HOST=${host_ip} - export DATAPREP_MMR_PORT=6007 - export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/ingest" - export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_transcripts" - export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_captions" - export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/get" - export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/delete" - export EMM_BRIDGETOWER_PORT=6006 - export BRIDGE_TOWER_EMBEDDING=true - export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc" - export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT" - export MM_EMBEDDING_PORT_MICROSERVICE=6000 export MILVUS_RETRIEVER_PORT=7000 - export LVM_PORT=9399 - export LLAVA_SERVER_PORT=8399 export 
LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf" - export LVM_ENDPOINT="http://${host_ip}:$LLAVA_SERVER_PORT" - export MEGA_SERVICE_PORT=8888 - export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:$MEGA_SERVICE_PORT/v1/multimodalqna" - export UI_PORT=5173 + cd $WORKPATH/docker_compose/intel + source set_env.sh } diff --git a/MultimodalQnA/tests/test_compose_on_gaudi.sh b/MultimodalQnA/tests/test_compose_on_gaudi.sh index 774347e435..a945de0033 100644 --- a/MultimodalQnA/tests/test_compose_on_gaudi.sh +++ b/MultimodalQnA/tests/test_compose_on_gaudi.sh @@ -69,42 +69,8 @@ function build_docker_images() { } function setup_env() { - export host_ip=${ip_address} - export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip} - export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip} - export LVM_SERVICE_HOST_IP=${host_ip} - export MEGA_SERVICE_HOST_IP=${host_ip} - export REDIS_DB_PORT=6379 - export REDIS_INSIGHTS_PORT=8001 - export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}" - export REDIS_HOST=${host_ip} - export INDEX_NAME="mm-rag-redis" - export WHISPER_PORT=7066 - export MAX_IMAGES=1 - export WHISPER_MODEL="base" - export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_PORT}/v1/asr" - export TTS_PORT=7055 - export TTS_ENDPOINT="http://${host_ip}:${TTS_PORT}/v1/tts" - export DATAPREP_MMR_PORT=6007 - export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/ingest" - export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_transcripts" - export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_captions" - export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/get" - export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/delete" - export EMM_BRIDGETOWER_PORT=6006 - export BRIDGE_TOWER_EMBEDDING=true - export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc" - export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT" - export MM_EMBEDDING_PORT_MICROSERVICE=6000 - export REDIS_RETRIEVER_PORT=7000 - export LVM_PORT=9399 - export LLAVA_SERVER_PORT=8399 - export TGI_GAUDI_PORT="${LLAVA_SERVER_PORT}:80" - export LVM_MODEL_ID="llava-hf/llava-v1.6-vicuna-13b-hf" - export LVM_ENDPOINT="http://${host_ip}:${LLAVA_SERVER_PORT}" - export MEGA_SERVICE_PORT=8888 - export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${MEGA_SERVICE_PORT}/v1/multimodalqna" - export UI_PORT=5173 + cd $WORKPATH/docker_compose/intel + source set_env.sh } function start_services() { diff --git a/MultimodalQnA/tests/test_compose_on_rocm.sh b/MultimodalQnA/tests/test_compose_on_rocm.sh index efa363ed3c..208f7c78a4 100644 --- a/MultimodalQnA/tests/test_compose_on_rocm.sh +++ b/MultimodalQnA/tests/test_compose_on_rocm.sh @@ -42,38 +42,10 @@ function build_docker_images() { } function setup_env() { - export HOST_IP=${ip_address} export host_ip=${ip_address} - export MULTIMODAL_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export MULTIMODAL_TGI_SERVICE_PORT="8399" - export no_proxy=${your_no_proxy} - export http_proxy=${your_http_proxy} - export https_proxy=${your_http_proxy} - export BRIDGE_TOWER_EMBEDDING=true - export EMBEDDER_PORT=6006 - export MMEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:$EMBEDDER_PORT" - export MM_EMBEDDING_PORT_MICROSERVICE=6000 - export WHISPER_SERVER_PORT=7066 - export WHISPER_SERVER_ENDPOINT="http://${HOST_IP}:${WHISPER_SERVER_PORT}/v1/asr" - export REDIS_URL="redis://${HOST_IP}:6379" - export 
REDIS_HOST=${HOST_IP} - export INDEX_NAME="mm-rag-redis" - export LLAVA_SERVER_PORT=8399 - export LVM_ENDPOINT="http://${HOST_IP}:8399" - export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc" - export LVM_MODEL_ID="Xkev/Llama-3.2V-11B-cot" - export WHISPER_MODEL="base" - export MM_EMBEDDING_SERVICE_HOST_IP=${HOST_IP} - export MM_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} - export LVM_SERVICE_HOST_IP=${HOST_IP} - export MEGA_SERVICE_HOST_IP=${HOST_IP} - export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:8888/v1/multimodalqna" - export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/ingest" - export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/generate_transcripts" - export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/generate_captions" - export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/get" - export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/delete" export MODEL_CACHE=${model_cache:-"/var/opea/multimodalqna-service/data"} + cd $WORKPATH/docker_compose/amd/gpu/rocm + source set_env.sh } function start_services() { diff --git a/MultimodalQnA/tests/test_compose_on_xeon.sh b/MultimodalQnA/tests/test_compose_on_xeon.sh index 1a62915abe..10f015aa7b 100644 --- a/MultimodalQnA/tests/test_compose_on_xeon.sh +++ b/MultimodalQnA/tests/test_compose_on_xeon.sh @@ -66,41 +66,8 @@ function build_docker_images() { } function setup_env() { - export host_ip=${ip_address} - export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip} - export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip} - export LVM_SERVICE_HOST_IP=${host_ip} - export MEGA_SERVICE_HOST_IP=${host_ip} - export WHISPER_PORT=7066 - export MAX_IMAGES=1 - export WHISPER_MODEL="base" - export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_PORT}/v1/asr" - export TTS_PORT=7055 - export TTS_ENDPOINT="http://${host_ip}:${TTS_PORT}/v1/tts" - export REDIS_DB_PORT=6379 - export REDIS_INSIGHTS_PORT=8001 - export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}" - export REDIS_HOST=${host_ip} - export INDEX_NAME="mm-rag-redis" - export DATAPREP_MMR_PORT=6007 - export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/ingest" - export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_transcripts" - export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_captions" - export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/get" - export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/delete" - export EMM_BRIDGETOWER_PORT=6006 - export BRIDGE_TOWER_EMBEDDING=true - export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc" - export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT" - export MM_EMBEDDING_PORT_MICROSERVICE=6000 - export REDIS_RETRIEVER_PORT=7000 - export LVM_PORT=9399 - export LLAVA_SERVER_PORT=8399 - export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf" - export LVM_ENDPOINT="http://${host_ip}:$LLAVA_SERVER_PORT" - export MEGA_SERVICE_PORT=8888 - export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:$MEGA_SERVICE_PORT/v1/multimodalqna" - export UI_PORT=5173 + cd $WORKPATH/docker_compose/intel + source set_env.sh } diff --git a/MultimodalQnA/tests/test_compose_vllm_on_rocm.sh b/MultimodalQnA/tests/test_compose_vllm_on_rocm.sh index 65fb87d6c9..77d76dc49a 100644 --- a/MultimodalQnA/tests/test_compose_vllm_on_rocm.sh 
+++ b/MultimodalQnA/tests/test_compose_vllm_on_rocm.sh @@ -42,36 +42,9 @@ function build_docker_images() { function setup_env() { export HOST_IP=${ip_address} - export host_ip=${ip_address} - export MULTIMODAL_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export MULTIMODAL_VLLM_SERVICE_PORT="8399" - export no_proxy=${your_no_proxy} - export http_proxy=${your_http_proxy} - export https_proxy=${your_http_proxy} - export BRIDGE_TOWER_EMBEDDING=true - export EMBEDDER_PORT=6006 - export MMEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:$EMBEDDER_PORT" - export MM_EMBEDDING_PORT_MICROSERVICE=6000 - export WHISPER_SERVER_PORT=7066 - export WHISPER_SERVER_ENDPOINT="http://${HOST_IP}:${WHISPER_SERVER_PORT}/v1/asr" - export REDIS_URL="redis://${HOST_IP}:6379" - export REDIS_HOST=${HOST_IP} - export INDEX_NAME="mm-rag-redis" - export LVM_ENDPOINT="http://${HOST_IP}:8399" - export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc" - export MULTIMODAL_LLM_MODEL_ID="Xkev/Llama-3.2V-11B-cot" - export WHISPER_MODEL="base" - export MM_EMBEDDING_SERVICE_HOST_IP=${HOST_IP} - export MM_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} - export LVM_SERVICE_HOST_IP=${HOST_IP} - export MEGA_SERVICE_HOST_IP=${HOST_IP} - export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:8888/v1/multimodalqna" - export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/ingest" - export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/generate_transcripts" - export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/generate_captions" - export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/get" - export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/delete" export MODEL_CACHE=${model_cache:-"/var/opea/multimodalqna-service/data"} + cd $WORKPATH/docker_compose/amd/gpu/rocm + source set_env_vllm.sh } function start_services() { From 7ada28f3be85df04a97d4b631011c151cb7dd033 Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Fri, 30 May 2025 11:26:57 +0800 Subject: [PATCH 08/44] Optimize benchmark scripts (#1949) Signed-off-by: chensuyue Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- ChatQnA/benchmark_chatqna.yaml | 2 +- deploy.py | 2 +- deploy_and_benchmark.py | 94 ++++++++++++++++++++++------------ 3 files changed, 64 insertions(+), 34 deletions(-) diff --git a/ChatQnA/benchmark_chatqna.yaml b/ChatQnA/benchmark_chatqna.yaml index ae74aa9b92..e528bb9d7a 100644 --- a/ChatQnA/benchmark_chatqna.yaml +++ b/ChatQnA/benchmark_chatqna.yaml @@ -37,7 +37,7 @@ deploy: llm: engine: vllm # or tgi - model_id: "meta-llama/Meta-Llama-3-8B-Instruct" # mandatory + model_id: "meta-llama/Llama-3.1-8B-Instruct" # mandatory replicaCount: with_teirerank: [7, 15, 31, 63] # When teirerank.enabled is True without_teirerank: [8, 16, 32, 64] # When teirerank.enabled is False diff --git a/deploy.py b/deploy.py index e74700ca53..184b82e187 100644 --- a/deploy.py +++ b/deploy.py @@ -192,7 +192,7 @@ def configure_rerank(values, with_rerank, deploy_config, example_type, node_sele values["teirerank"]["nodeSelector"] = {key: value for key, value in node_selector.items()} else: if example_type == "chatqna": - values["image"] = {"repository": "opea/chatqna-without-rerank"} + values["CHATQNA_TYPE"] = "CHATQNA_NO_RERANK" if "teirerank" not in values: values["teirerank"] = {"enabled": False} elif "enabled" not in values["teirerank"]: diff --git a/deploy_and_benchmark.py b/deploy_and_benchmark.py index bb729c7b48..495a554525 100644 --- 
a/deploy_and_benchmark.py +++ b/deploy_and_benchmark.py @@ -143,13 +143,14 @@ def pull_helm_chart(chart_pull_url, version, chart_name): return untar_dir -def main(yaml_file, target_node=None, test_mode="oob"): +def main(yaml_file, target_node=None, test_mode="oob", clean_up=True): """Main function to process deployment configuration. Args: yaml_file: Path to the YAML configuration file target_node: Optional target number of nodes to deploy. If not specified, will process all nodes. test_mode: Test mode, either "oob" (out of box) or "tune". Defaults to "oob". + clean_up: Whether to clean up after the test. Defaults to True. """ if test_mode not in ["oob", "tune"]: print("Error: test_mode must be either 'oob' or 'tune'") @@ -185,6 +186,11 @@ def main(yaml_file, target_node=None, test_mode="oob"): if not chart_dir: return + # Set HF_TOKEN + HF_TOKEN = deploy_config.get("HUGGINGFACEHUB_API_TOKEN", "") + os.environ["HF_TOKEN"] = HF_TOKEN + os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN + for node in nodes_to_process: try: print(f"\nProcessing configuration for {node} nodes...") @@ -278,6 +284,9 @@ def main(yaml_file, target_node=None, test_mode="oob"): chart_dir, ] result = subprocess.run(cmd, check=True, capture_output=True, text=True) + print("Show deploy logs...") + print(result.stdout) + print("End of show deploy logs.") match = re.search(r"values_file_path: (\S+)", result.stdout) if match: @@ -306,6 +315,9 @@ def main(yaml_file, target_node=None, test_mode="oob"): "--update-service", ] result = subprocess.run(cmd, check=True, capture_output=True, text=True) + print("Show deploy logs...") + print(result.stdout) + print("End of show deploy logs.") if result.returncode != 0: print(f"Update failed for {node} nodes configuration with {param_name} {batch_param}") break # Skip remaining {param_name} for this node @@ -372,36 +384,48 @@ def main(yaml_file, target_node=None, test_mode="oob"): os.remove(temp_config_file) finally: - # Uninstall the deployment - print(f"\nUninstalling deployment for {node} nodes...") - cmd = [ - python_cmd, - "deploy.py", - "--chart-name", - chart_name, - "--namespace", - namespace, - "--uninstall", - ] - try: - result = subprocess.run(cmd, check=True) - if result.returncode != 0: - print(f"Failed to uninstall deployment for {node} nodes") - except Exception as e: - print(f"Error while uninstalling deployment for {node} nodes: {str(e)}") - - # Delete labels for current node configuration - print(f"Deleting labels for {node} nodes...") - cmd = [python_cmd, "deploy.py", "--chart-name", chart_name, "--num-nodes", str(node), "--delete-label"] - if current_node_names: - cmd.extend(["--node-names"] + current_node_names) - - try: - result = subprocess.run(cmd, check=True) - if result.returncode != 0: - print(f"Failed to delete labels for {node} nodes") - except Exception as e: - print(f"Error while deleting labels for {node} nodes: {str(e)}") + if clean_up: + # Uninstall the deployment + print(f"\nUninstalling deployment for {node} nodes...") + cmd = [ + python_cmd, + "deploy.py", + "--chart-name", + chart_name, + "--namespace", + namespace, + "--uninstall", + ] + try: + result = subprocess.run(cmd, check=True) + if result.returncode != 0: + print(f"Failed to uninstall deployment for {node} nodes") + except Exception as e: + print(f"Error while uninstalling deployment for {node} nodes: {str(e)}") + + # Delete labels for current node configuration + print(f"Deleting labels for {node} nodes...") + cmd = [ + python_cmd, + "deploy.py", + "--chart-name", + chart_name, + 
"--num-nodes", + str(node), + "--delete-label", + ] + if current_node_names: + cmd.extend(["--node-names"] + current_node_names) + + try: + result = subprocess.run(cmd, check=True) + if result.returncode != 0: + print(f"Failed to delete labels for {node} nodes") + except Exception as e: + print(f"Error while deleting labels for {node} nodes: {str(e)}") + else: + print("Skipping cleanup for local debug. Manual cleanup may be required.") + exit(0) except Exception as e: print(f"Error processing configuration for {node} nodes: {str(e)}") @@ -419,6 +443,12 @@ def main(yaml_file, target_node=None, test_mode="oob"): parser.add_argument("yaml_file", help="Path to the YAML configuration file") parser.add_argument("--target-node", type=int, help="Optional: Target number of nodes to deploy.", default=None) parser.add_argument("--test-mode", type=str, help="Test mode, either 'oob' (out of box) or 'tune'.", default="oob") + parser.add_argument( + "--no-clean-up", + action="store_false", + dest="clean_up", + help="Clean up after test, which can be closed for local debug.", + ) args = parser.parse_args() - main(args.yaml_file, args.target_node, args.test_mode) + main(args.yaml_file, args.target_node, args.test_mode, args.clean_up) From 8c80b0806d8a588a6b9c9bda486ebcfb2637038e Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Fri, 30 May 2025 14:54:07 +0800 Subject: [PATCH 09/44] Fix permissions error. (#2008) Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .github/workflows/_get-test-matrix.yml | 12 ++++++++++- .github/workflows/_run-docker-compose.yml | 12 ++++++++++- .github/workflows/pr-docker-compose-e2e.yml | 22 +++++++++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_get-test-matrix.yml b/.github/workflows/_get-test-matrix.yml index 4d2fefabdd..25f39607da 100644 --- a/.github/workflows/_get-test-matrix.yml +++ b/.github/workflows/_get-test-matrix.yml @@ -3,7 +3,17 @@ # Support push and pull_request events name: Get Test Matrix -permissions: read-all +permissions: + actions: read + contents: read + checks: read + deployments: read + issues: read + packages: read + pages: read + pull-requests: read + statuses: read + security-events: read on: workflow_call: inputs: diff --git a/.github/workflows/_run-docker-compose.yml b/.github/workflows/_run-docker-compose.yml index fa9b560c09..7af15e11be 100644 --- a/.github/workflows/_run-docker-compose.yml +++ b/.github/workflows/_run-docker-compose.yml @@ -2,7 +2,17 @@ # SPDX-License-Identifier: Apache-2.0 name: Image Build -permissions: read-all +permissions: + actions: read + contents: read + checks: write + deployments: write + issues: write + packages: write + pages: write + pull-requests: write + statuses: write + security-events: read on: workflow_call: inputs: diff --git a/.github/workflows/pr-docker-compose-e2e.yml b/.github/workflows/pr-docker-compose-e2e.yml index d7aba56bb5..d502ea0094 100644 --- a/.github/workflows/pr-docker-compose-e2e.yml +++ b/.github/workflows/pr-docker-compose-e2e.yml @@ -28,12 +28,34 @@ concurrency: jobs: get-test-matrix: + permissions: + actions: read + contents: read + checks: read + deployments: read + issues: read + packages: read + pages: read + pull-requests: read + statuses: read + security-events: read if: ${{ !github.event.pull_request.draft }} uses: ./.github/workflows/_get-test-matrix.yml with: diff_excluded_files: '\.github|\.md|\.txt|kubernetes|gmc|assets|benchmark' example-test: + permissions: + actions: read + contents: 
read + checks: write + deployments: write + issues: write + packages: write + pages: write + pull-requests: write + statuses: write + security-events: read needs: [get-test-matrix] if: ${{ needs.get-test-matrix.outputs.run_matrix != '' }} strategy: From a943345ea1475da2cca35c917fa35df7a537cde2 Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Tue, 3 Jun 2025 10:15:25 +0800 Subject: [PATCH 10/44] Build comps-base:ci for AgentQnA test (#2010) Signed-off-by: chensuyue Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .github/workflows/pr-docker-compose-e2e.yml | 2 +- AgentQnA/tests/step1_build_images.sh | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-docker-compose-e2e.yml b/.github/workflows/pr-docker-compose-e2e.yml index d502ea0094..29a2b40300 100644 --- a/.github/workflows/pr-docker-compose-e2e.yml +++ b/.github/workflows/pr-docker-compose-e2e.yml @@ -15,7 +15,7 @@ on: - "**.py" - "**/docker_compose/**" - "**/docker_image_build/**" - - "**/tests/test_compose**" + - "**/tests/**" - "**/ui/**" - "!**.md" - "!**.txt" diff --git a/AgentQnA/tests/step1_build_images.sh b/AgentQnA/tests/step1_build_images.sh index 8edd7b623a..58b5c8d6e8 100644 --- a/AgentQnA/tests/step1_build_images.sh +++ b/AgentQnA/tests/step1_build_images.sh @@ -13,6 +13,10 @@ function get_genai_comps() { if [ ! -d "GenAIComps" ] ; then git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git fi + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . + popd && sleep 1s } function build_docker_images_for_retrieval_tool(){ From b63bdb3378955f99ec8f3b3a1410fa255e06ccf9 Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Tue, 3 Jun 2025 10:19:02 +0800 Subject: [PATCH 11/44] Stop CI test on rocm due to lack of test machine (#2017) Signed-off-by: chensuyue Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .github/workflows/scripts/get_test_matrix.sh | 5 +++++ .github/workflows/weekly-example-test.yml | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/scripts/get_test_matrix.sh b/.github/workflows/scripts/get_test_matrix.sh index 5ad6992104..1b5b807ec1 100644 --- a/.github/workflows/scripts/get_test_matrix.sh +++ b/.github/workflows/scripts/get_test_matrix.sh @@ -40,6 +40,11 @@ for example in ${examples}; do done fi for hw in ${run_hardware}; do + # TODO: remove this condition when ROCm hardware is available + if [[ "${hw}" == "rocm" ]]; then + echo "Skip test on ROCm hardware for 2 weeks due to lack of test machine..." + continue + fi run_matrix="${run_matrix}{\"example\":\"${example}\",\"hardware\":\"${hw}\"}," done done diff --git a/.github/workflows/weekly-example-test.yml b/.github/workflows/weekly-example-test.yml index 832cc11681..c3b9c000b6 100644 --- a/.github/workflows/weekly-example-test.yml +++ b/.github/workflows/weekly-example-test.yml @@ -14,7 +14,7 @@ on: env: EXAMPLES: ${{ vars.NIGHTLY_RELEASE_EXAMPLES }} - NODES: "gaudi,xeon,rocm,arc" + NODES: "gaudi,xeon,arc" jobs: get-test-matrix: From 2a8f3fb7ca3ef17d3aab11fc358a612361bf3365 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Tue, 3 Jun 2025 11:21:10 +0800 Subject: [PATCH 12/44] Fix workflow permission issues. 
(#2018) Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .github/workflows/_example-workflow.yml | 29 +++++++++++++++ .github/workflows/manual-example-workflow.yml | 16 +++++++++ .github/workflows/manual-image-build.yml | 16 +++++++++ .../workflows/manual-reset-local-registry.yml | 16 +++++++++ .../nightly-docker-build-publish.yml | 36 +++++++++++++------ .github/workflows/push-image-build.yml | 16 +++++++++ .github/workflows/weekly-example-test.yml | 14 ++++---- 7 files changed, 125 insertions(+), 18 deletions(-) diff --git a/.github/workflows/_example-workflow.yml b/.github/workflows/_example-workflow.yml index 35b1f16cd1..6906fd4910 100644 --- a/.github/workflows/_example-workflow.yml +++ b/.github/workflows/_example-workflow.yml @@ -69,6 +69,22 @@ jobs: # Image Build #################################################################################################### build-images: + permissions: + actions: read + contents: read + checks: read + deployments: read + discussions: read + issues: read + packages: read + pages: read + pull-requests: read + repository-projects: read + statuses: read + security-events: read + id-token: write + attestations: read + models: read uses: ./.github/workflows/_build_image.yml with: node: ${{ inputs.node }} @@ -83,6 +99,17 @@ jobs: # Docker Compose Test #################################################################################################### test-example-compose: + permissions: + actions: read + contents: read + checks: write + deployments: write + issues: write + packages: write + pages: write + pull-requests: write + statuses: write + security-events: read needs: [build-images] if: ${{ inputs.test_compose }} uses: ./.github/workflows/_run-docker-compose.yml @@ -99,6 +126,8 @@ jobs: # helmchart Test #################################################################################################### test-helmchart: + permissions: + contents: read if: ${{ fromJSON(inputs.test_helmchart) }} uses: ./.github/workflows/_helm-e2e.yml with: diff --git a/.github/workflows/manual-example-workflow.yml b/.github/workflows/manual-example-workflow.yml index 338454d9aa..1826512c68 100644 --- a/.github/workflows/manual-example-workflow.yml +++ b/.github/workflows/manual-example-workflow.yml @@ -99,6 +99,22 @@ jobs: opea_branch: ${{ inputs.opea_branch }} run-examples: + permissions: + actions: read + attestations: read + discussions: read + models: read + repository-projects: read + id-token: write + contents: read + checks: write + deployments: write + issues: write + packages: write + pages: write + pull-requests: write + statuses: write + security-events: read needs: [get-test-matrix, build-comps-base] strategy: matrix: diff --git a/.github/workflows/manual-image-build.yml b/.github/workflows/manual-image-build.yml index fbfd2bef26..a9cbbe0575 100644 --- a/.github/workflows/manual-image-build.yml +++ b/.github/workflows/manual-image-build.yml @@ -66,6 +66,22 @@ jobs: echo "nodes=$nodes_json" >> $GITHUB_OUTPUT image-build: + permissions: + actions: read + attestations: read + discussions: read + models: read + repository-projects: read + id-token: write + contents: read + checks: write + deployments: write + issues: write + packages: write + pages: write + pull-requests: write + statuses: write + security-events: read needs: get-test-matrix if: ${{ needs.get-test-matrix.outputs.nodes != '' }} strategy: diff --git a/.github/workflows/manual-reset-local-registry.yml 
b/.github/workflows/manual-reset-local-registry.yml index 88c839ab82..7ee79ab035 100644 --- a/.github/workflows/manual-reset-local-registry.yml +++ b/.github/workflows/manual-reset-local-registry.yml @@ -63,6 +63,22 @@ jobs: docker ps | grep registry build: + permissions: + actions: read + attestations: read + discussions: read + models: read + repository-projects: read + id-token: write + contents: read + checks: write + deployments: write + issues: write + packages: write + pages: write + pull-requests: write + statuses: write + security-events: read needs: [get-build-matrix, clean-up] if: ${{ needs.get-image-list.outputs.matrix != '' }} strategy: diff --git a/.github/workflows/nightly-docker-build-publish.yml b/.github/workflows/nightly-docker-build-publish.yml index 1d776c8433..729568f373 100644 --- a/.github/workflows/nightly-docker-build-publish.yml +++ b/.github/workflows/nightly-docker-build-publish.yml @@ -3,7 +3,21 @@ name: Nightly build/publish latest docker images permissions: + actions: read + contents: read + checks: read + deployments: read + discussions: read + issues: read + packages: read + pages: read + pull-requests: read + repository-projects: read + statuses: read security-events: read + id-token: write + attestations: read + models: read on: schedule: @@ -77,20 +91,20 @@ jobs: needs: [get-build-matrix] permissions: actions: read - contents: read - checks: read - deployments: read + attestations: read discussions: read - issues: read - packages: read - pages: read - pull-requests: read + models: read repository-projects: read - statuses: read - security-events: read id-token: write - attestations: read - models: read + contents: read + checks: write + deployments: write + issues: write + packages: write + pages: write + pull-requests: write + statuses: write + security-events: read if: ${{ needs.get-build-matrix.outputs.examples_json != '' }} strategy: matrix: diff --git a/.github/workflows/push-image-build.yml b/.github/workflows/push-image-build.yml index 9f551a67bc..7c474ea23f 100644 --- a/.github/workflows/push-image-build.yml +++ b/.github/workflows/push-image-build.yml @@ -40,6 +40,22 @@ jobs: test_mode: "docker_image_build" image-build: + permissions: + actions: read + attestations: read + discussions: read + models: read + repository-projects: read + id-token: write + contents: read + checks: write + deployments: write + issues: write + packages: write + pages: write + pull-requests: write + statuses: write + security-events: read needs: job1 if: ${{ needs.job1.outputs.run_matrix != '{"include":[]}' }} strategy: diff --git a/.github/workflows/weekly-example-test.yml b/.github/workflows/weekly-example-test.yml index c3b9c000b6..7dcb609160 100644 --- a/.github/workflows/weekly-example-test.yml +++ b/.github/workflows/weekly-example-test.yml @@ -52,16 +52,16 @@ jobs: id-token: write actions: read attestations: read - checks: read - deployments: read + checks: write + deployments: write discussions: read - issues: read + issues: write models: read - packages: read - pages: read - pull-requests: read + packages: write + pages: write + pull-requests: write repository-projects: read - statuses: read + statuses: write security-events: read needs: [get-test-matrix, build-comps-base] strategy: From 34d40dcfc17092e945a797e9d976b42019a84687 Mon Sep 17 00:00:00 2001 From: Mustafa <109312699+MSCetin37@users.noreply.github.com> Date: Mon, 2 Jun 2025 23:06:32 -0700 Subject: [PATCH 13/44] Refine the README, folder/file hierarchy and test file for FinanceAgent (#1996) 
Signed-off-by: Mustafa Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- FinanceAgent/README.md | 180 +++----------- .../docker_compose/intel/hpu/gaudi/README.md | 205 ++++++++++++++++ .../intel/hpu/gaudi/compose.yaml | 230 ++++++++++++------ .../intel/hpu/gaudi/dataprep_compose.yaml | 82 ------- .../intel/hpu/gaudi/launch_agents.sh | 36 --- .../intel/hpu/gaudi/launch_dataprep.sh | 15 -- .../intel/hpu/gaudi/launch_vllm.sh | 7 - .../intel/hpu/gaudi/vllm_compose.yaml | 35 --- FinanceAgent/docker_compose/intel/set_env.sh | 89 +++++++ FinanceAgent/tests/test_compose_on_gaudi.sh | 140 ++++++----- 10 files changed, 559 insertions(+), 460 deletions(-) create mode 100644 FinanceAgent/docker_compose/intel/hpu/gaudi/README.md delete mode 100644 FinanceAgent/docker_compose/intel/hpu/gaudi/dataprep_compose.yaml delete mode 100644 FinanceAgent/docker_compose/intel/hpu/gaudi/launch_agents.sh delete mode 100644 FinanceAgent/docker_compose/intel/hpu/gaudi/launch_dataprep.sh delete mode 100644 FinanceAgent/docker_compose/intel/hpu/gaudi/launch_vllm.sh delete mode 100644 FinanceAgent/docker_compose/intel/hpu/gaudi/vllm_compose.yaml create mode 100644 FinanceAgent/docker_compose/intel/set_env.sh diff --git a/FinanceAgent/README.md b/FinanceAgent/README.md index 64ce01cc0a..640f7113d0 100644 --- a/FinanceAgent/README.md +++ b/FinanceAgent/README.md @@ -1,6 +1,26 @@ -# Finance Agent +# Finance Agent Example -## 1. Overview +## Table of Contents + +- [Overview](#overview) +- [Problem Motivation](#problem-motivation) +- [Architecture](#architecture) + - [High-Level Diagram](#high-level-diagram) + - [OPEA Microservices Diagram for Data Handling](#opea-microservices-diagram-for-data-handling) +- [Deployment Options](#deployment-options) +- [Contribution](#contribution) + +## Overview + +The Finance Agent exemplifies a hierarchical multi-agent system designed to streamline financial document processing and analysis for users. It offers three core functionalities: summarizing lengthy financial documents, answering queries related to these documents, and conducting research to generate investment reports on public companies. + +Navigating and analyzing extensive financial documents can be both challenging and time-consuming. Users often need concise summaries, answers to specific queries, or comprehensive investment reports. The Finance Agent effectively addresses these needs by automating document summarization, query answering, and research tasks, thereby enhancing productivity and decision-making efficiency. + +Users interact with the system through a graphical user interface (UI), where a supervisor agent manages requests by delegating tasks to worker agents or the summarization microservice. The system also supports document uploads via the UI for processing. + +## Architecture + +### High-Level Diagram The architecture of this Finance Agent example is shown in the figure below. The agent is a hierarchical multi-agent system and has 3 main functions: @@ -12,6 +32,8 @@ The user interacts with the supervisor agent through the graphical UI. The super ![Finance Agent Architecture](assets/finance_agent_arch.png) +### OPEA Microservices Diagram for Data Handling + The architectural diagram of the `dataprep` microservice is shown below. We use [docling](https://github.com/docling-project/docling) to extract text from PDFs and URLs into markdown format. Both the full document content and tables are extracted. 
We then use an LLM to extract metadata from the document, including the company name, year, quarter, document type, and document title. The full document markdown then gets chunked, and LLM is used to summarize each chunk, and the summaries are embedded and saved to a vector database. Each table is also summarized by LLM and the summaries are embedded and saved to the vector database. The chunks and tables are also saved into a KV store. The pipeline is designed as such to improve retrieval accuracy of the `search_knowledge_base` tool used by the Question Answering worker agent. ![dataprep architecture](assets/fin_agent_dataprep.png) @@ -30,154 +52,16 @@ The Question Answering worker agent uses `search_knowledge_base` tool to get rel ![finqa search tool arch](assets/finqa_tool.png) -## 2. Getting started - -### 2.1 Download repos - -```bash -mkdir /path/to/your/workspace/ -export WORKDIR=/path/to/your/workspace/ -cd $WORKDIR -git clone https://github.com/opea-project/GenAIExamples.git -``` - -### 2.2 Set up env vars - -```bash -export ip_address="External_Public_IP" -export no_proxy=${your_no_proxy},${ip_address} -export HF_CACHE_DIR=/path/to/your/model/cache/ -export HF_TOKEN= -export FINNHUB_API_KEY= # go to https://finnhub.io/ to get your free api key -export FINANCIAL_DATASETS_API_KEY= # go to https://docs.financialdatasets.ai/ to get your free api key -``` - -### 2.3 [Optional] Build docker images - -Only needed when docker pull failed. - -```bash -cd $WORKDIR/GenAIExamples/FinanceAgent/docker_image_build -# get GenAIComps repo -git clone https://github.com/opea-project/GenAIComps.git -# build the images -docker compose -f build.yaml build --no-cache -``` - -If deploy on Gaudi, also need to build vllm image. - -```bash -cd $WORKDIR -git clone https://github.com/HabanaAI/vllm-fork.git -# get the latest release tag of vllm gaudi -cd vllm-fork -VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)") -echo "Check out vLLM tag ${VLLM_VER}" -git checkout ${VLLM_VER} -docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -``` - -## 3. Deploy with docker compose - -### 3.1 Launch vllm endpoint - -Below is the command to launch a vllm endpoint on Gaudi that serves `meta-llama/Llama-3.3-70B-Instruct` model on 4 Gaudi cards. - -```bash -cd $WORKDIR/GenAIExamples/FinanceAgent/docker_compose/intel/hpu/gaudi -bash launch_vllm.sh -``` - -### 3.2 Prepare knowledge base - -The commands below will upload some example files into the knowledge base. You can also upload files through UI. - -First, launch the redis databases and the dataprep microservice. - -```bash -# inside $WORKDIR/GenAIExamples/FinanceAgent/docker_compose/intel/hpu/gaudi/ -bash launch_dataprep.sh -``` - -Validate datat ingest data and retrieval from database: - -```bash -python $WORKDIR/GenAIExamples/FinanceAgent/tests/test_redis_finance.py --port 6007 --test_option ingest -python $WORKDIR/GenAIExamples/FinanceAgent/tests/test_redis_finance.py --port 6007 --test_option get -``` - -### 3.3 Launch the multi-agent system - -The command below will launch 3 agent microservices, 1 docsum microservice, 1 UI microservice. - -```bash -# inside $WORKDIR/GenAIExamples/FinanceAgent/docker_compose/intel/hpu/gaudi/ -bash launch_agents.sh -``` - -### 3.4 Validate agents - -FinQA Agent: - -```bash -export agent_port="9095" -prompt="What is Gap's revenue in 2024?" 
-python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port -``` - -Research Agent: - -```bash -export agent_port="9096" -prompt="generate NVDA financial research report" -python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port --tool_choice "get_current_date" --tool_choice "get_share_performance" -``` - -Supervisor Agent single turns: - -```bash -export agent_port="9090" -python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream -``` - -Supervisor Agent multi turn: - -```bash -python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --agent_role "supervisor" --ext_port $agent_port --multi-turn --stream - -``` - -## How to interact with the agent system with UI - -The UI microservice is launched in the previous step with the other microservices. -To see the UI, open a web browser to `http://${ip_address}:5175` to access the UI. Note the `ip_address` here is the host IP of the UI microservice. - -1. Create Admin Account with a random value - -2. Enter the endpoints in the `Connections` settings - - First, click on the user icon in the upper right corner to open `Settings`. Click on `Admin Settings`. Click on `Connections`. - - Then, enter the supervisor agent endpoint in the `OpenAI API` section: `http://${ip_address}:9090/v1`. Enter the API key as "empty". Add an arbitrary model id in `Model IDs`, for example, "opea_agent". The `ip_address` here should be the host ip of the agent microservice. - - Then, enter the dataprep endpoint in the `Icloud File API` section. You first need to enable `Icloud File API` by clicking on the button on the right to turn it into green and then enter the endpoint url, for example, `http://${ip_address}:6007/v1`. The `ip_address` here should be the host ip of the dataprep microservice. - - You should see screen like the screenshot below when the settings are done. - -![opea-agent-setting](assets/ui_connections_settings.png) - -3. Upload documents with UI - - Click on the `Workplace` icon in the top left corner. Click `Knowledge`. Click on the "+" sign to the right of `Icloud Knowledge`. You can paste an url in the left hand side of the pop-up window, or upload a local file by click on the cloud icon on the right hand side of the pop-up window. Then click on the `Upload Confirm` button. Wait till the processing is done and the pop-up window will be closed on its own when the data ingestion is done. See the screenshot below. - - Note: the data ingestion may take a few minutes depending on the length of the document. Please wait patiently and do not close the pop-up window. +## Deployment Options -![upload-doc-ui](assets/upload_doc_ui.png) +This Finance Agent example can be deployed manually on Docker Compose. -4. Test agent with UI +| Hardware | Deployment Mode | Guide Link | +| :----------------------------- | :------------------- | :----------------------------------------------------------------------- | +| Intel® Gaudi® AI Accelerator | Single Node (Docker) | [Gaudi Docker Compose Guide](./docker_compose/intel/hpu/gaudi/README.md) | - After the settings are done and documents are ingested, you can start to ask questions to the agent. Click on the `New Chat` icon in the top left corner, and type in your questions in the text box in the middle of the UI. 
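A minimal sketch of the Gaudi deployment flow covered in the guide linked above, assuming the default vLLM-based profile; the token and API key values are placeholders, and the full steps (optional image builds, service validation, UI access) are described in the per-platform README:

```bash
# Clone the examples repository and move to the Gaudi compose directory
export WORKDIR=/path/to/your/workspace/
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/FinanceAgent/docker_compose/intel/hpu/gaudi

# Required credentials (placeholders, replace with your own values)
export HF_TOKEN="your_huggingface_token"
export FINNHUB_API_KEY="your-finnhub-api-key"
export FINANCIAL_DATASETS_API_KEY="your-financial-datasets-api-key"

# Load the shared environment defaults, then start all services
source ../../set_env.sh
docker compose -f compose.yaml up -d
```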
+_Note: Building custom microservice images can be done using the resources in [GenAIComps](https://github.com/opea-project/GenAIComps)._
+
+## Contribution
+
+We welcome contributions to the OPEA project. Please refer to the [contribution guidelines](https://github.com/opea-project/docs/blob/main/community/CONTRIBUTING.md) for more information.
diff --git a/FinanceAgent/docker_compose/intel/hpu/gaudi/README.md b/FinanceAgent/docker_compose/intel/hpu/gaudi/README.md
new file mode 100644
index 0000000000..79f0a9dec9
--- /dev/null
+++ b/FinanceAgent/docker_compose/intel/hpu/gaudi/README.md
@@ -0,0 +1,205 @@
+# Deploy Finance Agent on Intel® Gaudi® AI Accelerator with Docker Compose
+
+This README provides instructions for deploying the Finance Agent application using Docker Compose on systems equipped with Intel® Gaudi® AI Accelerators.
+
+## Table of Contents
+
+- [Overview](#overview)
+- [Prerequisites](#prerequisites)
+- [Start Deployment](#start-deployment)
+- [Validate Services](#validate-services)
+- [Accessing the User Interface (UI)](#accessing-the-user-interface-ui)
+
+## Overview
+
+This guide focuses on running the pre-configured Finance Agent service using Docker Compose on Intel® Gaudi® AI Accelerators. It leverages containers optimized for Gaudi for the LLM serving component, along with CPU-based containers for other microservices like embedding, retrieval, data preparation, and the UI.
+
+## Prerequisites
+
+- Docker and Docker Compose installed.
+- Intel® Gaudi® AI Accelerator(s) with the necessary drivers and software stack installed on the host system. (Refer to Intel Gaudi Documentation).
+- Git installed (for cloning the repository).
+- Hugging Face Hub API Token (for downloading models).
+- Access to the internet (or a private model cache).
+- Finnhub API Key. Go to https://finnhub.io/ to get your free API key.
+- Financial Datasets API Key. Go to https://docs.financialdatasets.ai/ to get your free API key.
+
+Clone the GenAIExamples repository:
+
+```shell
+mkdir /path/to/your/workspace/
+export WORKDIR=/path/to/your/workspace/
+cd $WORKDIR
+git clone https://github.com/opea-project/GenAIExamples.git
+cd GenAIExamples/FinanceAgent/docker_compose/intel/hpu/gaudi
+```
+
+## Start Deployment
+
+This uses the default vLLM-based deployment profile (vllm-gaudi-server).
+
+### Configure Environment
+
+Set required environment variables in your shell:
+
+```shell
+# Path to your model cache
+export HF_CACHE_DIR="./data"
+# Some models from Hugging Face require approval beforehand. Ensure you have the necessary permissions to access them.
+export HF_TOKEN="your_huggingface_token"
+export FINNHUB_API_KEY="your-finnhub-api-key"
+export FINANCIAL_DATASETS_API_KEY="your-financial-datasets-api-key"
+
+# Optional: Configure HOST_IP if needed
+# Replace with your host's external IP address (do not use localhost or 127.0.0.1).
+# export HOST_IP=$(hostname -I | awk '{print $1}') + +# Optional: Configure proxy if needed +# export HTTP_PROXY="${http_proxy}" +# export HTTPS_PROXY="${https_proxy}" +# export NO_PROXY="${NO_PROXY},${HOST_IP}" + +source ../../set_env.sh +``` + +Note: The compose file might read additional variables from set_env.sh. Ensure all required variables like ports (LLM_SERVICE_PORT, TEI_EMBEDDER_PORT, etc.) are set if not using defaults from the compose file. For instance, edit the set_env.sh to change the LLM model: + +### Start Services + +#### Deploy with Docker Compose + +Below is the command to launch services + +- vllm-gaudi-server +- tei-embedding-serving +- redis-vector-db +- redis-kv-store +- dataprep-redis-server-finance +- finqa-agent-endpoint +- research-agent-endpoint +- docsum-vllm-gaudi +- supervisor-agent-endpoint +- agent-ui + +```shell +docker compose -f compose.yaml up -d +``` + +#### [Optional] Build docker images + +This is only needed if the Docker image is unavailable or the pull operation fails. + +```bash +cd $WORKDIR/GenAIExamples/FinanceAgent/docker_image_build +# get GenAIComps repo +git clone https://github.com/opea-project/GenAIComps.git +# build the images +docker compose -f build.yaml build --no-cache +``` + +If deploy on Gaudi, also need to build vllm image. + +```bash +cd $WORKDIR +git clone https://github.com/HabanaAI/vllm-fork.git +# get the latest release tag of vllm gaudi +cd vllm-fork +VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)") +echo "Check out vLLM tag ${VLLM_VER}" +git checkout ${VLLM_VER} +docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy +``` + +## Validate Services + +Wait several minutes for models to download and services to initialize (Gaudi initialization can take time). Check container logs (docker compose logs -f , especially vllm-gaudi-server). + +```bash +docker logs --tail 2000 -f vllm-gaudi-server +``` + +> Below is the expected output of the `vllm-gaudi-server` service. + +``` + INFO: Started server process [1] + INFO: Waiting for application startup. + INFO: Application startup complete. + INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit) + INFO: : - "GET /health HTTP/1.1" 200 OK + +``` + +### Validate Data Services + +Ingest data and retrieval from database + +```bash +python $WORKDIR/GenAIExamples/FinanceAgent/tests/test_redis_finance.py --port 6007 --test_option ingest +python $WORKDIR/GenAIExamples/FinanceAgent/tests/test_redis_finance.py --port 6007 --test_option get +``` + +### Validate Agents + +FinQA Agent: + +```bash +export agent_port="9095" +prompt="What is Gap's revenue in 2024?" 
+python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port +``` + +Research Agent: + +```bash +export agent_port="9096" +prompt="generate NVDA financial research report" +python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port --tool_choice "get_current_date" --tool_choice "get_share_performance" +``` + +Supervisor Agent single turns: + +```bash +export agent_port="9090" +python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream +``` + +Supervisor Agent multi turn: + +```bash +python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --agent_role "supervisor" --ext_port $agent_port --multi-turn --stream +``` + +## Accessing the User Interface (UI) + +The UI microservice is launched in the previous step with the other microservices. +To see the UI, open a web browser to `http://${HOST_IP}:5175` to access the UI. Note the `HOST_IP` here is the host IP of the UI microservice. + +1. Create Admin Account with a random value + +2. Enter the endpoints in the `Connections` settings + + First, click on the user icon in the upper right corner to open `Settings`. Click on `Admin Settings`. Click on `Connections`. + + Then, enter the supervisor agent endpoint in the `OpenAI API` section: `http://${HOST_IP}:9090/v1`. Enter the API key as "empty". Add an arbitrary model id in `Model IDs`, for example, "opea_agent". The `HOST_IP` here should be the host ip of the agent microservice. + + Then, enter the dataprep endpoint in the `Icloud File API` section. You first need to enable `Icloud File API` by clicking on the button on the right to turn it into green and then enter the endpoint url, for example, `http://${HOST_IP}:6007/v1`. The `HOST_IP` here should be the host ip of the dataprep microservice. + + You should see screen like the screenshot below when the settings are done. + +![opea-agent-setting](../../../../assets/ui_connections_settings.png) + +3. Upload documents with UI + + Click on the `Workplace` icon in the top left corner. Click `Knowledge`. Click on the "+" sign to the right of `iCloud Knowledge`. You can paste an url in the left hand side of the pop-up window, or upload a local file by click on the cloud icon on the right hand side of the pop-up window. Then click on the `Upload Confirm` button. Wait till the processing is done and the pop-up window will be closed on its own when the data ingestion is done. See the screenshot below. + Then, enter the dataprep endpoint in the `iCloud File API` section. You first need to enable `iCloud File API` by clicking on the button on the right to turn it into green and then enter the endpoint url, for example, `http://${HOST_IP}:6007/v1`. The `HOST_IP` here should be the host ip of the dataprep microservice. + Note: the data ingestion may take a few minutes depending on the length of the document. Please wait patiently and do not close the pop-up window. + +![upload-doc-ui](../../../../assets/upload_doc_ui.png) + +4. Test agent with UI + + After the settings are done and documents are ingested, you can start to ask questions to the agent. Click on the `New Chat` icon in the top left corner, and type in your questions in the text box in the middle of the UI. + + The UI will stream the agent's response tokens. You need to expand the `Thinking` tab to see the agent's reasoning process. 
After the agent made tool calls, you would also see the tool output after the tool returns output to the agent. Note: it may take a while to get the tool output back if the tool execution takes time. + +![opea-agent-test](../../../../assets/opea-agent-test.png) diff --git a/FinanceAgent/docker_compose/intel/hpu/gaudi/compose.yaml b/FinanceAgent/docker_compose/intel/hpu/gaudi/compose.yaml index 997aade843..e788c5899a 100644 --- a/FinanceAgent/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/FinanceAgent/docker_compose/intel/hpu/gaudi/compose.yaml @@ -1,37 +1,146 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 + +x-common-environment: + &common-env + no_proxy: ${NO_PROXY} + http_proxy: ${HTTP_PROXY} + https_proxy: ${HTTPS_PROXY} + +x-common-agent-environment: + &common-agent-env + <<: *common-env + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + llm_endpoint_url: ${LLM_ENDPOINT} + model: ${LLM_MODEL_ID} + REDIS_URL_VECTOR: ${REDIS_URL_VECTOR} + REDIS_URL_KV: ${REDIS_URL_KV} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + ip_address: ${HOST_IP} + strategy: react_llama + require_human_feedback: false + services: + + vllm-service: + image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + container_name: vllm-gaudi-server + ports: + - "8086:8000" + volumes: + - ${HF_CACHE_DIR:-./data}:/data + environment: + <<: *common-env + HF_TOKEN: ${HF_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} + HF_HOME: ./data + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + LLM_MODEL_ID: ${LLM_MODEL_ID} + VLLM_TORCH_PROFILER_DIR: "/mnt" + VLLM_SKIP_WARMUP: true + PT_HPU_ENABLE_LAZY_COLLECTIVES: true + healthcheck: + test: ["CMD-SHELL", "curl -f http://$HOST_IP:8086/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + runtime: habana + cap_add: + - SYS_NICE + ipc: host + command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 8000 --max-seq-len-to-capture $MAX_LEN + + tei-embedding-serving: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-serving + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" + ports: + - "${TEI_EMBEDDER_PORT:-10221}:80" + volumes: + - ${HF_CACHE_DIR:-./data}:/data + shm_size: 1g + environment: + <<: *common-env + HF_TOKEN: ${HF_TOKEN} + host_ip: ${HOST_IP} + healthcheck: + test: ["CMD", "curl", "-f", "http://${HOST_IP}:${TEI_EMBEDDER_PORT}/health"] + interval: 10s + timeout: 6s + retries: 48 + + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "${REDIS_PORT1:-6379}:6379" + - "${REDIS_PORT2:-8001}:8001" + environment: + <<: *common-env + healthcheck: + test: ["CMD", "redis-cli", "ping"] + timeout: 10s + retries: 3 + start_period: 10s + + redis-kv-store: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-kv-store + ports: + - "${REDIS_PORT3:-6380}:6379" + - "${REDIS_PORT4:-8002}:8001" + environment: + <<: *common-env + healthcheck: + test: ["CMD", "redis-cli", "ping"] + timeout: 10s + retries: 3 + start_period: 10s + + dataprep-redis-finance: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server-finance + depends_on: + redis-vector-db: + condition: service_healthy + redis-kv-store: + condition: service_healthy + tei-embedding-serving: + condition: service_healthy + ports: + - "${DATAPREP_PORT:-6007}:5000" + environment: + <<: 
*common-env + DATAPREP_COMPONENT_NAME: ${DATAPREP_COMPONENT_NAME} + REDIS_URL_VECTOR: ${REDIS_URL_VECTOR} + REDIS_URL_KV: ${REDIS_URL_KV} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL: ${LLM_MODEL_ID} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} + LOGFLAG: true + worker-finqa-agent: image: opea/agent:latest container_name: finqa-agent-endpoint volumes: - ${TOOLSET_PATH}:/home/user/tools/ - ${PROMPT_PATH}:/home/user/prompts/ + ipc: host ports: - "9095:9095" - ipc: host environment: - ip_address: ${ip_address} - strategy: react_llama + <<: *common-agent-env with_memory: false - recursion_limit: ${recursion_limit_worker} - llm_engine: vllm - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - llm_endpoint_url: ${LLM_ENDPOINT_URL} - model: ${LLM_MODEL_ID} + recursion_limit: ${RECURSION_LIMIT_WORKER} temperature: ${TEMPERATURE} max_new_tokens: ${MAX_TOKENS} stream: false tools: /home/user/tools/finqa_agent_tools.yaml custom_prompt: /home/user/prompts/finqa_prompt.py - require_human_feedback: false - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - REDIS_URL_VECTOR: $REDIS_URL_VECTOR - REDIS_URL_KV: $REDIS_URL_KV - TEI_EMBEDDING_ENDPOINT: $TEI_EMBEDDING_ENDPOINT port: 9095 worker-research-agent: @@ -40,67 +149,20 @@ services: volumes: - ${TOOLSET_PATH}:/home/user/tools/ - ${PROMPT_PATH}:/home/user/prompts/ + ipc: host ports: - "9096:9096" - ipc: host environment: - ip_address: ${ip_address} - strategy: react_llama + <<: *common-agent-env with_memory: false - recursion_limit: 25 - llm_engine: vllm - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - llm_endpoint_url: ${LLM_ENDPOINT_URL} - model: ${LLM_MODEL_ID} + recursion_limit: ${RECURSION_LIMIT_WORKER} stream: false tools: /home/user/tools/research_agent_tools.yaml custom_prompt: /home/user/prompts/research_prompt.py - require_human_feedback: false - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} FINNHUB_API_KEY: ${FINNHUB_API_KEY} FINANCIAL_DATASETS_API_KEY: ${FINANCIAL_DATASETS_API_KEY} port: 9096 - supervisor-react-agent: - image: opea/agent:latest - container_name: supervisor-agent-endpoint - depends_on: - - worker-finqa-agent - - worker-research-agent - volumes: - - ${TOOLSET_PATH}:/home/user/tools/ - - ${PROMPT_PATH}:/home/user/prompts/ - ports: - - "9090:9090" - ipc: host - environment: - ip_address: ${ip_address} - strategy: react_llama - with_memory: true - recursion_limit: ${recursion_limit_supervisor} - llm_engine: vllm - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - llm_endpoint_url: ${LLM_ENDPOINT_URL} - model: ${LLM_MODEL_ID} - temperature: ${TEMPERATURE} - max_new_tokens: ${MAX_TOKENS} - stream: true - tools: /home/user/tools/supervisor_agent_tools.yaml - custom_prompt: /home/user/prompts/supervisor_prompt.py - require_human_feedback: false - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - WORKER_FINQA_AGENT_URL: $WORKER_FINQA_AGENT_URL - WORKER_RESEARCH_AGENT_URL: $WORKER_RESEARCH_AGENT_URL - DOCSUM_ENDPOINT: $DOCSUM_ENDPOINT - REDIS_URL_VECTOR: $REDIS_URL_VECTOR - REDIS_URL_KV: $REDIS_URL_KV - TEI_EMBEDDING_ENDPOINT: $TEI_EMBEDDING_ENDPOINT - port: 9090 - docsum-vllm-gaudi: image: opea/llm-docsum:latest container_name: docsum-vllm-gaudi @@ -108,26 +170,48 @@ services: - ${DOCSUM_PORT:-9000}:9000 ipc: host environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} + <<: *common-env LLM_ENDPOINT: 
${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} HF_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG:-False} MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} - DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumvLLM} + DocSum_COMPONENT_NAME: ${DOCSUM_COMPONENT_NAME:-OpeaDocSumvLLM} restart: unless-stopped + supervisor-react-agent: + image: opea/agent:latest + container_name: supervisor-agent-endpoint + volumes: + - ${TOOLSET_PATH}:/home/user/tools/ + - ${PROMPT_PATH}:/home/user/prompts/ + ipc: host + depends_on: + - worker-finqa-agent + - worker-research-agent + ports: + - "9090:9090" + environment: + <<: *common-agent-env + with_memory: "true" + recursion_limit: ${RECURSION_LIMIT_SUPERVISOR} + temperature: ${TEMPERATURE} + max_new_tokens: ${MAX_TOKENS} + stream: "true" + tools: /home/user/tools/supervisor_agent_tools.yaml + custom_prompt: /home/user/prompts/supervisor_prompt.py + WORKER_FINQA_AGENT_URL: ${WORKER_FINQA_AGENT_URL} + WORKER_RESEARCH_AGENT_URL: ${WORKER_RESEARCH_AGENT_URL} + DOCSUM_ENDPOINT: ${DOCSUM_ENDPOINT} + port: 9090 + agent-ui: image: opea/agent-ui:latest container_name: agent-ui environment: - host_ip: ${host_ip} - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} + <<: *common-env + host_ip: ${HOST_IP} ports: - "5175:8080" ipc: host diff --git a/FinanceAgent/docker_compose/intel/hpu/gaudi/dataprep_compose.yaml b/FinanceAgent/docker_compose/intel/hpu/gaudi/dataprep_compose.yaml deleted file mode 100644 index 5e4333c7d2..0000000000 --- a/FinanceAgent/docker_compose/intel/hpu/gaudi/dataprep_compose.yaml +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - tei-embedding-serving: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-embedding-serving - entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" - ports: - - "${TEI_EMBEDDER_PORT:-10221}:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - host_ip: ${host_ip} - HF_TOKEN: ${HF_TOKEN} - healthcheck: - test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"] - interval: 10s - timeout: 6s - retries: 48 - - redis-vector-db: - image: redis/redis-stack:7.2.0-v9 - container_name: redis-vector-db - ports: - - "${REDIS_PORT1:-6379}:6379" - - "${REDIS_PORT2:-8001}:8001" - environment: - - no_proxy=${no_proxy} - - http_proxy=${http_proxy} - - https_proxy=${https_proxy} - healthcheck: - test: ["CMD", "redis-cli", "ping"] - timeout: 10s - retries: 3 - start_period: 10s - - redis-kv-store: - image: redis/redis-stack:7.2.0-v9 - container_name: redis-kv-store - ports: - - "${REDIS_PORT3:-6380}:6379" - - "${REDIS_PORT4:-8002}:8001" - environment: - - no_proxy=${no_proxy} - - http_proxy=${http_proxy} - - https_proxy=${https_proxy} - healthcheck: - test: ["CMD", "redis-cli", "ping"] - timeout: 10s - retries: 3 - start_period: 10s - - dataprep-redis-finance: - image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} - container_name: dataprep-redis-server-finance - depends_on: - redis-vector-db: - condition: service_healthy - redis-kv-store: - condition: service_healthy - tei-embedding-serving: - condition: service_healthy - ports: - - "${DATAPREP_PORT:-6007}:5000" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - 
DATAPREP_COMPONENT_NAME: ${DATAPREP_COMPONENT_NAME} - REDIS_URL_VECTOR: ${REDIS_URL_VECTOR} - REDIS_URL_KV: ${REDIS_URL_KV} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - LLM_ENDPOINT: ${LLM_ENDPOINT} - LLM_MODEL: ${LLM_MODEL} - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - HF_TOKEN: ${HF_TOKEN} - LOGFLAG: true diff --git a/FinanceAgent/docker_compose/intel/hpu/gaudi/launch_agents.sh b/FinanceAgent/docker_compose/intel/hpu/gaudi/launch_agents.sh deleted file mode 100644 index 55dcbb7d3d..0000000000 --- a/FinanceAgent/docker_compose/intel/hpu/gaudi/launch_agents.sh +++ /dev/null @@ -1,36 +0,0 @@ - -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -export ip_address=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} -export TOOLSET_PATH=$WORKDIR/GenAIExamples/FinanceAgent/tools/ -echo "TOOLSET_PATH=${TOOLSET_PATH}" -export PROMPT_PATH=$WORKDIR/GenAIExamples/FinanceAgent/prompts/ -echo "PROMPT_PATH=${PROMPT_PATH}" -export recursion_limit_worker=12 -export recursion_limit_supervisor=10 - -vllm_port=8086 -export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" -export LLM_ENDPOINT_URL="http://${ip_address}:${vllm_port}" -export TEMPERATURE=0.5 -export MAX_TOKENS=4096 - -export WORKER_FINQA_AGENT_URL="http://${ip_address}:9095/v1/chat/completions" -export WORKER_RESEARCH_AGENT_URL="http://${ip_address}:9096/v1/chat/completions" - -export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:10221" -export REDIS_URL_VECTOR="redis://${ip_address}:6379" -export REDIS_URL_KV="redis://${ip_address}:6380" - -export MAX_INPUT_TOKENS=2048 -export MAX_TOTAL_TOKENS=4096 -export DocSum_COMPONENT_NAME="OpeaDocSumvLLM" -export DOCSUM_ENDPOINT="http://${ip_address}:9000/v1/docsum" - -export FINNHUB_API_KEY=${FINNHUB_API_KEY} -export FINANCIAL_DATASETS_API_KEY=${FINANCIAL_DATASETS_API_KEY} - -docker compose -f compose.yaml up -d diff --git a/FinanceAgent/docker_compose/intel/hpu/gaudi/launch_dataprep.sh b/FinanceAgent/docker_compose/intel/hpu/gaudi/launch_dataprep.sh deleted file mode 100644 index 9bb006c191..0000000000 --- a/FinanceAgent/docker_compose/intel/hpu/gaudi/launch_dataprep.sh +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -export host_ip=${ip_address} -export DATAPREP_PORT="6007" -export TEI_EMBEDDER_PORT="10221" -export REDIS_URL_VECTOR="redis://${ip_address}:6379" -export REDIS_URL_KV="redis://${ip_address}:6380" -export LLM_MODEL=$model -export LLM_ENDPOINT="http://${ip_address}:${vllm_port}" -export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS_FINANCE" -export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" - -docker compose -f dataprep_compose.yaml up -d diff --git a/FinanceAgent/docker_compose/intel/hpu/gaudi/launch_vllm.sh b/FinanceAgent/docker_compose/intel/hpu/gaudi/launch_vllm.sh deleted file mode 100644 index 5d8d58641b..0000000000 --- a/FinanceAgent/docker_compose/intel/hpu/gaudi/launch_vllm.sh +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" -export MAX_LEN=16384 - -docker compose -f vllm_compose.yaml up -d diff --git a/FinanceAgent/docker_compose/intel/hpu/gaudi/vllm_compose.yaml b/FinanceAgent/docker_compose/intel/hpu/gaudi/vllm_compose.yaml deleted file mode 100644 index 8ca62e1e46..0000000000 --- 
a/FinanceAgent/docker_compose/intel/hpu/gaudi/vllm_compose.yaml +++ /dev/null @@ -1,35 +0,0 @@ - -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - vllm-service: - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} - container_name: vllm-gaudi-server - ports: - - "8086:8000" - volumes: - - ${HF_CACHE_DIR}:/data - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HF_TOKEN} - HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} - HF_HOME: /data - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - LLM_MODEL_ID: ${LLM_MODEL_ID} - VLLM_TORCH_PROFILER_DIR: "/mnt" - VLLM_SKIP_WARMUP: true - PT_HPU_ENABLE_LAZY_COLLECTIVES: true - healthcheck: - test: ["CMD-SHELL", "curl -f http://$host_ip:8086/health || exit 1"] - interval: 10s - timeout: 10s - retries: 100 - runtime: habana - cap_add: - - SYS_NICE - ipc: host - command: --model $LLM_MODEL_ID --tensor-parallel-size 4 --host 0.0.0.0 --port 8000 --max-seq-len-to-capture $MAX_LEN diff --git a/FinanceAgent/docker_compose/intel/set_env.sh b/FinanceAgent/docker_compose/intel/set_env.sh new file mode 100644 index 0000000000..16893f3ab5 --- /dev/null +++ b/FinanceAgent/docker_compose/intel/set_env.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Navigate to the parent directory and source the environment +pushd "../../" > /dev/null +source .set_env.sh +popd > /dev/null + +# Function to check if a variable is set +check_var() { + local var_name="$1" + local var_value="${!var_name}" + if [ -z "${var_value}" ]; then + echo "Error: ${var_name} is not set. Please set ${var_name}." + return 1 # Return an error code but do not exit the script + fi +} + +# Check critical variables +check_var "HF_TOKEN" +check_var "HOST_IP" + +# VLLM configuration +export VLLM_PORT="${VLLM_PORT:-8086}" +export VLLM_VOLUME="${VLLM_VOLUME:-/data2/huggingface}" +export VLLM_IMAGE="${VLLM_IMAGE:-opea/vllm-gaudi:latest}" +export LLM_MODEL_ID="${LLM_MODEL_ID:-meta-llama/Llama-3.3-70B-Instruct}" +export LLM_ENDPOINT="http://${HOST_IP}:${VLLM_PORT}" +export MAX_LEN="${MAX_LEN:-16384}" +export NUM_CARDS="${NUM_CARDS:-4}" +export HF_CACHE_DIR="${HF_CACHE_DIR:-"./data"}" + +# Data preparation and embedding configuration +export DATAPREP_PORT="${DATAPREP_PORT:-6007}" +export TEI_EMBEDDER_PORT="${TEI_EMBEDDER_PORT:-10221}" +export REDIS_URL_VECTOR="redis://${HOST_IP}:6379" +export REDIS_URL_KV="redis://${HOST_IP}:6380" +export DATAPREP_COMPONENT_NAME="${DATAPREP_COMPONENT_NAME:-OPEA_DATAPREP_REDIS_FINANCE}" +export EMBEDDING_MODEL_ID="${EMBEDDING_MODEL_ID:-BAAI/bge-base-en-v1.5}" +export TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${TEI_EMBEDDER_PORT}" + +# Hugging Face API token +export HUGGINGFACEHUB_API_TOKEN="${HF_TOKEN}" + +# Recursion limits +export RECURSION_LIMIT_WORKER="${RECURSION_LIMIT_WORKER:-12}" +export RECURSION_LIMIT_SUPERVISOR="${RECURSION_LIMIT_SUPERVISOR:-10}" + +# LLM configuration +export TEMPERATURE="${TEMPERATURE:-0.5}" +export MAX_TOKENS="${MAX_TOKENS:-4096}" +export MAX_INPUT_TOKENS="${MAX_INPUT_TOKENS:-2048}" +export MAX_TOTAL_TOKENS="${MAX_TOTAL_TOKENS:-4096}" + +# Worker URLs +export WORKER_FINQA_AGENT_URL="http://${HOST_IP}:9095/v1/chat/completions" +export WORKER_RESEARCH_AGENT_URL="http://${HOST_IP}:9096/v1/chat/completions" + +# DocSum configuration +export DOCSUM_COMPONENT_NAME="${DOCSUM_COMPONENT_NAME:-"OpeaDocSumvLLM"}" +export 
DOCSUM_ENDPOINT="http://${HOST_IP}:9000/v1/docsum" + +# API keys +check_var "FINNHUB_API_KEY" +check_var "FINANCIAL_DATASETS_API_KEY" +export FINNHUB_API_KEY="${FINNHUB_API_KEY}" +export FINANCIAL_DATASETS_API_KEY="${FINANCIAL_DATASETS_API_KEY}" + + +# Toolset and prompt paths +if check_var "WORKDIR"; then + export TOOLSET_PATH=$WORKDIR/GenAIExamples/FinanceAgent/tools/ + export PROMPT_PATH=$WORKDIR/GenAIExamples/FinanceAgent/prompts/ + + echo "TOOLSET_PATH=${TOOLSET_PATH}" + echo "PROMPT_PATH=${PROMPT_PATH}" + + # Array of directories to check + REQUIRED_DIRS=("${TOOLSET_PATH}" "${PROMPT_PATH}") + + for dir in "${REQUIRED_DIRS[@]}"; do + if [ ! -d "${dir}" ]; then + echo "Error: Required directory does not exist: ${dir}" + exit 1 + fi + done +fi diff --git a/FinanceAgent/tests/test_compose_on_gaudi.sh b/FinanceAgent/tests/test_compose_on_gaudi.sh index 0f42813978..d534ffa122 100644 --- a/FinanceAgent/tests/test_compose_on_gaudi.sh +++ b/FinanceAgent/tests/test_compose_on_gaudi.sh @@ -6,33 +6,69 @@ set -xe export WORKPATH=$(dirname "$PWD") export WORKDIR=$WORKPATH/../../ echo "WORKDIR=${WORKDIR}" -export ip_address=$(hostname -I | awk '{print $1}') +export IP_ADDRESS=$(hostname -I | awk '{print $1}') +export HOST_IP=${IP_ADDRESS} LOG_PATH=$WORKPATH -#### env vars for LLM endpoint ############# -model=meta-llama/Llama-3.3-70B-Instruct -vllm_image=opea/vllm-gaudi:latest -vllm_port=8086 -vllm_image=$vllm_image -HF_CACHE_DIR=${model_cache:-"/data2/huggingface"} -vllm_volume=${HF_CACHE_DIR} -####################################### +# Proxy settings +export NO_PROXY="${NO_PROXY},${HOST_IP}" +export HTTP_PROXY="${http_proxy}" +export HTTPS_PROXY="${https_proxy}" + +export no_proxy="${no_proxy},${HOST_IP}" +export http_proxy="${http_proxy}" +export https_proxy="${https_proxy}" + +# VLLM configuration +MODEL=meta-llama/Llama-3.3-70B-Instruct +export VLLM_PORT="${VLLM_PORT:-8086}" + +# export HF_CACHE_DIR="${HF_CACHE_DIR:-"./data"}" +export HF_CACHE_DIR=${model_cache:-"./data2/huggingface"} +export VLLM_VOLUME="${HF_CACHE_DIR:-"./data2/huggingface"}" +export VLLM_IMAGE="${VLLM_IMAGE:-opea/vllm-gaudi:latest}" +export LLM_MODEL_ID="${LLM_MODEL_ID:-meta-llama/Llama-3.3-70B-Instruct}" +export LLM_MODEL=$LLM_MODEL_ID +export LLM_ENDPOINT="http://${IP_ADDRESS}:${VLLM_PORT}" +export MAX_LEN="${MAX_LEN:-16384}" +export NUM_CARDS="${NUM_CARDS:-4}" + +# Recursion limits +export RECURSION_LIMIT_WORKER="${RECURSION_LIMIT_WORKER:-12}" +export RECURSION_LIMIT_SUPERVISOR="${RECURSION_LIMIT_SUPERVISOR:-10}" + +# Hugging Face API token +export HUGGINGFACEHUB_API_TOKEN="${HF_TOKEN}" + +# LLM configuration +export TEMPERATURE="${TEMPERATURE:-0.5}" +export MAX_TOKENS="${MAX_TOKENS:-4096}" +export MAX_INPUT_TOKENS="${MAX_INPUT_TOKENS:-2048}" +export MAX_TOTAL_TOKENS="${MAX_TOTAL_TOKENS:-4096}" + +# Worker URLs +export WORKER_FINQA_AGENT_URL="http://${IP_ADDRESS}:9095/v1/chat/completions" +export WORKER_RESEARCH_AGENT_URL="http://${IP_ADDRESS}:9096/v1/chat/completions" + +# DocSum configuration +export DOCSUM_COMPONENT_NAME="${DOCSUM_COMPONENT_NAME:-"OpeaDocSumvLLM"}" +export DOCSUM_ENDPOINT="http://${IP_ADDRESS}:9000/v1/docsum" + +# Toolset and prompt paths +export TOOLSET_PATH=$WORKDIR/GenAIExamples/FinanceAgent/tools/ +export PROMPT_PATH=$WORKDIR/GenAIExamples/FinanceAgent/prompts/ #### env vars for dataprep ############# -export host_ip=${ip_address} export DATAPREP_PORT="6007" export TEI_EMBEDDER_PORT="10221" -export REDIS_URL_VECTOR="redis://${ip_address}:6379" -export REDIS_URL_KV="redis://${ip_address}:6380" 
-export LLM_MODEL=$model -export LLM_ENDPOINT="http://${ip_address}:${vllm_port}" +export REDIS_URL_VECTOR="redis://${IP_ADDRESS}:6379" +export REDIS_URL_KV="redis://${IP_ADDRESS}:6380" + export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS_FINANCE" export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" +export TEI_EMBEDDING_ENDPOINT="http://${IP_ADDRESS}:${TEI_EMBEDDER_PORT}" ####################################### - - function get_genai_comps() { if [ ! -d "GenAIComps" ] ; then git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git @@ -48,7 +84,7 @@ function build_dataprep_agent_images() { function build_agent_image_local(){ cd $WORKDIR/GenAIComps/ - docker build -t opea/agent:latest -f comps/agent/src/Dockerfile . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy + docker build -t opea/agent:latest -f comps/agent/src/Dockerfile . --build-arg https_proxy=$HTTPS_PROXY --build-arg http_proxy=$HTTP_PROXY } function build_vllm_docker_image() { @@ -62,24 +98,25 @@ function build_vllm_docker_image() { VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 git checkout ${VLLM_FORK_VER} &> /dev/null - docker build --no-cache -f Dockerfile.hpu -t $vllm_image --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy + docker build --no-cache -f Dockerfile.hpu -t $VLLM_IMAGE --shm-size=128g . --build-arg https_proxy=$HTTPS_PROXY --build-arg http_proxy=$HTTP_PROXY if [ $? -ne 0 ]; then - echo "$vllm_image failed" + echo "$VLLM_IMAGE failed" exit 1 else - echo "$vllm_image successful" + echo "$VLLM_IMAGE successful" fi } +function stop_llm(){ + cid=$(docker ps -aq --filter "name=vllm-gaudi-server") + echo "Stopping container $cid" + if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi + +} + +function start_all_services(){ + docker compose -f $WORKPATH/docker_compose/intel/hpu/gaudi/compose.yaml up -d -function start_vllm_service_70B() { - echo "token is ${HF_TOKEN}" - echo "start vllm gaudi service" - echo "**************model is $model**************" - docker run -d --runtime=habana --rm --name "vllm-gaudi-server" -e HABANA_VISIBLE_DEVICES=all -p $vllm_port:8000 -v $vllm_volume:/data -e HF_TOKEN=$HF_TOKEN -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_HOME=/data -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e VLLM_SKIP_WARMUP=true --cap-add=sys_nice --ipc=host $vllm_image --model ${model} --max-seq-len-to-capture 16384 --tensor-parallel-size 4 - sleep 10s - echo "Waiting vllm gaudi ready" - n=0 until [[ "$n" -ge 200 ]] || [[ $ready == true ]]; do docker logs vllm-gaudi-server &> ${LOG_PATH}/vllm-gaudi-service.log n=$((n+1)) @@ -96,19 +133,6 @@ function start_vllm_service_70B() { echo "Service started successfully" } - -function stop_llm(){ - cid=$(docker ps -aq --filter "name=vllm-gaudi-server") - echo "Stopping container $cid" - if [[ ! 
-z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi - -} - -function start_dataprep(){ - docker compose -f $WORKPATH/docker_compose/intel/hpu/gaudi/dataprep_compose.yaml up -d - sleep 1m -} - function validate() { local CONTENT="$1" local EXPECTED_RESULT="$2" @@ -155,16 +179,8 @@ function stop_dataprep() { } -function start_agents() { - echo "Starting Agent services" - cd $WORKDIR/GenAIExamples/FinanceAgent/docker_compose/intel/hpu/gaudi/ - bash launch_agents.sh - sleep 2m -} - - function validate_agent_service() { - # # test worker finqa agent + # test worker finqa agent echo "======================Testing worker finqa agent======================" export agent_port="9095" prompt="What is Gap's revenue in 2024?" @@ -178,7 +194,7 @@ function validate_agent_service() { exit 1 fi - # # test worker research agent + # test worker research agent echo "======================Testing worker research agent======================" export agent_port="9096" prompt="Johnson & Johnson" @@ -215,7 +231,6 @@ function validate_agent_service() { docker logs supervisor-agent-endpoint exit 1 fi - } function stop_agent_docker() { @@ -228,7 +243,6 @@ function stop_agent_docker() { done } - echo "workpath: $WORKPATH" echo "=================== Stop containers ====================" stop_llm @@ -238,24 +252,22 @@ stop_dataprep cd $WORKPATH/tests echo "=================== #1 Building docker images====================" -build_vllm_docker_image +# build_vllm_docker_image build_dataprep_agent_images -#### for local test -# build_agent_image_local -# echo "=================== #1 Building docker images completed====================" +# ## for local test +# # build_agent_image_local +echo "=================== #1 Building docker images completed====================" -echo "=================== #2 Start vllm endpoint====================" -start_vllm_service_70B -echo "=================== #2 vllm endpoint started====================" +echo "=================== #2 Start services ====================" +start_all_services +echo "=================== #2 Endpoints for services started====================" -echo "=================== #3 Start dataprep and ingest data ====================" -start_dataprep +echo "=================== #3 Validate ingest_validate_dataprep ====================" ingest_validate_dataprep echo "=================== #3 Data ingestion and validation completed====================" echo "=================== #4 Start agents ====================" -start_agents validate_agent_service echo "=================== #4 Agent test passed ====================" From a0f7ea043dbbcb015ebcd214ea85841af4a19421 Mon Sep 17 00:00:00 2001 From: Yi Yao Date: Tue, 3 Jun 2025 14:59:06 +0800 Subject: [PATCH 14/44] Add code owners. 
(#2022) Signed-off-by: Yi Yao Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .github/CODEOWNERS | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index e57bd74544..5033ca6483 100755 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,17 +2,17 @@ * liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com kaokao.lv@intel.com minmin.hou@intel.com rita.brugarolas.brufau@intel.com /.github/ suyue.chen@intel.com ze.pan@intel.com -/AgentQnA/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com minmin.hou@intel.com +/AgentQnA/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com minmin.hou@intel.com xinyu.ye@intel.com /AudioQnA/ sihan.chen@intel.com wenjiao.yue@intel.com -/AvatarChatbot/ chun.tao@intel.com kaokao.lv@intel.com +/AvatarChatbot/ chun.tao@intel.com kaokao.lv@intel.com xinyu.ye@intel.com /ChatQnA/ liang1.lv@intel.com letong.han@intel.com -/CodeGen/ liang1.lv@intel.com -/CodeTrans/ sihan.chen@intel.com -/DBQnA/ supriya.krishnamurthi@intel.com liang1.lv@intel.com -/DocIndexRetriever/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com chendi.xue@intel.com +/CodeGen/ liang1.lv@intel.com qing.yao@intel.com +/CodeTrans/ sihan.chen@intel.com letong.han@intel.com +/DBQnA/ supriya.krishnamurthi@intel.com liang1.lv@intel.com ze.pan@intel.com +/DocIndexRetriever/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com chendi.xue@intel.com ze.pan@intel.com /DocSum/ letong.han@intel.com /EdgeCraftRAG/ yongbo.zhu@intel.com mingyuan.qi@intel.com -/FinanceAgent/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com minmin.hou@intel.com rita.brugarolas.brufau@intel.com +/FinanceAgent/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com minmin.hou@intel.com rita.brugarolas.brufau@intel.com xinyu.ye@intel.com /GraphRAG/ rita.brugarolas.brufau@intel.com abolfazl.shahbazi@intel.com /InstructionTuning/ xinyu.ye@intel.com kaokao.lv@intel.com /MultimodalQnA/ melanie.h.buehler@intel.com tiep.le@intel.com @@ -20,7 +20,7 @@ /RerankFinetuning/ xinyu.ye@intel.com kaokao.lv@intel.com /SearchQnA/ sihan.chen@intel.com letong.han@intel.com /Text2Image/ wenjiao.yue@intel.com xinyu.ye@intel.com -/Translation/ liang1.lv@intel.com sihan.chen@intel.com +/Translation/ liang1.lv@intel.com sihan.chen@intel.com qing.yao@intel.com /VideoQnA/ huiling.bao@intel.com -/VisualQnA/ liang1.lv@intel.com sihan.chen@intel.com -/WorkflowExecAgent/ joshua.jian.ern.liew@intel.com kaokao.lv@intel.com \ No newline at end of file +/VisualQnA/ liang1.lv@intel.com sihan.chen@intel.com wenjiao.yue@intel.com +/WorkflowExecAgent/ joshua.jian.ern.liew@intel.com kaokao.lv@intel.com wenjiao.yue@intel.com \ No newline at end of file From 977659352a048e6ac18b1e90f516daf06b4536b7 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Tue, 3 Jun 2025 15:18:08 +0800 Subject: [PATCH 15/44] Fix MultimodalQnA UT issues (#2011) Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- MultimodalQnA/tests/test_compose_milvus_on_xeon.sh | 4 ++-- MultimodalQnA/tests/test_compose_on_xeon.sh | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/MultimodalQnA/tests/test_compose_milvus_on_xeon.sh b/MultimodalQnA/tests/test_compose_milvus_on_xeon.sh index c82e0a7c62..d46b38d06f 100644 --- a/MultimodalQnA/tests/test_compose_milvus_on_xeon.sh +++ b/MultimodalQnA/tests/test_compose_milvus_on_xeon.sh @@ -66,11 +66,11 @@ function build_docker_images() { function setup_env() { export 
COLLECTION_NAME="LangChainCollection" - export MILVUS_HOST=${host_ip} + export MILVUS_HOST=${ip_address} export MILVUS_RETRIEVER_PORT=7000 - export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf" cd $WORKPATH/docker_compose/intel source set_env.sh + export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf" } diff --git a/MultimodalQnA/tests/test_compose_on_xeon.sh b/MultimodalQnA/tests/test_compose_on_xeon.sh index 10f015aa7b..2c87016d16 100644 --- a/MultimodalQnA/tests/test_compose_on_xeon.sh +++ b/MultimodalQnA/tests/test_compose_on_xeon.sh @@ -66,8 +66,10 @@ function build_docker_images() { } function setup_env() { + export host_ip=${ip_address} cd $WORKPATH/docker_compose/intel source set_env.sh + export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf" } From 31cd99f11c7007a776c867046762fdf37752fa43 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Thu, 5 Jun 2025 14:36:22 +0800 Subject: [PATCH 16/44] update secrets token name for AgentQnA. (#2023) Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .github/workflows/_gmc-e2e.yml | 2 +- .github/workflows/_helm-e2e.yml | 6 +++--- .github/workflows/_run-docker-compose.yml | 4 ++-- AgentQnA/README.md | 2 +- AgentQnA/docker_compose/amd/gpu/rocm/README.md | 2 +- AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml | 10 +++++----- AgentQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml | 10 +++++----- .../amd/gpu/rocm/launch_agent_service_tgi_rocm.sh | 7 ++++--- .../amd/gpu/rocm/launch_agent_service_vllm_rocm.sh | 7 ++++--- .../amd/gpu/rocm/stop_agent_service_tgi_rocm.sh | 2 +- .../amd/gpu/rocm/stop_agent_service_vllm_rocm.sh | 8 ++++---- AgentQnA/docker_compose/intel/cpu/xeon/set_env.sh | 3 ++- AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml | 8 ++++---- .../grafana/dashboards/download_opea_dashboard.sh | 1 + AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh | 4 ++-- AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml | 2 +- AgentQnA/retrieval_tool/launch_retrieval_tool.sh | 3 ++- AgentQnA/retrieval_tool/run_ingest_data.sh | 1 + AgentQnA/tests/sql_agent_test/run_data_split.sh | 1 + AgentQnA/tests/step2_start_retrieval_tool_rocm_vllm.sh | 2 +- .../tests/step4_launch_and_validate_agent_gaudi.sh | 3 +-- AgentQnA/tests/test_compose_on_gaudi.sh | 2 +- AgentQnA/tests/test_compose_on_rocm.sh | 2 +- AgentQnA/tests/test_compose_vllm_on_rocm.sh | 2 +- ChatQnA/benchmark/accuracy_faqgen/launch_tgi.sh | 1 + ChatQnA/benchmark/accuracy_faqgen/run_acc.sh | 1 + .../xeon/grafana/dashboards/download_opea_dashboard.sh | 1 + .../grafana/dashboards/download_opea_dashboard.sh | 1 + ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh | 2 +- ChatQnA/entrypoint.sh | 1 + HybridRAG/tests/test_compose_on_gaudi.sh | 6 +++--- Text2Image/tests/test_compose_on_gaudi.sh | 1 + Text2Image/tests/test_compose_on_xeon.sh | 1 + 33 files changed, 61 insertions(+), 48 deletions(-) diff --git a/.github/workflows/_gmc-e2e.yml b/.github/workflows/_gmc-e2e.yml index ba50e8b955..debd59ea03 100644 --- a/.github/workflows/_gmc-e2e.yml +++ b/.github/workflows/_gmc-e2e.yml @@ -56,7 +56,7 @@ jobs: - name: Run tests id: run-test env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} run: | diff --git a/.github/workflows/_helm-e2e.yml b/.github/workflows/_helm-e2e.yml index 61a91ce583..fa7ca06761 100644 --- a/.github/workflows/_helm-e2e.yml +++ b/.github/workflows/_helm-e2e.yml @@ -165,8 +165,8 @@ jobs: env: GOOGLE_CSE_ID: ${{ 
secrets.GOOGLE_CSE_ID }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} - HFTOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} + HFTOKEN: ${{ secrets.HF_TOKEN }} value_file: ${{ matrix.value_file }} run: | set -xe @@ -190,7 +190,7 @@ jobs: --namespace $NAMESPACE \ $RELEASE_NAME \ oci://ghcr.io/opea-project/charts/${CHART_NAME} \ - --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \ + --set global.HF_TOKEN=${HFTOKEN} \ --set global.modelUseHostPath=/data2/hf_model \ --set GOOGLE_API_KEY=${{ env.GOOGLE_API_KEY}} \ --set GOOGLE_CSE_ID=${{ env.GOOGLE_CSE_ID}} \ diff --git a/.github/workflows/_run-docker-compose.yml b/.github/workflows/_run-docker-compose.yml index 7af15e11be..4301de9ebc 100644 --- a/.github/workflows/_run-docker-compose.yml +++ b/.github/workflows/_run-docker-compose.yml @@ -170,8 +170,8 @@ jobs: - name: Run test shell: bash env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} - HF_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HF_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} PINECONE_KEY: ${{ secrets.PINECONE_KEY }} diff --git a/AgentQnA/README.md b/AgentQnA/README.md index c78703d6fb..e7f01a3ee1 100644 --- a/AgentQnA/README.md +++ b/AgentQnA/README.md @@ -123,7 +123,7 @@ Set up a [HuggingFace](https://huggingface.co/) account and generate a [user acc Then set an environment variable with the token and another for a directory to download the models: ```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= export HF_CACHE_DIR= # to avoid redownloading models ``` diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/README.md b/AgentQnA/docker_compose/amd/gpu/rocm/README.md index fe5253ed07..27854510e5 100644 --- a/AgentQnA/docker_compose/amd/gpu/rocm/README.md +++ b/AgentQnA/docker_compose/amd/gpu/rocm/README.md @@ -170,7 +170,7 @@ Use AMD GPU driver utilities to determine the correct `cardN` and `renderN` IDs ### Replace the string 'server_address' with your local server IP address export host_ip='server_address' ### Replace the string 'your_huggingfacehub_token' with your HuggingFacehub repository access token. -export HUGGINGFACEHUB_API_TOKEN='your_huggingfacehub_token' +export HF_TOKEN='your_huggingfacehub_token' ### Replace the string 'your_langchain_api_key' with your LANGCHAIN API KEY. 
export LANGCHAIN_API_KEY='your_langchain_api_key' export LANGCHAIN_TRACING_V2="" diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml b/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml index 4eab372dec..722019c6e0 100644 --- a/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml +++ b/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml @@ -13,8 +13,8 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: "http://${ip_address}:${TGI_SERVICE_PORT}" - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} shm_size: 32g devices: - /dev/kfd:/dev/kfd @@ -42,7 +42,7 @@ services: with_memory: false recursion_limit: ${recursion_limit_worker} llm_engine: tgi - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} llm_endpoint_url: ${LLM_ENDPOINT_URL} model: ${LLM_MODEL_ID} temperature: ${temperature} @@ -76,7 +76,7 @@ services: use_hints: false recursion_limit: ${recursion_limit_worker} llm_engine: vllm - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} llm_endpoint_url: ${LLM_ENDPOINT_URL} model: ${LLM_MODEL_ID} temperature: ${temperature} @@ -104,7 +104,7 @@ services: with_memory: true recursion_limit: ${recursion_limit_supervisor} llm_engine: tgi - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} llm_endpoint_url: ${LLM_ENDPOINT_URL} model: ${LLM_MODEL_ID} temperature: ${temperature} diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/AgentQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml index 4b37d15d10..28e48fc569 100644 --- a/AgentQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/AgentQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -10,8 +10,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 WILM_USE_TRITON_FLASH_ATTENTION: 0 @@ -46,7 +46,7 @@ services: with_memory: false recursion_limit: ${recursion_limit_worker} llm_engine: vllm - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} llm_endpoint_url: ${LLM_ENDPOINT_URL} model: ${LLM_MODEL_ID} temperature: ${temperature} @@ -80,7 +80,7 @@ services: use_hints: false recursion_limit: ${recursion_limit_worker} llm_engine: vllm - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} llm_endpoint_url: ${LLM_ENDPOINT_URL} model: ${LLM_MODEL_ID} temperature: ${temperature} @@ -108,7 +108,7 @@ services: with_memory: true recursion_limit: ${recursion_limit_supervisor} llm_engine: vllm - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} llm_endpoint_url: ${LLM_ENDPOINT_URL} model: ${LLM_MODEL_ID} temperature: ${temperature} diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_tgi_rocm.sh b/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_tgi_rocm.sh index 209d9b372a..7774202208 100644 --- a/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_tgi_rocm.sh +++ b/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_tgi_rocm.sh @@ -1,9 +1,10 @@ +#!/bin/bash # Copyright (C) 2024 Advanced Micro Devices, Inc. 
# SPDX-License-Identifier: Apache-2.0 # Before start script: # export host_ip="your_host_ip_or_host_name" -# export HUGGINGFACEHUB_API_TOKEN="your_huggingface_api_token" +# export HF_TOKEN="your_huggingface_api_token" # export LANGCHAIN_API_KEY="your_langchain_api_key" # export LANGCHAIN_TRACING_V2="" @@ -19,7 +20,7 @@ export CRAG_SERVER_PORT="18114" export WORKPATH=$(dirname "$PWD") export WORKDIR=${WORKPATH}/../../../ -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export HF_CACHE_DIR="./data" export MODEL_CACHE="./data" @@ -39,7 +40,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT} export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions" export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions" export HF_CACHE_DIR=${HF_CACHE_DIR} -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export no_proxy=${no_proxy} export http_proxy=${http_proxy} export https_proxy=${https_proxy} diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_vllm_rocm.sh b/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_vllm_rocm.sh index 2d28ea3c34..1e7a7289bd 100644 --- a/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_vllm_rocm.sh +++ b/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_vllm_rocm.sh @@ -1,9 +1,10 @@ +#!/bin/bash # Copyright (C) 2024 Advanced Micro Devices, Inc. # SPDX-License-Identifier: Apache-2.0 # Before start script: # export host_ip="your_host_ip_or_host_name" -# export HUGGINGFACEHUB_API_TOKEN="your_huggingface_api_token" +# export HF_TOKEN="your_huggingface_api_token" # export LANGCHAIN_API_KEY="your_langchain_api_key" # export LANGCHAIN_TRACING_V2="" @@ -19,7 +20,7 @@ export CRAG_SERVER_PORT="18114" export WORKPATH=$(dirname "$PWD") export WORKDIR=${WORKPATH}/../../../ -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export VLLM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export HF_CACHE_DIR="./data" export MODEL_CACHE="./data" @@ -40,7 +41,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT} export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions" export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions" export HF_CACHE_DIR=${HF_CACHE_DIR} -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export no_proxy=${no_proxy} export http_proxy=${http_proxy} export https_proxy=${https_proxy} diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/stop_agent_service_tgi_rocm.sh b/AgentQnA/docker_compose/amd/gpu/rocm/stop_agent_service_tgi_rocm.sh index ab5b133c18..58c6c303cb 100644 --- a/AgentQnA/docker_compose/amd/gpu/rocm/stop_agent_service_tgi_rocm.sh +++ b/AgentQnA/docker_compose/amd/gpu/rocm/stop_agent_service_tgi_rocm.sh @@ -5,7 +5,7 @@ WORKPATH=$(dirname "$PWD")/.. 
export ip_address=${host_ip} -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${HF_TOKEN} export AGENTQNA_TGI_IMAGE=ghcr.io/huggingface/text-generation-inference:2.4.1-rocm export AGENTQNA_TGI_SERVICE_PORT="19001" diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/stop_agent_service_vllm_rocm.sh b/AgentQnA/docker_compose/amd/gpu/rocm/stop_agent_service_vllm_rocm.sh index d448864e08..fa9d6508e6 100644 --- a/AgentQnA/docker_compose/amd/gpu/rocm/stop_agent_service_vllm_rocm.sh +++ b/AgentQnA/docker_compose/amd/gpu/rocm/stop_agent_service_vllm_rocm.sh @@ -1,10 +1,11 @@ +#!/bin/bash # Copyright (C) 2024 Advanced Micro Devices, Inc. # SPDX-License-Identifier: Apache-2.0 # Before start script: # export host_ip="your_host_ip_or_host_name" -# export HUGGINGFACEHUB_API_TOKEN="your_huggingface_api_token" +# export HF_TOKEN="your_huggingface_api_token" # export LANGCHAIN_API_KEY="your_langchain_api_key" # export LANGCHAIN_TRACING_V2="" @@ -20,8 +21,7 @@ export CRAG_SERVER_PORT="18114" export WORKPATH=$(dirname "$PWD") export WORKDIR=${WORKPATH}/../../../ -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export VLLM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export HF_CACHE_DIR="./data" export MODEL_CACHE="./data" @@ -42,7 +42,7 @@ export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT} export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions" export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions" export HF_CACHE_DIR=${HF_CACHE_DIR} -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export no_proxy=${no_proxy} export http_proxy=${http_proxy} export https_proxy=${https_proxy} diff --git a/AgentQnA/docker_compose/intel/cpu/xeon/set_env.sh b/AgentQnA/docker_compose/intel/cpu/xeon/set_env.sh index ba452cf4bb..470cfb044e 100644 --- a/AgentQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ b/AgentQnA/docker_compose/intel/cpu/xeon/set_env.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -33,7 +34,7 @@ fi # retriever export host_ip=$(hostname -I | awk '{print $1}') export HF_CACHE_DIR=${HF_CACHE_DIR} -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export no_proxy=${no_proxy} export http_proxy=${http_proxy} export https_proxy=${https_proxy} diff --git a/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml index a5240ad4b8..9945fa2b41 100644 --- a/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -16,7 +16,7 @@ services: with_memory: false recursion_limit: ${recursion_limit_worker} llm_engine: vllm - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} llm_endpoint_url: ${LLM_ENDPOINT_URL} model: ${LLM_MODEL_ID} temperature: ${temperature} @@ -50,7 +50,7 @@ services: use_hints: false recursion_limit: ${recursion_limit_worker} llm_engine: vllm - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} llm_endpoint_url: ${LLM_ENDPOINT_URL} model: ${LLM_MODEL_ID} temperature: ${temperature} @@ -79,7 +79,7 @@ services: with_memory: true recursion_limit: ${recursion_limit_supervisor} llm_engine: vllm - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} 
llm_endpoint_url: ${LLM_ENDPOINT_URL} model: ${LLM_MODEL_ID} temperature: ${temperature} @@ -122,7 +122,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none LLM_MODEL_ID: ${LLM_MODEL_ID} diff --git a/AgentQnA/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh b/AgentQnA/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh index df4012d2d3..a37e6c39d8 100644 --- a/AgentQnA/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh +++ b/AgentQnA/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh b/AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh index cc8139c960..fb66c459e3 100644 --- a/AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -16,8 +17,7 @@ export ip_address=$(hostname -I | awk '{print $1}') # LLM related environment variables export HF_CACHE_DIR=${HF_CACHE_DIR} ls $HF_CACHE_DIR -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" export NUM_SHARDS=4 export LLM_ENDPOINT_URL="http://${ip_address}:8086" diff --git a/AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml b/AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml index 856642456a..86324b8652 100644 --- a/AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml +++ b/AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml @@ -13,7 +13,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all diff --git a/AgentQnA/retrieval_tool/launch_retrieval_tool.sh b/AgentQnA/retrieval_tool/launch_retrieval_tool.sh index 40ff1ff62a..b5d1a76686 100644 --- a/AgentQnA/retrieval_tool/launch_retrieval_tool.sh +++ b/AgentQnA/retrieval_tool/launch_retrieval_tool.sh @@ -1,9 +1,10 @@ +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 host_ip=$(hostname -I | awk '{print $1}') export HF_CACHE_DIR=${HF_CACHE_DIR} -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export no_proxy=${no_proxy} export http_proxy=${http_proxy} export https_proxy=${https_proxy} diff --git a/AgentQnA/retrieval_tool/run_ingest_data.sh b/AgentQnA/retrieval_tool/run_ingest_data.sh index 8dcd5a22fc..00a3c4e785 100644 --- a/AgentQnA/retrieval_tool/run_ingest_data.sh +++ b/AgentQnA/retrieval_tool/run_ingest_data.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/AgentQnA/tests/sql_agent_test/run_data_split.sh b/AgentQnA/tests/sql_agent_test/run_data_split.sh index 2fc2dfcb0e..07f1d71474 100644 --- a/AgentQnA/tests/sql_agent_test/run_data_split.sh +++ b/AgentQnA/tests/sql_agent_test/run_data_split.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2024 Intel Corporation # 
SPDX-License-Identifier: Apache-2.0 diff --git a/AgentQnA/tests/step2_start_retrieval_tool_rocm_vllm.sh b/AgentQnA/tests/step2_start_retrieval_tool_rocm_vllm.sh index 1c039ad6a0..6a15e79c37 100644 --- a/AgentQnA/tests/step2_start_retrieval_tool_rocm_vllm.sh +++ b/AgentQnA/tests/step2_start_retrieval_tool_rocm_vllm.sh @@ -20,7 +20,7 @@ function start_retrieval_tool() { cd $WORKPATH/../DocIndexRetriever/docker_compose/intel/cpu/xeon host_ip=$(hostname -I | awk '{print $1}') export HF_CACHE_DIR=${HF_CACHE_DIR} - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export no_proxy=${no_proxy} export http_proxy=${http_proxy} export https_proxy=${https_proxy} diff --git a/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh b/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh index 6e970ce2e8..b853094c50 100644 --- a/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh +++ b/AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh @@ -11,8 +11,7 @@ export ip_address=$(hostname -I | awk '{print $1}') export host_ip=$ip_address echo "ip_address=${ip_address}" export TOOLSET_PATH=$WORKPATH/tools/ -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} model="meta-llama/Llama-3.3-70B-Instruct" #"meta-llama/Meta-Llama-3.1-70B-Instruct" export HF_CACHE_DIR=${model_cache:-"/data2/huggingface"} diff --git a/AgentQnA/tests/test_compose_on_gaudi.sh b/AgentQnA/tests/test_compose_on_gaudi.sh index f6e7e95997..2920455e2b 100644 --- a/AgentQnA/tests/test_compose_on_gaudi.sh +++ b/AgentQnA/tests/test_compose_on_gaudi.sh @@ -7,7 +7,7 @@ WORKPATH=$(dirname "$PWD") export WORKDIR=$WORKPATH/../../ echo "WORKDIR=${WORKDIR}" export ip_address=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/ export no_proxy="$no_proxy,rag-agent-endpoint,sql-agent-endpoint,react-agent-endpoint,agent-ui,vllm-gaudi-server,jaeger,grafana,prometheus,127.0.0.1,localhost,0.0.0.0,$ip_address" IMAGE_REPO=${IMAGE_REPO:-"opea"} diff --git a/AgentQnA/tests/test_compose_on_rocm.sh b/AgentQnA/tests/test_compose_on_rocm.sh index 561ab0a967..1a466fff72 100644 --- a/AgentQnA/tests/test_compose_on_rocm.sh +++ b/AgentQnA/tests/test_compose_on_rocm.sh @@ -9,7 +9,7 @@ ls $WORKPATH export WORKDIR=$WORKPATH/../../ echo "WORKDIR=${WORKDIR}" export ip_address=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export TOOLSET_PATH=$WORKPATH/tools/ IMAGE_REPO=${IMAGE_REPO:-"opea"} IMAGE_TAG=${IMAGE_TAG:-"latest"} diff --git a/AgentQnA/tests/test_compose_vllm_on_rocm.sh b/AgentQnA/tests/test_compose_vllm_on_rocm.sh index 711554f965..01630ee243 100644 --- a/AgentQnA/tests/test_compose_vllm_on_rocm.sh +++ b/AgentQnA/tests/test_compose_vllm_on_rocm.sh @@ -8,7 +8,7 @@ WORKPATH=$(dirname "$PWD") export WORKDIR=${WORKPATH}/../../ echo "WORKDIR=${WORKDIR}" export ip_address=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export TOOLSET_PATH=$WORKPATH/tools/ IMAGE_REPO=${IMAGE_REPO:-"opea"} IMAGE_TAG=${IMAGE_TAG:-"latest"} diff --git a/ChatQnA/benchmark/accuracy_faqgen/launch_tgi.sh b/ChatQnA/benchmark/accuracy_faqgen/launch_tgi.sh index 72082d1419..0af4431184 100644 --- a/ChatQnA/benchmark/accuracy_faqgen/launch_tgi.sh +++ b/ChatQnA/benchmark/accuracy_faqgen/launch_tgi.sh @@ 
-1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/ChatQnA/benchmark/accuracy_faqgen/run_acc.sh b/ChatQnA/benchmark/accuracy_faqgen/run_acc.sh index 766b718ff8..d31cc74080 100644 --- a/ChatQnA/benchmark/accuracy_faqgen/run_acc.sh +++ b/ChatQnA/benchmark/accuracy_faqgen/run_acc.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh b/ChatQnA/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh index c3739ec705..b98476de2a 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh +++ b/ChatQnA/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 if ls *.json 1> /dev/null 2>&1; then diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh b/ChatQnA/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh index 03e0d1f3c2..598ed1427a 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 rm *.json diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh b/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh index fe847e6036..1d0409eccd 100755 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh @@ -1,4 +1,4 @@ -#/usr/bin/env bash +#!/usr/bin/env bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/ChatQnA/entrypoint.sh b/ChatQnA/entrypoint.sh index ae395cde0e..c45ae5f94b 100644 --- a/ChatQnA/entrypoint.sh +++ b/ChatQnA/entrypoint.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/HybridRAG/tests/test_compose_on_gaudi.sh b/HybridRAG/tests/test_compose_on_gaudi.sh index f8c2ccf203..9c512afc9a 100755 --- a/HybridRAG/tests/test_compose_on_gaudi.sh +++ b/HybridRAG/tests/test_compose_on_gaudi.sh @@ -99,7 +99,7 @@ function validate_service() { local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "DOCKER_NAME" -eq "text2cypher-gaudi-container" ]; then + if [ "$DOCKER_NAME" == "text2cypher-gaudi-container" ]; then docker ps docker logs text2cypher-gaudi-container fi @@ -114,7 +114,7 @@ function validate_service() { else echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - if [ "DOCKER_NAME" -eq "hybridrag-xeon-backend-server" ]; then + if [ "$DOCKER_NAME" == "hybridrag-xeon-backend-server" ]; then docker ps docker logs text2cypher-gaudi-container fi @@ -123,7 +123,7 @@ function validate_service() { else echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - if [ "DOCKER_NAME" -eq "hybridrag-xeon-backend-server" ]; then + if [ "$DOCKER_NAME" == "hybridrag-xeon-backend-server" ]; then docker ps docker logs text2cypher-gaudi-container fi diff --git a/Text2Image/tests/test_compose_on_gaudi.sh b/Text2Image/tests/test_compose_on_gaudi.sh index 2e4e70c84d..811782cd5b 100644 --- a/Text2Image/tests/test_compose_on_gaudi.sh +++ b/Text2Image/tests/test_compose_on_gaudi.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/Text2Image/tests/test_compose_on_xeon.sh b/Text2Image/tests/test_compose_on_xeon.sh index ac2f344482..650c5d47ed 100644 --- a/Text2Image/tests/test_compose_on_xeon.sh +++ b/Text2Image/tests/test_compose_on_xeon.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 From 9b089ddf8c05e98fb44c955f8173d4bc4f686668 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Thu, 5 Jun 2025 14:58:43 +0800 Subject: [PATCH 17/44] update secrets token name for AudioQnA. (#2024) Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- AudioQnA/benchmark/accuracy/run_acc.sh | 2 +- AudioQnA/docker_compose/amd/gpu/rocm/README.md | 4 ++-- AudioQnA/docker_compose/amd/gpu/rocm/compose.yaml | 2 +- AudioQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml | 4 ++-- AudioQnA/docker_compose/amd/gpu/rocm/set_env.sh | 5 ++--- AudioQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh | 4 ++-- AudioQnA/docker_compose/intel/cpu/xeon/README.md | 2 +- AudioQnA/docker_compose/intel/cpu/xeon/README_vllm.md | 2 +- AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml | 2 +- .../docker_compose/intel/cpu/xeon/compose_multilang.yaml | 2 +- AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml | 2 +- AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh | 2 +- AudioQnA/docker_compose/intel/hpu/gaudi/README.md | 2 +- AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml | 2 +- AudioQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml | 2 +- AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh | 2 +- AudioQnA/tests/README.md | 2 +- README-deploy-benchmark.md | 2 +- deploy.py | 4 ++-- deploy_and_benchmark.py | 2 +- 20 files changed, 25 insertions(+), 26 deletions(-) diff --git a/AudioQnA/benchmark/accuracy/run_acc.sh b/AudioQnA/benchmark/accuracy/run_acc.sh index af7fab3c41..e56835e1be 100644 --- a/AudioQnA/benchmark/accuracy/run_acc.sh +++ b/AudioQnA/benchmark/accuracy/run_acc.sh @@ -1,4 +1,4 @@ - +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/AudioQnA/docker_compose/amd/gpu/rocm/README.md b/AudioQnA/docker_compose/amd/gpu/rocm/README.md index d26e52553c..651974b575 100644 --- a/AudioQnA/docker_compose/amd/gpu/rocm/README.md +++ b/AudioQnA/docker_compose/amd/gpu/rocm/README.md @@ -72,7 +72,7 @@ For TGI inference usage: ```bash export host_ip="External_Public_IP" # ip address of the node -export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token" +export HF_TOKEN="Your_HuggingFace_API_Token" export http_proxy="Your_HTTP_Proxy" # http proxy if any export https_proxy="Your_HTTPs_Proxy" # https proxy if any export no_proxy=localhost,127.0.0.1,$host_ip,whisper-service,speecht5-service,vllm-service,tgi-service,audioqna-xeon-backend-server,audioqna-xeon-ui-server # additional no proxies if needed @@ -84,7 +84,7 @@ For vLLM inference usage ```bash export host_ip="External_Public_IP" # ip address of 
the node -export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token" +export HF_TOKEN="Your_HuggingFace_API_Token" export http_proxy="Your_HTTP_Proxy" # http proxy if any export https_proxy="Your_HTTPs_Proxy" # https proxy if any export no_proxy=localhost,127.0.0.1,$host_ip,whisper-service,speecht5-service,vllm-service,tgi-service,audioqna-xeon-backend-server,audioqna-xeon-ui-server # additional no proxies if needed diff --git a/AudioQnA/docker_compose/amd/gpu/rocm/compose.yaml b/AudioQnA/docker_compose/amd/gpu/rocm/compose.yaml index 2465fec1f5..5ef2298b10 100644 --- a/AudioQnA/docker_compose/amd/gpu/rocm/compose.yaml +++ b/AudioQnA/docker_compose/amd/gpu/rocm/compose.yaml @@ -40,7 +40,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${LLM_MODEL_ID} diff --git a/AudioQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/AudioQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml index da7defe67d..cda5b2529d 100644 --- a/AudioQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/AudioQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -35,8 +35,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 WILM_USE_TRITON_FLASH_ATTENTION: 0 diff --git a/AudioQnA/docker_compose/amd/gpu/rocm/set_env.sh b/AudioQnA/docker_compose/amd/gpu/rocm/set_env.sh index d4a0bda6d1..b3106df68b 100644 --- a/AudioQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ b/AudioQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -1,5 +1,4 @@ -#!/usr/bin/env bash set_env.sh - +#!/usr/bin/env bash # Copyright (C) 2024 Advanced Micro Devices, Inc. # SPDX-License-Identifier: Apache-2.0 @@ -7,7 +6,7 @@ # export host_ip= # export host_ip=$(hostname -I | awk '{print $1}') export host_ip=${ip_address} -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} # export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 diff --git a/AudioQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/AudioQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh index 9cd8934f49..c6c59342a4 100644 --- a/AudioQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/AudioQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash set_env.sh +#!/usr/bin/env bash # Copyright (C) 2024 Advanced Micro Devices, Inc. 
# SPDX-License-Identifier: Apache-2.0 @@ -8,7 +8,7 @@ export host_ip=${ip_address} export external_host_ip=${ip_address} -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export HF_CACHE_DIR="./data" export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export VLLM_SERVICE_PORT="8081" diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/README.md b/AudioQnA/docker_compose/intel/cpu/xeon/README.md index 3994d34219..918e5c8a21 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/README.md +++ b/AudioQnA/docker_compose/intel/cpu/xeon/README.md @@ -43,7 +43,7 @@ To set up environment variables for deploying AudioQnA services, set up some par ```bash export host_ip="External_Public_IP" # ip address of the node -export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token" +export HF_TOKEN="Your_HuggingFace_API_Token" export http_proxy="Your_HTTP_Proxy" # http proxy if any export https_proxy="Your_HTTPs_Proxy" # https proxy if any export no_proxy=localhost,127.0.0.1,$host_ip,whisper-service,speecht5-service,vllm-service,tgi-service,audioqna-xeon-backend-server,audioqna-xeon-ui-server # additional no proxies if needed diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/README_vllm.md b/AudioQnA/docker_compose/intel/cpu/xeon/README_vllm.md index 8602259532..8b94343e32 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/README_vllm.md +++ b/AudioQnA/docker_compose/intel/cpu/xeon/README_vllm.md @@ -68,7 +68,7 @@ The output of the command should contain images: ```bash ### Replace the string 'your_huggingfacehub_token' with your HuggingFacehub repository access token. -export HUGGINGFACEHUB_API_TOKEN='your_huggingfacehub_token' +export HF_TOKEN='your_huggingfacehub_token' ``` ### Setting variables in the file set_env_vllm.sh diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml index 1fe5e6b2a6..1f3b5ec454 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -36,7 +36,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LLM_MODEL_ID: ${LLM_MODEL_ID} VLLM_TORCH_PROFILER_DIR: "/mnt" LLM_SERVER_PORT: ${LLM_SERVER_PORT} diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml index 4499da33bf..528ba48c68 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml +++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml @@ -40,7 +40,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LLM_MODEL_ID: ${LLM_MODEL_ID} VLLM_TORCH_PROFILER_DIR: "/mnt" LLM_SERVER_PORT: ${LLM_SERVER_PORT} diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml index d421f488fd..2dda4379ee 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml +++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml @@ -36,7 +36,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LLM_SERVER_PORT: ${LLM_SERVER_PORT} healthcheck: test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"] diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh 
b/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh index 4a63ef65b3..3fb001855d 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ b/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh @@ -5,7 +5,7 @@ # export host_ip= export host_ip=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} # export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/README.md b/AudioQnA/docker_compose/intel/hpu/gaudi/README.md index dbec6d11bd..ae2ede434e 100644 --- a/AudioQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/README.md @@ -43,7 +43,7 @@ To set up environment variables for deploying AudioQnA services, set up some par ```bash export host_ip="External_Public_IP" # ip address of the node -export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token" +export HF_TOKEN="Your_HuggingFace_API_Token" export http_proxy="Your_HTTP_Proxy" # http proxy if any export https_proxy="Your_HTTPs_Proxy" # https proxy if any export no_proxy=localhost,127.0.0.1,$host_ip,whisper-service,speecht5-service,vllm-service,tgi-service,audioqna-gaudi-backend-server,audioqna-gaudi-ui-server # additional no proxies if needed diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml index c1e9db4505..dcd05ff160 100644 --- a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -45,7 +45,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/AudioQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml index f14bd8cb99..c1d02da25a 100644 --- a/AudioQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml @@ -45,7 +45,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh b/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh index 179a8c2a24..4b19d19c08 100644 --- a/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh @@ -5,7 +5,7 @@ # export host_ip= export host_ip=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} # export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" diff --git a/AudioQnA/tests/README.md b/AudioQnA/tests/README.md index 390c182447..c55fa7b7b5 100644 --- a/AudioQnA/tests/README.md +++ b/AudioQnA/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test diff --git a/README-deploy-benchmark.md b/README-deploy-benchmark.md index 1b0f0ee530..cbdef4fb62 100644 --- a/README-deploy-benchmark.md +++ b/README-deploy-benchmark.md @@ -30,7 +30,7 @@ Before running the benchmarks, ensure you have: - (Optional) [Kubernetes set up guide on Intel Gaudi 
product](https://github.com/opea-project/GenAIInfra/blob/main/README.md#setup-kubernetes-cluster) 2. **Configuration YAML** - The configuration file (e.g., `./ChatQnA/benchmark_chatqna.yaml`) consists of two main sections: deployment and benchmarking. Required fields with `# mandatory` comment must be filled with valid values, such as `HUGGINGFACEHUB_API_TOKEN`. For all other fields, you can either customize them according to our needs or leave them empty ("") to use the default values from the [helm charts](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts). + The configuration file (e.g., `./ChatQnA/benchmark_chatqna.yaml`) consists of two main sections: deployment and benchmarking. Required fields with `# mandatory` comment must be filled with valid values, such as `HF_TOKEN`. For all other fields, you can either customize them according to our needs or leave them empty ("") to use the default values from the [helm charts](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts). **Default Models**: diff --git a/deploy.py b/deploy.py index 184b82e187..682af01c17 100644 --- a/deploy.py +++ b/deploy.py @@ -220,11 +220,11 @@ def generate_helm_values(example_type, deploy_config, chart_dir, action_type, no # Initialize base values values = { "global": { - "HUGGINGFACEHUB_API_TOKEN": deploy_config.get("HUGGINGFACEHUB_API_TOKEN", ""), + "HF_TOKEN": deploy_config.get("HF_TOKEN", ""), "modelUseHostPath": deploy_config.get("modelUseHostPath", ""), } } - os.environ["HF_TOKEN"] = deploy_config.get("HUGGINGFACEHUB_API_TOKEN", "") + os.environ["HF_TOKEN"] = deploy_config.get("HF_TOKEN", "") # Configure components values = configure_node_selectors(values, node_selector or {}, deploy_config) diff --git a/deploy_and_benchmark.py b/deploy_and_benchmark.py index 495a554525..e14be3c800 100644 --- a/deploy_and_benchmark.py +++ b/deploy_and_benchmark.py @@ -187,7 +187,7 @@ def main(yaml_file, target_node=None, test_mode="oob", clean_up=True): return # Set HF_TOKEN - HF_TOKEN = deploy_config.get("HUGGINGFACEHUB_API_TOKEN", "") + HF_TOKEN = deploy_config.get("HF_TOKEN", "") os.environ["HF_TOKEN"] = HF_TOKEN os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN From 313f6716d5dfb26cca8e3e3521aefe1e7d2a8a55 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Thu, 5 Jun 2025 15:18:14 +0800 Subject: [PATCH 18/44] update secrets token name for AvatarChatbot and DBQnA. 
(#2030) Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- AvatarChatbot/docker_compose/amd/gpu/rocm/README.md | 4 ++-- AvatarChatbot/docker_compose/amd/gpu/rocm/compose.yaml | 4 ++-- AvatarChatbot/docker_compose/amd/gpu/rocm/set_env.sh | 2 +- AvatarChatbot/docker_compose/intel/cpu/xeon/README.md | 4 ++-- AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml | 2 +- AvatarChatbot/docker_compose/intel/cpu/xeon/set_env.sh | 2 +- AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md | 4 ++-- AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml | 2 +- AvatarChatbot/docker_compose/intel/hpu/gaudi/set_env.sh | 2 +- AvatarChatbot/tests/README.md | 2 +- DBQnA/docker_compose/amd/gpu/rocm/README.md | 2 +- DBQnA/docker_compose/amd/gpu/rocm/set_env.sh | 2 +- DBQnA/docker_compose/intel/cpu/xeon/README.md | 8 ++++---- DBQnA/docker_compose/intel/cpu/xeon/compose.yaml | 4 ++-- DBQnA/docker_compose/intel/cpu/xeon/set_env.sh | 4 ++-- DBQnA/tests/README.md | 2 +- 16 files changed, 25 insertions(+), 25 deletions(-) diff --git a/AvatarChatbot/docker_compose/amd/gpu/rocm/README.md b/AvatarChatbot/docker_compose/amd/gpu/rocm/README.md index a94924ab16..036810d53a 100644 --- a/AvatarChatbot/docker_compose/amd/gpu/rocm/README.md +++ b/AvatarChatbot/docker_compose/amd/gpu/rocm/README.md @@ -68,7 +68,7 @@ Then run the command `docker images`, you will have following images ready: Before starting the services with `docker compose`, you have to recheck the following environment variables. ```bash -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export host_ip=$(hostname -I | awk '{print $1}') export TGI_SERVICE_PORT=3006 @@ -203,7 +203,7 @@ In the current version v1.3, you need to set the avatar figure image/video and t cd GenAIExamples/AvatarChatbot/tests export IMAGE_REPO="opea" export IMAGE_TAG="latest" -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= test_avatarchatbot_on_xeon.sh ``` diff --git a/AvatarChatbot/docker_compose/amd/gpu/rocm/compose.yaml b/AvatarChatbot/docker_compose/amd/gpu/rocm/compose.yaml index 884e1fcf79..651ff1a146 100644 --- a/AvatarChatbot/docker_compose/amd/gpu/rocm/compose.yaml +++ b/AvatarChatbot/docker_compose/amd/gpu/rocm/compose.yaml @@ -52,8 +52,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} shm_size: 1g devices: - /dev/kfd:/dev/kfd diff --git a/AvatarChatbot/docker_compose/amd/gpu/rocm/set_env.sh b/AvatarChatbot/docker_compose/amd/gpu/rocm/set_env.sh index 38d54c38f7..02e35b241f 100644 --- a/AvatarChatbot/docker_compose/amd/gpu/rocm/set_env.sh +++ b/AvatarChatbot/docker_compose/amd/gpu/rocm/set_env.sh @@ -3,7 +3,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export OPENAI_API_KEY=${OPENAI_API_KEY} export host_ip=$(hostname -I | awk '{print $1}') diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md b/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md index b803392f80..e4c91f67cd 100644 --- a/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md +++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md @@ -58,7 +58,7 @@ Then run the command `docker images`, you will have following images ready: Before starting the services with `docker 
compose`, you have to recheck the following environment variables. ```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= export host_ip=$(hostname -I | awk '{print $1}') export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 @@ -173,7 +173,7 @@ In the current version v1.3, you need to set the avatar figure image/video and t cd GenAIExamples/AvatarChatbot/tests export IMAGE_REPO="opea" export IMAGE_TAG="latest" -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= test_avatarchatbot_on_xeon.sh ``` diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml index f33449d020..6d9491f6c7 100644 --- a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml +++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml @@ -37,7 +37,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "curl -f http://${host_ip}:3006/health || exit 1"] interval: 10s diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/set_env.sh b/AvatarChatbot/docker_compose/intel/cpu/xeon/set_env.sh index 49c7e4cdd7..c65d6b9446 100644 --- a/AvatarChatbot/docker_compose/intel/cpu/xeon/set_env.sh +++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/set_env.sh @@ -6,7 +6,7 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export host_ip=$(hostname -I | awk '{print $1}') export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 export WAV2LIP_ENDPOINT=http://$host_ip:7860 diff --git a/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md b/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md index 105987ec18..2dfc814437 100644 --- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md +++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md @@ -58,7 +58,7 @@ Then run the command `docker images`, you will have following images ready: Before starting the services with `docker compose`, you have to recheck the following environment variables. 
```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= export host_ip=$(hostname -I | awk '{print $1}') export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 @@ -183,7 +183,7 @@ In the current version v1.3, you need to set the avatar figure image/video and t cd GenAIExamples/AvatarChatbot/tests export IMAGE_REPO="opea" export IMAGE_TAG="latest" -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= test_avatarchatbot_on_gaudi.sh ``` diff --git a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml index aba9bb910c..036736a574 100644 --- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml @@ -48,7 +48,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all diff --git a/AvatarChatbot/docker_compose/intel/hpu/gaudi/set_env.sh b/AvatarChatbot/docker_compose/intel/hpu/gaudi/set_env.sh index a55f4b4f58..a14f168d39 100644 --- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/set_env.sh @@ -6,7 +6,7 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null -export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN +export HF_TOKEN=$HF_TOKEN export host_ip=$(hostname -I | awk '{print $1}') export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 diff --git a/AvatarChatbot/tests/README.md b/AvatarChatbot/tests/README.md index 411afc28b7..bc50211ec0 100644 --- a/AvatarChatbot/tests/README.md +++ b/AvatarChatbot/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test diff --git a/DBQnA/docker_compose/amd/gpu/rocm/README.md b/DBQnA/docker_compose/amd/gpu/rocm/README.md index 014d5722c4..3e212f31e5 100644 --- a/DBQnA/docker_compose/amd/gpu/rocm/README.md +++ b/DBQnA/docker_compose/amd/gpu/rocm/README.md @@ -36,7 +36,7 @@ Then run the command `docker images`, you will have the following Docker Images: We set default model as "mistralai/Mistral-7B-Instruct-v0.3", change "LLM_MODEL_ID" in following Environment Variables setting if you want to use other models. -If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HUGGINGFACEHUB_API_TOKEN" environment variable. +If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HF_TOKEN" environment variable. 
### 2.1 Setup Environment Variables diff --git a/DBQnA/docker_compose/amd/gpu/rocm/set_env.sh b/DBQnA/docker_compose/amd/gpu/rocm/set_env.sh index f744dbcc0f..fb52787fb0 100644 --- a/DBQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ b/DBQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -9,7 +9,7 @@ source .set_env.sh popd > /dev/null export host_ip=${ip_address} -export DBQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DBQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export DBQNA_TGI_SERVICE_PORT=8008 export DBQNA_TGI_LLM_ENDPOINT="http://${host_ip}:${DBQNA_TGI_SERVICE_PORT}" export DBQNA_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" diff --git a/DBQnA/docker_compose/intel/cpu/xeon/README.md b/DBQnA/docker_compose/intel/cpu/xeon/README.md index c227e1fe46..a979a43da6 100644 --- a/DBQnA/docker_compose/intel/cpu/xeon/README.md +++ b/DBQnA/docker_compose/intel/cpu/xeon/README.md @@ -36,10 +36,10 @@ Then run the command `docker images`, you will have the following Docker Images: We set default model as "mistralai/Mistral-7B-Instruct-v0.3", change "LLM_MODEL_ID" in following Environment Variables setting if you want to use other models. -If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HUGGINGFACEHUB_API_TOKEN" environment variable. +If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HF_TOKEN" environment variable. ```bash -export HUGGINGFACEHUB_API_TOKEN="xxx" +export HF_TOKEN="xxx" ``` ### 2.1 Setup Environment Variables @@ -61,7 +61,7 @@ export https_proxy=${https_proxy} export TGI_PORT=8008 export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_PORT} -export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export POSTGRES_USER=postgres export POSTGRES_PASSWORD=testpwd @@ -109,7 +109,7 @@ docker run --name test-text2sql-postgres --ipc=host -e POSTGRES_USER=${POSTGRES_ ```bash -docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.4.1 --model-id $model +docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.4.1 --model-id $model ``` - Start Text-to-SQL Service diff --git a/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml b/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml index b96a71d01d..1e66ef992f 100644 --- a/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -13,8 +13,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} shm_size: 1g command: --model-id ${LLM_MODEL_ID} diff --git a/DBQnA/docker_compose/intel/cpu/xeon/set_env.sh b/DBQnA/docker_compose/intel/cpu/xeon/set_env.sh index f05e9c871c..3990c7b114 100755 --- a/DBQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ b/DBQnA/docker_compose/intel/cpu/xeon/set_env.sh @@ -9,8 +9,8 @@ popd > /dev/null export host_ip=${ip_address} export
no_proxy=$no_proxy,$host_ip,dbqna-xeon-react-ui-server,text2sql-service,tgi-service,postgres-container -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} +export HF_TOKEN=${HF_TOKEN} export POSTGRES_USER=postgres export POSTGRES_PASSWORD=testpwd export POSTGRES_DB=chinook diff --git a/DBQnA/tests/README.md b/DBQnA/tests/README.md index 5d6dc16a10..951d7da1c7 100644 --- a/DBQnA/tests/README.md +++ b/DBQnA/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test From 39b53a2410f649aec27e38d2f4f04df2a0687ada Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Thu, 5 Jun 2025 15:19:15 +0800 Subject: [PATCH 19/44] update secrets token name for ChatQnA. (#2029) Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- ChatQnA/benchmark_chatqna.yaml | 2 +- ChatQnA/docker_compose/amd/gpu/rocm/README.md | 2 +- ChatQnA/docker_compose/amd/gpu/rocm/set_env.sh | 2 +- ChatQnA/docker_compose/amd/gpu/rocm/set_env_faqgen.sh | 2 +- .../docker_compose/amd/gpu/rocm/set_env_faqgen_vllm.sh | 2 +- ChatQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh | 2 +- ChatQnA/docker_compose/intel/cpu/aipc/README.md | 10 +++++----- ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml | 6 +++--- ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh | 6 +++--- ChatQnA/docker_compose/intel/cpu/xeon/README.md | 2 +- ChatQnA/docker_compose/intel/cpu/xeon/README_faqgen.md | 4 ++-- .../docker_compose/intel/cpu/xeon/README_mariadb.md | 2 +- .../docker_compose/intel/cpu/xeon/README_pinecone.md | 4 ++-- ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml | 8 ++++---- .../docker_compose/intel/cpu/xeon/compose_faqgen.yaml | 6 +++--- .../intel/cpu/xeon/compose_faqgen_tgi.yaml | 6 +++--- .../docker_compose/intel/cpu/xeon/compose_mariadb.yaml | 8 ++++---- .../docker_compose/intel/cpu/xeon/compose_milvus.yaml | 8 ++++---- .../intel/cpu/xeon/compose_pinecone.yaml | 8 ++++---- .../docker_compose/intel/cpu/xeon/compose_qdrant.yaml | 6 +++--- .../docker_compose/intel/cpu/xeon/compose_remote.yaml | 6 +++--- ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml | 8 ++++---- .../intel/cpu/xeon/compose_without_rerank.yaml | 6 +++--- ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh | 1 - .../docker_compose/intel/cpu/xeon/set_env_mariadb.sh | 6 +++--- ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml | 6 +++--- .../docker_compose/intel/hpu/gaudi/compose_faqgen.yaml | 4 ++-- .../intel/hpu/gaudi/compose_faqgen_tgi.yaml | 6 +++--- .../intel/hpu/gaudi/compose_guardrails.yaml | 10 +++++----- .../docker_compose/intel/hpu/gaudi/compose_tgi.yaml | 6 +++--- .../intel/hpu/gaudi/compose_without_rerank.yaml | 6 +++--- .../intel/hpu/gaudi/how_to_validate_service.md | 2 +- ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh | 4 ++-- .../docker_compose/intel/hpu/gaudi/set_env_faqgen.sh | 1 - ChatQnA/docker_compose/nvidia/gpu/README.md | 4 ++-- ChatQnA/docker_compose/nvidia/gpu/compose.yaml | 6 +++--- ChatQnA/tests/README.md | 2 +- 37 files changed, 89 insertions(+), 91 deletions(-) diff --git a/ChatQnA/benchmark_chatqna.yaml b/ChatQnA/benchmark_chatqna.yaml index e528bb9d7a..815523191f 100644 --- a/ChatQnA/benchmark_chatqna.yaml +++ b/ChatQnA/benchmark_chatqna.yaml @@ -5,7 +5,7 @@ deploy: device: gaudi version: 1.3.0 modelUseHostPath: /mnt/models - HUGGINGFACEHUB_API_TOKEN: "" # mandatory + 
HF_TOKEN: "" # mandatory node: [1, 2, 4, 8] namespace: "" timeout: 1000 # timeout in seconds for services to be ready, default 30 minutes diff --git a/ChatQnA/docker_compose/amd/gpu/rocm/README.md b/ChatQnA/docker_compose/amd/gpu/rocm/README.md index 4d968b84eb..0ce4e88958 100644 --- a/ChatQnA/docker_compose/amd/gpu/rocm/README.md +++ b/ChatQnA/docker_compose/amd/gpu/rocm/README.md @@ -64,7 +64,7 @@ Set the values of the variables: Setting variables in the operating system environment: ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token" +export HF_TOKEN="Your_HuggingFace_API_Token" source ./set_env_*.sh # replace the script name with the appropriate one ``` diff --git a/ChatQnA/docker_compose/amd/gpu/rocm/set_env.sh b/ChatQnA/docker_compose/amd/gpu/rocm/set_env.sh index 5fcdad0a06..3d729dd0da 100644 --- a/ChatQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ b/ChatQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -6,7 +6,7 @@ export HOST_IP=${ip_address} export HOST_IP_EXTERNAL=${ip_address} export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base" diff --git a/ChatQnA/docker_compose/amd/gpu/rocm/set_env_faqgen.sh b/ChatQnA/docker_compose/amd/gpu/rocm/set_env_faqgen.sh index 543119eadc..706eb64ac0 100644 --- a/ChatQnA/docker_compose/amd/gpu/rocm/set_env_faqgen.sh +++ b/ChatQnA/docker_compose/amd/gpu/rocm/set_env_faqgen.sh @@ -6,7 +6,7 @@ export HOST_IP=${ip_address} export HOST_IP_EXTERNAL=${ip_address} export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base" diff --git a/ChatQnA/docker_compose/amd/gpu/rocm/set_env_faqgen_vllm.sh b/ChatQnA/docker_compose/amd/gpu/rocm/set_env_faqgen_vllm.sh index d2462d2646..35379df191 100644 --- a/ChatQnA/docker_compose/amd/gpu/rocm/set_env_faqgen_vllm.sh +++ b/ChatQnA/docker_compose/amd/gpu/rocm/set_env_faqgen_vllm.sh @@ -6,7 +6,7 @@ export HOST_IP=${ip_address} export HOST_IP_EXTERNAL=${ip_address} export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base" diff --git a/ChatQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/ChatQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh index 0000b233e1..ae31cee3cf 100644 --- a/ChatQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/ChatQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -6,7 +6,7 @@ export HOST_IP=${ip_address} export HOST_IP_EXTERNAL=${ip_address} export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" -export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base" diff --git a/ChatQnA/docker_compose/intel/cpu/aipc/README.md b/ChatQnA/docker_compose/intel/cpu/aipc/README.md index 77d7ddfcd0..1bf2b9c674 100644 --- a/ChatQnA/docker_compose/intel/cpu/aipc/README.md +++ 
b/ChatQnA/docker_compose/intel/cpu/aipc/README.md @@ -22,7 +22,7 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/aipc 1. Set the required environment variables: ```bash - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" ``` 2. If you are in a proxy environment, also set the proxy-related environment variables: @@ -160,12 +160,12 @@ export host_ip="External_Public_IP" For Linux users, please run `hostname -I | awk '{print $1}'`. For Windows users, please run `ipconfig | findstr /i "IPv4"` to get the external public ip. -**Export the value of your Huggingface API token to the `HUGGINGFACEHUB_API_TOKEN` environment variable** +**Export the value of your Huggingface API token to the `HF_TOKEN` environment variable** > Change the Your_Huggingface_API_Token below with tyour actual Huggingface API Token value ``` -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` **Append the value of the public IP address to the no_proxy list if you are in a proxy environment** @@ -183,7 +183,7 @@ export https_proxy=${your_http_proxy} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export INDEX_NAME="rag-redis" -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export OLLAMA_HOST=${host_ip} export OLLAMA_MODEL="llama3.2" ``` @@ -194,7 +194,7 @@ export OLLAMA_MODEL="llama3.2" set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5 set RERANK_MODEL_ID=BAAI/bge-reranker-base set INDEX_NAME=rag-redis -set HUGGINGFACEHUB_API_TOKEN=%HUGGINGFACEHUB_API_TOKEN% +set HF_TOKEN=%HF_TOKEN% set OLLAMA_HOST=host.docker.internal set OLLAMA_MODEL="llama3.2" ``` diff --git a/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml b/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml index 9035642c5d..2afd0d9181 100644 --- a/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml +++ b/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml @@ -24,7 +24,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] interval: 10s @@ -60,7 +60,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped @@ -76,7 +76,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate diff --git a/ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh b/ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh index 3ee4cd6d6c..f2cfdf6fe7 100644 --- a/ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh +++ b/ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh @@ -9,15 +9,15 @@ popd > /dev/null export host_ip=$(hostname -I | awk '{print $1}') -if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then - echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN." +if [ -z "${HF_TOKEN}" ]; then + echo "Error: HF_TOKEN is not set. 
Please set HF_TOKEN." fi if [ -z "${host_ip}" ]; then echo "Error: host_ip is not set. Please set host_ip first." fi -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export INDEX_NAME="rag-redis" diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md index 166dc50c40..f8ac050355 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md @@ -45,7 +45,7 @@ To set up environment variables for deploying ChatQnA services, set up some para ``` export host_ip="External_Public_IP" #ip address of the node -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" export http_proxy="Your_HTTP_Proxy" #http proxy if any export https_proxy="Your_HTTPs_Proxy" #https proxy if any export no_proxy=localhost,127.0.0.1,$host_ip #additional no proxies if needed diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_faqgen.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_faqgen.md index c783161288..4210415db5 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README_faqgen.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_faqgen.md @@ -129,7 +129,7 @@ Then run the command `docker images`, you will have the following Docker Images: We set default model as "meta-llama/Meta-Llama-3-8B-Instruct", change "LLM_MODEL_ID" in following Environment Variables setting if you want to use other models. -If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HUGGINGFACEHUB_API_TOKEN" environment variable. +If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HF_TOKEN" environment variable. 
### Setup Environment Variables @@ -145,7 +145,7 @@ export LLM_SERVICE_PORT=9000 export FAQGEN_BACKEND_PORT=8888 export FAQGen_COMPONENT_NAME="OpeaFaqGenvLLM" export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_mariadb.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_mariadb.md index 4717e61109..31681566a7 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README_mariadb.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_mariadb.md @@ -114,7 +114,7 @@ export host_ip="External_Public_IP" > Change to your actual Huggingface API Token value ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` **Append the value of the public IP address to the no_proxy list if you are in a proxy environment** diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md index b26435c335..6c90228430 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md @@ -25,7 +25,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste ```bash # Example: host_ip="192.168.1.1" export host_ip="External_Public_IP" - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" export PINECONE_API_KEY="Pinecone_API_Key" export PINECONE_INDEX_NAME="Pinecone_Index_Name" export INDEX_NAME="Pinecone_Index_Name" @@ -201,7 +201,7 @@ For users in China who are unable to download models directly from Huggingface, ```bash # Example: host_ip="192.168.1.1" export host_ip="External_Public_IP" - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" # Example: NGINX_PORT=80 export NGINX_PORT=${your_nginx_port} export PINECONE_API_KEY="Pinecone_API_Key" diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml index 1e5fef6d40..2f6eb00642 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -31,7 +31,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] interval: 10s @@ -67,7 +67,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped @@ -83,7 +83,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate @@ -99,7 +99,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + 
HF_TOKEN: ${HF_TOKEN} LLM_MODEL_ID: ${LLM_MODEL_ID} VLLM_TORCH_PROFILER_DIR: "/mnt" VLLM_CPU_KVCACHE_SPACE: 40 diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen.yaml index eb31dfb1fa..2c772044a7 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen.yaml @@ -24,7 +24,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] interval: 10s @@ -60,7 +60,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped @@ -76,7 +76,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen_tgi.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen_tgi.yaml index a66be60327..9750a4fa98 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen_tgi.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen_tgi.yaml @@ -24,7 +24,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] interval: 10s @@ -60,7 +60,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped @@ -76,7 +76,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_mariadb.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_mariadb.yaml index 9e109e6144..9731f011f3 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_mariadb.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_mariadb.yaml @@ -35,7 +35,7 @@ services: DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MARIADBVECTOR" MARIADB_CONNECTION_URL: mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@mariadb-server:3306/${MARIADB_DATABASE} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] interval: 10s @@ -69,7 +69,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} MARIADB_CONNECTION_URL: 
mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@mariadb-server:3306/${MARIADB_DATABASE} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MARIADBVECTOR" restart: unless-stopped @@ -85,7 +85,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate @@ -101,7 +101,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LLM_MODEL_ID: ${LLM_MODEL_ID} VLLM_TORCH_PROFILER_DIR: "/mnt" VLLM_CPU_KVCACHE_SPACE: 40 diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml index eb81c3ec2e..2dbf25ca05 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml @@ -75,7 +75,7 @@ services: MILVUS_HOST: ${host_ip} MILVUS_PORT: 19530 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID} LOGFLAG: ${LOGFLAG} healthcheck: @@ -107,7 +107,7 @@ services: MILVUS_HOST: ${host_ip} MILVUS_PORT: 19530 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MILVUS" restart: unless-stopped @@ -138,7 +138,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate @@ -155,7 +155,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LLM_MODEL_ID: ${LLM_MODEL_ID} VLLM_TORCH_PROFILER_DIR: "/mnt" healthcheck: diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml index 8a2af3c117..917cfd26a8 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml @@ -20,7 +20,7 @@ services: PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_PINECONE" healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] @@ -55,7 +55,7 @@ services: PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_PINECONE" restart: unless-stopped @@ -71,7 +71,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: 
${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate @@ -87,7 +87,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LLM_MODEL_ID: ${LLM_MODEL_ID} VLLM_TORCH_PROFILER_DIR: "/mnt" command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml index 38cad037a4..b5e475185f 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml @@ -24,7 +24,7 @@ services: QDRANT_PORT: 6333 QDRANT_INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_QDRANT" healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] @@ -76,7 +76,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate @@ -92,7 +92,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LLM_MODEL_ID: ${LLM_MODEL_ID} VLLM_TORCH_PROFILER_DIR: "/mnt" command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml index 2acc51bbe4..a69a420aaa 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml @@ -31,7 +31,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-server @@ -61,7 +61,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped @@ -77,7 +77,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml index 434ae34eac..b57be60cf1 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml @@ -24,7 +24,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: 
["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] interval: 10s @@ -60,7 +60,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped @@ -76,7 +76,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate @@ -92,7 +92,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml index b813852c74..1dd80c7106 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml @@ -24,7 +24,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] interval: 10s @@ -60,7 +60,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped @@ -76,7 +76,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LLM_MODEL_ID: ${LLM_MODEL_ID} VLLM_TORCH_PROFILER_DIR: "/mnt" command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh b/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh index 2959f94321..e32a53ac1f 100755 --- a/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ b/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh @@ -7,7 +7,6 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export HF_TOKEN=${HF_TOKEN} export host_ip=${ip_address} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/set_env_mariadb.sh b/ChatQnA/docker_compose/intel/cpu/xeon/set_env_mariadb.sh index 88ae5c0eec..2ef732aeb0 100755 --- a/ChatQnA/docker_compose/intel/cpu/xeon/set_env_mariadb.sh +++ b/ChatQnA/docker_compose/intel/cpu/xeon/set_env_mariadb.sh @@ -7,15 +7,15 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null -if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then - echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN." +if [ -z "${HF_TOKEN}" ]; then + echo "Error: HF_TOKEN is not set. Please set HF_TOKEN." 
fi export host_ip=$(hostname -I | awk '{print $1}') export MARIADB_DATABASE="vectordb" export MARIADB_USER="chatqna" export MARIADB_PASSWORD="password" -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 49d7ff99a5..ed9d3ffc1d 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -31,7 +31,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] interval: 10s @@ -67,7 +67,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped tei-reranking-service: image: ghcr.io/huggingface/tei-gaudi:1.5.0 @@ -101,7 +101,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none LLM_MODEL_ID: ${LLM_MODEL_ID} diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen.yaml index 951956be8f..09a94df962 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen.yaml @@ -24,7 +24,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] @@ -61,7 +61,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped tei-reranking-service: image: ghcr.io/huggingface/tei-gaudi:1.5.0 diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen_tgi.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen_tgi.yaml index 8c2b0d1d54..01c55de853 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen_tgi.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen_tgi.yaml @@ -24,7 +24,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] @@ -61,7 +61,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped tei-reranking-service: image: ghcr.io/huggingface/tei-gaudi:1.5.0 @@ -95,7 +95,7 @@ 
services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml index 7f44764413..34005977be 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml @@ -24,7 +24,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] interval: 10s @@ -42,7 +42,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none GURADRAILS_MODEL_ID: ${GURADRAILS_MODEL_ID} @@ -73,7 +73,7 @@ services: https_proxy: ${https_proxy} SAFETY_GUARD_MODEL_ID: ${GURADRAILS_MODEL_ID} SAFETY_GUARD_ENDPOINT: http://vllm-guardrails-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 @@ -104,7 +104,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped @@ -140,7 +140,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none LLM_MODEL_ID: ${LLM_MODEL_ID} diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml index 02d99098b6..45294816dd 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml @@ -24,7 +24,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] interval: 10s @@ -60,7 +60,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped @@ -96,7 +96,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml index 
9704984f1a..1afc2ae7a2 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml @@ -24,7 +24,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] interval: 10s @@ -60,7 +60,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped @@ -75,7 +75,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none LLM_MODEL_ID: ${LLM_MODEL_ID} diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md index ce515d4509..98e97fb19c 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md @@ -123,7 +123,7 @@ View the docker input parameters in `./ChatQnA/docker_compose/intel/hpu/gaudi/co environment: http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none ENABLE_HPU_GRAPH: true diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh b/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh index 1d0409eccd..1a3acef274 100755 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh @@ -42,7 +42,7 @@ popd > /dev/null # Prompt the user for each required environment variable prompt_for_env_var "EMBEDDING_MODEL_ID" "Enter the EMBEDDING_MODEL_ID" "BAAI/bge-base-en-v1.5" false -prompt_for_env_var "HUGGINGFACEHUB_API_TOKEN" "Enter the HUGGINGFACEHUB_API_TOKEN" "${HF_TOKEN}" true +prompt_for_env_var "HF_TOKEN" "Enter the HF_TOKEN" "${HF_TOKEN}" true prompt_for_env_var "RERANK_MODEL_ID" "Enter the RERANK_MODEL_ID" "BAAI/bge-reranker-base" false prompt_for_env_var "LLM_MODEL_ID" "Enter the LLM_MODEL_ID" "meta-llama/Meta-Llama-3-8B-Instruct" false prompt_for_env_var "INDEX_NAME" "Enter the INDEX_NAME" "rag-redis" false @@ -92,7 +92,7 @@ cat < .env # Set all required ENV values export TAG=${TAG} export EMBEDDING_MODEL_ID=${EMBEDDING_MODEL_ID} -export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN +export HF_TOKEN=$HF_TOKEN export RERANK_MODEL_ID=${RERANK_MODEL_ID} export LLM_MODEL_ID=${LLM_MODEL_ID} export INDEX_NAME=${INDEX_NAME} diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/set_env_faqgen.sh b/ChatQnA/docker_compose/intel/hpu/gaudi/set_env_faqgen.sh index fde0b35fd0..8337b7eebc 100755 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/set_env_faqgen.sh +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/set_env_faqgen.sh @@ -7,7 +7,6 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export HF_TOKEN=${HF_TOKEN} export host_ip=${ip_address} export 
EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" diff --git a/ChatQnA/docker_compose/nvidia/gpu/README.md b/ChatQnA/docker_compose/nvidia/gpu/README.md index b1ab3e8baf..546419d5f9 100644 --- a/ChatQnA/docker_compose/nvidia/gpu/README.md +++ b/ChatQnA/docker_compose/nvidia/gpu/README.md @@ -20,7 +20,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste ```bash # Example: host_ip="192.168.1.1" export host_ip="External_Public_IP" - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" ``` 2. If you are in a proxy environment, also set the proxy-related environment variables: @@ -182,7 +182,7 @@ Change the `xxx_MODEL_ID` below for your needs. ```bash # Example: host_ip="192.168.1.1" export host_ip="External_Public_IP" - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" # Example: NGINX_PORT=80 export NGINX_PORT=${your_nginx_port} ``` diff --git a/ChatQnA/docker_compose/nvidia/gpu/compose.yaml b/ChatQnA/docker_compose/nvidia/gpu/compose.yaml index 7a30c37c6b..7b47f46db1 100644 --- a/ChatQnA/docker_compose/nvidia/gpu/compose.yaml +++ b/ChatQnA/docker_compose/nvidia/gpu/compose.yaml @@ -24,7 +24,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] interval: 10s @@ -76,7 +76,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate @@ -98,7 +98,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 ipc: host diff --git a/ChatQnA/tests/README.md b/ChatQnA/tests/README.md index c622008650..1616127839 100644 --- a/ChatQnA/tests/README.md +++ b/ChatQnA/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test From d18bc9bd588ba1f8e56c7082f3e9fc829b6dea42 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Thu, 5 Jun 2025 15:23:03 +0800 Subject: [PATCH 20/44] update secrets token name for CodeGen and CodeTrans (#2031) Signed-off-by: ZePan110 Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- CodeGen/benchmark/accuracy/run_acc.sh | 2 +- CodeGen/docker_compose/amd/gpu/rocm/README.md | 8 ++++---- CodeGen/docker_compose/amd/gpu/rocm/set_env.sh | 2 +- CodeGen/docker_compose/amd/gpu/rocm/set_env_vllm.sh | 2 +- CodeGen/docker_compose/intel/cpu/xeon/README.md | 8 ++++---- CodeGen/docker_compose/intel/cpu/xeon/compose.yaml | 12 ++++++------ .../intel/cpu/xeon/compose_remote.yaml | 6 +++--- CodeGen/docker_compose/intel/hpu/gaudi/README.md | 8 ++++---- CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml | 12 ++++++------ CodeGen/docker_compose/intel/set_env.sh | 6 +++--- CodeGen/tests/README.md | 2 +- CodeGen/tests/test_compose_on_gaudi.sh | 2 +- 
CodeTrans/docker_compose/amd/gpu/rocm/README.md | 2 +- CodeTrans/docker_compose/amd/gpu/rocm/set_env.sh | 2 +- .../docker_compose/amd/gpu/rocm/set_env_vllm.sh | 2 +- CodeTrans/docker_compose/intel/cpu/xeon/README.md | 4 ++-- CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml | 4 ++-- .../docker_compose/intel/cpu/xeon/compose_tgi.yaml | 4 ++-- CodeTrans/docker_compose/intel/hpu/gaudi/README.md | 4 ++-- .../docker_compose/intel/hpu/gaudi/compose.yaml | 4 ++-- .../docker_compose/intel/hpu/gaudi/compose_tgi.yaml | 4 ++-- CodeTrans/tests/README.md | 2 +- CodeTrans/tests/test_compose_on_gaudi.sh | 2 +- CodeTrans/tests/test_compose_on_xeon.sh | 2 +- CodeTrans/tests/test_compose_tgi_on_gaudi.sh | 2 +- CodeTrans/tests/test_compose_tgi_on_xeon.sh | 2 +- 26 files changed, 55 insertions(+), 55 deletions(-) diff --git a/CodeGen/benchmark/accuracy/run_acc.sh b/CodeGen/benchmark/accuracy/run_acc.sh index a5c451965c..7fb894451c 100644 --- a/CodeGen/benchmark/accuracy/run_acc.sh +++ b/CodeGen/benchmark/accuracy/run_acc.sh @@ -1,4 +1,4 @@ - +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/CodeGen/docker_compose/amd/gpu/rocm/README.md b/CodeGen/docker_compose/amd/gpu/rocm/README.md index a3718f7ad0..90e3d2564c 100644 --- a/CodeGen/docker_compose/amd/gpu/rocm/README.md +++ b/CodeGen/docker_compose/amd/gpu/rocm/README.md @@ -109,7 +109,7 @@ Key parameters are configured via environment variables set before running `dock | Environment Variable | Description | Default (Set Externally) | | :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :----------------------------------------------------------------------------------------------- | | `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` | -| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` | +| `HF_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` | | `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` | | `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` | | `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. 
| `http://codegen-tgi-server:80/generate` or `http://codegen-vllm-server:8000/v1/chat/completions` | @@ -125,7 +125,7 @@ For TGI ```bash export host_ip="External_Public_IP" #ip address of the node -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" export http_proxy="Your_HTTP_Proxy" #http proxy if any export https_proxy="Your_HTTPs_Proxy" #https proxy if any export no_proxy=localhost,127.0.0.1,$host_ip #additional no proxies if needed @@ -137,7 +137,7 @@ For vLLM ```bash export host_ip="External_Public_IP" #ip address of the node -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" export http_proxy="Your_HTTP_Proxy" #http proxy if any export https_proxy="Your_HTTPs_Proxy" #https proxy if any export no_proxy=localhost,127.0.0.1,$host_ip #additional no proxies if needed @@ -422,7 +422,7 @@ Users can interact with the backend service using the `Neural Copilot` VS Code e ## Troubleshooting -- **Model Download Issues:** Check `HUGGINGFACEHUB_API_TOKEN`. Ensure internet connectivity or correct proxy settings. Check logs of `tgi-service`/`vllm-service` and `tei-embedding-server`. Gated models need prior Hugging Face access. +- **Model Download Issues:** Check `HF_TOKEN`. Ensure internet connectivity or correct proxy settings. Check logs of `tgi-service`/`vllm-service` and `tei-embedding-server`. Gated models need prior Hugging Face access. - **Connection Errors:** Verify `HOST_IP` is correct and accessible. Check `docker ps` for port mappings. Ensure `no_proxy` includes `HOST_IP` if using a proxy. Check logs of the service failing to connect (e.g., `codegen-backend-server` logs if it can't reach `codegen-llm-server`). - **"Container name is in use"**: Stop existing containers (`docker compose down`) or change `container_name` in `compose.yaml`. - **Resource Issues:** CodeGen models can be memory-intensive. Monitor host RAM usage. Increase Docker resources if needed. 
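As a quick sanity check for the first two troubleshooting items, something along these lines can be run on the host. This is a minimal sketch only; the container names (`tgi-service`, `tei-embedding-server`) follow the ones referenced above and may differ in other compose deployments:

```bash
# Confirm the Hugging Face token is exported without printing its value
[ -n "${HF_TOKEN}" ] && echo "HF_TOKEN is set" || echo "HF_TOKEN is NOT set"

# Check that the serving containers are running and their ports are mapped
docker ps --format 'table {{.Names}}\t{{.Status}}\t{{.Ports}}'

# Inspect recent logs of the LLM and embedding services for download or startup errors
docker logs --tail 50 tgi-service
docker logs --tail 50 tei-embedding-server
```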
diff --git a/CodeGen/docker_compose/amd/gpu/rocm/set_env.sh b/CodeGen/docker_compose/amd/gpu/rocm/set_env.sh index afaa29b341..bef5865267 100644 --- a/CodeGen/docker_compose/amd/gpu/rocm/set_env.sh +++ b/CodeGen/docker_compose/amd/gpu/rocm/set_env.sh @@ -12,7 +12,7 @@ export EXTERNAL_HOST_IP=${ip_address} export CODEGEN_TGI_SERVICE_PORT=8028 ### A token for accessing repositories with models -export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} ### Model ID export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" diff --git a/CodeGen/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/CodeGen/docker_compose/amd/gpu/rocm/set_env_vllm.sh index 475191539a..5f8e02796f 100644 --- a/CodeGen/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/CodeGen/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -13,7 +13,7 @@ export CODEGEN_VLLM_SERVICE_PORT=8028 export CODEGEN_VLLM_ENDPOINT="http://${HOST_IP}:${CODEGEN_VLLM_SERVICE_PORT}" ### A token for accessing repositories with models -export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export CODEGEN_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} ### Model ID export CODEGEN_LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index 88f0a51c6c..eff643ddef 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -42,7 +42,7 @@ This uses the default vLLM-based deployment profile (`codegen-xeon-vllm`). # Replace with your host's external IP address (do not use localhost or 127.0.0.1) export HOST_IP="your_external_ip_address" # Replace with your Hugging Face Hub API token - export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token" + export HF_TOKEN="your_huggingface_token" # Optional: Configure proxy if needed # export http_proxy="your_http_proxy" @@ -90,7 +90,7 @@ The `compose.yaml` file uses Docker Compose profiles to select the LLM serving b - **Services Deployed:** `codegen-tgi-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`. - **To Run:** ```bash - # Ensure environment variables (HOST_IP, HUGGINGFACEHUB_API_TOKEN) are set + # Ensure environment variables (HOST_IP, HF_TOKEN) are set docker compose --profile codegen-xeon-tgi up -d ``` @@ -103,7 +103,7 @@ Key parameters are configured via environment variables set before running `dock | Environment Variable | Description | Default (Set Externally) | | :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :--------------------------------------------- | ------------------------------------ | | `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` | -| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` | +| `HF_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` | | `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` | | `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. 
| `BAAI/bge-base-en-v1.5` | | `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-vllm | tgi-server:9000/v1/chat/completions` | @@ -216,7 +216,7 @@ Users can interact with the backend service using the `Neural Copilot` VS Code e ## Troubleshooting -- **Model Download Issues:** Check `HUGGINGFACEHUB_API_TOKEN`. Ensure internet connectivity or correct proxy settings. Check logs of `tgi-service`/`vllm-service` and `tei-embedding-server`. Gated models need prior Hugging Face access. +- **Model Download Issues:** Check `HF_TOKEN`. Ensure internet connectivity or correct proxy settings. Check logs of `tgi-service`/`vllm-service` and `tei-embedding-server`. Gated models need prior Hugging Face access. - **Connection Errors:** Verify `HOST_IP` is correct and accessible. Check `docker ps` for port mappings. Ensure `no_proxy` includes `HOST_IP` if using a proxy. Check logs of the service failing to connect (e.g., `codegen-backend-server` logs if it can't reach `codegen-llm-server`). - **"Container name is in use"**: Stop existing containers (`docker compose down`) or change `container_name` in `compose.yaml`. - **Resource Issues:** CodeGen models can be memory-intensive. Monitor host RAM usage. Increase Docker resources if needed. diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index eec356dd8c..fd891c93ce 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -17,7 +17,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] @@ -39,7 +39,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] @@ -56,7 +56,7 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped llm-tgi-service: extends: llm-base @@ -140,7 +140,7 @@ services: REDIS_URL: ${REDIS_URL} REDIS_HOST: ${host_ip} INDEX_NAME: ${INDEX_NAME} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: true healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] @@ -162,7 +162,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} host_ip: ${host_ip} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD", "curl", "-f", "http://localhost:80/health"] interval: 10s @@ -202,7 +202,7 @@ services: REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT} INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} restart: unless-stopped diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml index 637c1f4b49..23b8af1959 100644 --- 
a/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml @@ -59,7 +59,7 @@ services: REDIS_URL: ${REDIS_URL} REDIS_HOST: ${host_ip} INDEX_NAME: ${INDEX_NAME} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: true restart: unless-stopped tei-embedding-serving: @@ -76,7 +76,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} host_ip: ${host_ip} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"] interval: 10s @@ -116,7 +116,7 @@ services: REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT} INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} restart: unless-stopped diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/README.md b/CodeGen/docker_compose/intel/hpu/gaudi/README.md index 4af050f051..04f20874fd 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md @@ -42,7 +42,7 @@ This uses the default vLLM-based deployment profile (`codegen-gaudi-vllm`). # Replace with your host's external IP address (do not use localhost or 127.0.0.1) export HOST_IP="your_external_ip_address" # Replace with your Hugging Face Hub API token - export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token" + export HF_TOKEN="your_huggingface_token" # Optional: Configure proxy if needed # export http_proxy="your_http_proxy" @@ -93,7 +93,7 @@ The `compose.yaml` file uses Docker Compose profiles to select the LLM serving b - **Other Services:** Same CPU-based services as the vLLM profile. - **To Run:** ```bash - # Ensure environment variables (HOST_IP, HUGGINGFACEHUB_API_TOKEN) are set + # Ensure environment variables (HOST_IP, HF_TOKEN) are set docker compose --profile codegen-gaudi-tgi up -d ``` @@ -106,7 +106,7 @@ Key parameters are configured via environment variables set before running `dock | Environment Variable | Description | Default (Set Externally) | | :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :--------------------------------------------- | ------------------------------------ | | `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` | -| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` | +| `HF_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` | | `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` | | `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` | | `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `llm-codegen-vllm-server`). Configured in `compose.yaml`. 
| http://codegen-vllm | tgi-server:9000/v1/chat/completions` | @@ -224,7 +224,7 @@ Use the `Neural Copilot` extension configured with the CodeGen backend URL: `htt - Ensure host drivers and Habana Docker runtime are installed and working (`habana-container-runtime`). - Verify `runtime: habana` and volume mounts in `compose.yaml`. - Gaudi initialization can take significant time and memory. Monitor resource usage. -- **Model Download Issues:** Check `HUGGINGFACEHUB_API_TOKEN`, internet access, proxy settings. Check LLM service logs. +- **Model Download Issues:** Check `HF_TOKEN`, internet access, proxy settings. Check LLM service logs. - **Connection Errors:** Verify `HOST_IP`, ports, and proxy settings. Use `docker ps` and check service logs. ## Stopping the Application diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml index fb9a78d252..3b9b5a00e4 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml @@ -17,7 +17,7 @@ services: https_proxy: ${https_proxy} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} ENABLE_HPU_GRAPH: true LIMIT_HPU_GRAPH: true USE_FLASH_ATTENTION: true @@ -46,7 +46,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false} @@ -71,7 +71,7 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped llm-tgi-service: extends: llm-base @@ -156,7 +156,7 @@ services: REDIS_URL: ${REDIS_URL} REDIS_HOST: ${host_ip} INDEX_NAME: ${INDEX_NAME} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: true healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] @@ -178,7 +178,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} host_ip: ${host_ip} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD", "curl", "-f", "http://localhost:80/health"] interval: 10s @@ -218,7 +218,7 @@ services: REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT} INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} restart: unless-stopped diff --git a/CodeGen/docker_compose/intel/set_env.sh b/CodeGen/docker_compose/intel/set_env.sh index ea48c198bb..28ed3aff46 100644 --- a/CodeGen/docker_compose/intel/set_env.sh +++ b/CodeGen/docker_compose/intel/set_env.sh @@ -7,9 +7,9 @@ source .set_env.sh popd > /dev/null export HOST_IP=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then - echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN" +export HF_TOKEN=${HF_TOKEN} +if [ -z "${HF_TOKEN}" ]; then + echo "Error: HF_TOKEN is not set. 
Please set HF_TOKEN" fi if [ -z "${HOST_IP}" ]; then diff --git a/CodeGen/tests/README.md b/CodeGen/tests/README.md index 4909899be7..11efd37b0f 100644 --- a/CodeGen/tests/README.md +++ b/CodeGen/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index 38354233d9..a86c5724a6 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -1,4 +1,4 @@ - +#!/bin/bashs # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/CodeTrans/docker_compose/amd/gpu/rocm/README.md b/CodeTrans/docker_compose/amd/gpu/rocm/README.md index 9fef7c8426..3c048905b1 100644 --- a/CodeTrans/docker_compose/amd/gpu/rocm/README.md +++ b/CodeTrans/docker_compose/amd/gpu/rocm/README.md @@ -62,7 +62,7 @@ Set the values of the variables: Setting variables in the operating system environment: ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token" +export HF_TOKEN="Your_HuggingFace_API_Token" source ./set_env_*.sh # replace the script name with the appropriate one ``` diff --git a/CodeTrans/docker_compose/amd/gpu/rocm/set_env.sh b/CodeTrans/docker_compose/amd/gpu/rocm/set_env.sh index c1acc4464d..24a3a4d11c 100644 --- a/CodeTrans/docker_compose/amd/gpu/rocm/set_env.sh +++ b/CodeTrans/docker_compose/amd/gpu/rocm/set_env.sh @@ -21,7 +21,7 @@ export CODETRANS_TGI_SERVICE_PORT=8008 export CODETRANS_TGI_LLM_ENDPOINT="http://${HOST_IP}:${CODETRANS_TGI_SERVICE_PORT}" ### A token for accessing repositories with models -export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} ### The port of the LLM service. On this port, the LLM service will accept connections export CODETRANS_LLM_SERVICE_PORT=9000 diff --git a/CodeTrans/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/CodeTrans/docker_compose/amd/gpu/rocm/set_env_vllm.sh index ffcbd35df5..494df73a07 100644 --- a/CodeTrans/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/CodeTrans/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -21,7 +21,7 @@ export CODETRANS_VLLM_SERVICE_PORT=8008 export CODETRANS_LLM_ENDPOINT="http://${HOST_IP}:${CODETRANS_VLLM_SERVICE_PORT}" ### A token for accessing repositories with models -export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export CODETRANS_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} ### The port of the LLM service. 
On this port, the LLM service will accept connections export CODETRANS_LLM_SERVICE_PORT=9000 diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/README.md b/CodeTrans/docker_compose/intel/cpu/xeon/README.md index b01492ff12..bc107c53c0 100755 --- a/CodeTrans/docker_compose/intel/cpu/xeon/README.md +++ b/CodeTrans/docker_compose/intel/cpu/xeon/README.md @@ -41,7 +41,7 @@ To set up environment variables for deploying CodeTrans services, set up some pa ```bash export host_ip="External_Public_IP" # ip address of the node -export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token" +export HF_TOKEN="Your_HuggingFace_API_Token" export http_proxy="Your_HTTP_Proxy" # http proxy if any export https_proxy="Your_HTTPs_Proxy" # https proxy if any export no_proxy=localhost,127.0.0.1,$host_ip # additional no proxies if needed @@ -124,7 +124,7 @@ Key parameters are configured via environment variables set before running `dock | Environment Variable | Description | Default (Set Externally) | | :-------------------------------------- | :-------------------------------------------------------------------------------------------------------------------- | :------------------------------------ | | `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` | -| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` | +| `HF_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` | | `LLM_MODEL_ID` | Hugging Face model ID for the CodeTrans LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `mistralai/Mistral-7B-Instruct-v0.3` | | `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codetrans-xeon-llm-server`). Configured in `compose.yaml`. | `http://${HOST_IP}:8008` | | `LLM_COMPONENT_NAME` | LLM component name for the LLM Microservice. 
| `OpeaTextGenService` | diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml index f4aa9f2b95..f950c770ec 100644 --- a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml @@ -14,7 +14,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LLM_MODEL_ID: ${LLM_MODEL_ID} VLLM_TORCH_PROFILER_DIR: "/mnt" healthcheck: @@ -39,7 +39,7 @@ services: LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} restart: unless-stopped codetrans-xeon-backend-server: image: ${REGISTRY:-opea}/codetrans:${TAG:-latest} diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi.yaml index 77c668241c..1eda99bccc 100644 --- a/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi.yaml +++ b/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi.yaml @@ -14,7 +14,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} healthcheck: test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"] @@ -38,7 +38,7 @@ services: LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped codetrans-xeon-backend-server: image: ${REGISTRY:-opea}/codetrans:${TAG:-latest} diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/README.md b/CodeTrans/docker_compose/intel/hpu/gaudi/README.md index 00551eb406..4f242cff5d 100755 --- a/CodeTrans/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/README.md @@ -41,7 +41,7 @@ To set up environment variables for deploying CodeTrans services, set up some pa ```bash export host_ip="External_Public_IP" # ip address of the node -export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token" +export HF_TOKEN="Your_HuggingFace_API_Token" export http_proxy="Your_HTTP_Proxy" # http proxy if any export https_proxy="Your_HTTPs_Proxy" # https proxy if any export no_proxy=localhost,127.0.0.1,$host_ip # additional no proxies if needed @@ -124,7 +124,7 @@ Key parameters are configured via environment variables set before running `dock | Environment Variable | Description | Default (Set Externally) | | :-------------------------------------- | :-------------------------------------------------------------------------------------------------------------------- | :------------------------------------ | | `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` | -| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` | +| `HF_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` | | `LLM_MODEL_ID` | Hugging Face model ID for the CodeTrans LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `mistralai/Mistral-7B-Instruct-v0.3` | | `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codetrans-gaudi-llm-server`). Configured in `compose.yaml`. 
| `http://${HOST_IP}:8008` | | `LLM_COMPONENT_NAME` | LLM component name for the LLM Microservice. | `OpeaTextGenService` | diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml index f34fe5a1e4..60728feabf 100644 --- a/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml @@ -13,7 +13,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none LLM_MODEL_ID: ${LLM_MODEL_ID} @@ -45,7 +45,7 @@ services: LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} restart: unless-stopped codetrans-gaudi-backend-server: image: ${REGISTRY:-opea}/codetrans:${TAG:-latest} diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/CodeTrans/docker_compose/intel/hpu/gaudi/compose_tgi.yaml index 9bcc01f318..b2b4c268c8 100644 --- a/CodeTrans/docker_compose/intel/hpu/gaudi/compose_tgi.yaml +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/compose_tgi.yaml @@ -13,7 +13,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all @@ -42,7 +42,7 @@ services: LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped codetrans-gaudi-backend-server: image: ${REGISTRY:-opea}/codetrans:${TAG:-latest} diff --git a/CodeTrans/tests/README.md b/CodeTrans/tests/README.md index 62edebc6a8..e4e9e135cc 100644 --- a/CodeTrans/tests/README.md +++ b/CodeTrans/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test diff --git a/CodeTrans/tests/test_compose_on_gaudi.sh b/CodeTrans/tests/test_compose_on_gaudi.sh index 600c20a0c3..298aa85900 100644 --- a/CodeTrans/tests/test_compose_on_gaudi.sh +++ b/CodeTrans/tests/test_compose_on_gaudi.sh @@ -38,7 +38,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export NGINX_PORT=80 export host_ip=${ip_address} source set_env.sh diff --git a/CodeTrans/tests/test_compose_on_xeon.sh b/CodeTrans/tests/test_compose_on_xeon.sh index 42f80469e0..12e96a9474 100644 --- a/CodeTrans/tests/test_compose_on_xeon.sh +++ b/CodeTrans/tests/test_compose_on_xeon.sh @@ -40,7 +40,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export NGINX_PORT=80 export host_ip=${ip_address} diff --git a/CodeTrans/tests/test_compose_tgi_on_gaudi.sh b/CodeTrans/tests/test_compose_tgi_on_gaudi.sh index 051afce9d4..a1c978b4b1 100644 --- a/CodeTrans/tests/test_compose_tgi_on_gaudi.sh +++ b/CodeTrans/tests/test_compose_tgi_on_gaudi.sh @@ -35,7 +35,7 @@ function build_docker_images() { function start_services() { cd 
$WORKPATH/docker_compose/intel - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export NGINX_PORT=80 export host_ip=${ip_address} diff --git a/CodeTrans/tests/test_compose_tgi_on_xeon.sh b/CodeTrans/tests/test_compose_tgi_on_xeon.sh index 00da9bde73..e5393453b2 100644 --- a/CodeTrans/tests/test_compose_tgi_on_xeon.sh +++ b/CodeTrans/tests/test_compose_tgi_on_xeon.sh @@ -35,7 +35,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export NGINX_PORT=80 export host_ip=${ip_address} From 07980046a80851c63c692c0c9b49195db0f32f78 Mon Sep 17 00:00:00 2001 From: Zhenzhong Xu Date: Fri, 6 Jun 2025 13:58:28 +0800 Subject: [PATCH 21/44] [DocSum] Aligned the output format (#1948) Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .../intel/cpu/xeon/compose.yaml | 1 + DocSum/docker_compose/intel/set_env.sh | 5 +++ DocSum/docsum.py | 39 +++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml index 9f05963e7a..d6aa67ced3 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml @@ -17,6 +17,7 @@ services: HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} LLM_MODEL_ID: ${LLM_MODEL_ID} VLLM_TORCH_PROFILER_DIR: "/mnt" + VLLM_CPU_KVCACHE_SPACE: 40 healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] interval: 10s diff --git a/DocSum/docker_compose/intel/set_env.sh b/DocSum/docker_compose/intel/set_env.sh index d2c061177d..1d9a013375 100644 --- a/DocSum/docker_compose/intel/set_env.sh +++ b/DocSum/docker_compose/intel/set_env.sh @@ -14,6 +14,11 @@ export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export LLM_ENDPOINT_PORT=8008 export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" + +export BLOCK_SIZE=128 +export MAX_NUM_SEQS=256 +export MAX_SEQ_LEN_TO_CAPTURE=2048 +export NUM_CARDS=1 export MAX_INPUT_TOKENS=1024 export MAX_TOTAL_TOKENS=2048 diff --git a/DocSum/docsum.py b/DocSum/docsum.py index 786e48a264..32766925e0 100644 --- a/DocSum/docsum.py +++ b/DocSum/docsum.py @@ -3,6 +3,7 @@ import asyncio import base64 +import json import os import subprocess import uuid @@ -142,11 +143,49 @@ def read_text_from_file(file, save_file_name): return file_content +def align_generator(self, gen, **kwargs): + # OpenAI response format + # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n' + for line in gen: + line = line.decode("utf-8") + start = -1 + end = -1 + try: + start = line.find("{") + end = line.rfind("}") + 1 + if start == -1 or end <= start: + # Handle cases where '{' or '}' are not found or are in the wrong order + json_str = "" + else: + json_str = line[start:end] + except Exception as e: + print(f"Error finding JSON boundaries: {e}") + json_str = "" + + try: + # sometimes yield empty chunk, do a fallback here + json_data = json.loads(json_str) + if "ops" in json_data and "op" in json_data["ops"][0]: + if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str): + yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n" + else: + pass + elif ( + 
json_data["choices"][0]["finish_reason"] != "eos_token" + and "content" in json_data["choices"][0]["delta"] + ): + yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n" + except Exception as e: + yield f"data: {repr(json_str.encode('utf-8'))}\n\n" + yield "data: [DONE]\n\n" + + class DocSumService: def __init__(self, host="0.0.0.0", port=8000): self.host = host self.port = port ServiceOrchestrator.align_inputs = align_inputs + ServiceOrchestrator.align_generator = align_generator self.megaservice = ServiceOrchestrator() self.megaservice_text_only = ServiceOrchestrator() self.endpoint = str(MegaServiceEndpoint.DOC_SUMMARY) From 229f2b169613739b645c84bbbcfc2818f2a9aa46 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Fri, 6 Jun 2025 16:35:32 +0800 Subject: [PATCH 22/44] update secrets token name for DocIndexRetriever. (#2035) Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .../docker_compose/intel/cpu/xeon/README.md | 4 ++-- .../docker_compose/intel/cpu/xeon/compose.yaml | 12 ++++++------ .../intel/cpu/xeon/compose_milvus.yaml | 12 ++++++------ .../intel/cpu/xeon/compose_without_rerank.yaml | 8 ++++---- .../docker_compose/intel/cpu/xeon/set_env.sh | 2 +- .../docker_compose/intel/hpu/gaudi/README.md | 2 +- .../docker_compose/intel/hpu/gaudi/compose.yaml | 10 +++++----- .../intel/hpu/gaudi/compose_milvus.yaml | 10 +++++----- .../docker_compose/intel/hpu/gaudi/set_env.sh | 2 +- DocIndexRetriever/tests/README.md | 2 +- .../tests/test_compose_without_rerank_on_xeon.sh | 11 +---------- 11 files changed, 33 insertions(+), 42 deletions(-) diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md index 5d0ff79475..fdd8effc33 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md @@ -47,7 +47,7 @@ docker build --no-cache -t opea/doc-index-retriever:latest --build-arg https_pro ```bash export host_ip="YOUR IP ADDR" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} ``` Set environment variables by @@ -81,7 +81,7 @@ In that case, start Docker Containers with compose_without_rerank.yaml ```bash export host_ip="YOUR IP ADDR" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" cd GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon docker compose -f compose_without_rerank.yaml up -d diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml index 252a01f2e9..457afaae46 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml @@ -28,7 +28,7 @@ services: REDIS_HOST: ${REDIS_HOST} INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] @@ -49,7 +49,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] @@ -69,7 +69,7 @@ services: 
no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} LOGFLAG: ${LOGFLAG} restart: unless-stopped @@ -87,7 +87,7 @@ services: https_proxy: ${https_proxy} REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" @@ -105,7 +105,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 host_ip: ${host_ip} @@ -129,7 +129,7 @@ services: https_proxy: ${https_proxy} RERANK_TYPE: ${RERANK_TYPE} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 LOGFLAG: ${LOGFLAG} diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_milvus.yaml b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_milvus.yaml index 59c60c5e81..eeacffa17b 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_milvus.yaml +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_milvus.yaml @@ -76,7 +76,7 @@ services: DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MILVUS" MILVUS_HOST: ${MILVUS_HOST} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] @@ -107,7 +107,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] @@ -130,7 +130,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} LOGFLAG: ${LOGFLAG} restart: unless-stopped @@ -148,7 +148,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} MILVUS_HOST: ${host_ip} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MILVUS" @@ -167,7 +167,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 host_ip: ${host_ip} @@ -194,7 +194,7 @@ services: https_proxy: ${https_proxy} RERANK_TYPE: ${RERANK_TYPE} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 LOGFLAG: ${LOGFLAG} diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml index d99d8e7b35..99af075420 100644 --- 
a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml @@ -25,7 +25,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME:-rag-redis} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] @@ -46,7 +46,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] @@ -66,7 +66,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 LOGFLAG: ${LOGFLAG} restart: unless-stopped @@ -84,7 +84,7 @@ services: https_proxy: ${https_proxy} REDIS_URL: redis://redis-vector-db:6379 INDEX_NAME: ${INDEX_NAME:-rag-redis} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/set_env.sh b/DocIndexRetriever/docker_compose/intel/cpu/xeon/set_env.sh index ca8818e065..72cce36ebe 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/set_env.sh +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/set_env.sh @@ -12,7 +12,7 @@ export RERANK_MODEL_ID="BAAI/bge-reranker-base" export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006" export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export EMBEDDING_SERVICE_HOST_IP=${ip_address} export RETRIEVER_SERVICE_HOST_IP=${ip_address} diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md index 01a4dceb38..f8b0dac2ee 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md @@ -43,7 +43,7 @@ docker build --no-cache -t opea/doc-index-retriever:latest --build-arg https_pro ```bash export host_ip="YOUR IP ADDR" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090" diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml index 3b17350218..ded82b6c1b 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml @@ -28,7 +28,7 @@ services: REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] interval: 10s @@ -76,7 +76,7 @@ services: http_proxy: 
${http_proxy} https_proxy: ${https_proxy} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} restart: unless-stopped retriever: @@ -96,7 +96,7 @@ services: LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped tei-reranking-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 @@ -111,7 +111,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 host_ip: ${host_ip} @@ -135,7 +135,7 @@ services: https_proxy: ${https_proxy} RERANK_TYPE: ${RERANK_TYPE} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 LOGFLAG: ${LOGFLAG} diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose_milvus.yaml b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose_milvus.yaml index 49f8d07a05..c05ec1cce4 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose_milvus.yaml +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose_milvus.yaml @@ -76,7 +76,7 @@ services: DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MILVUS" MILVUS_HOST: ${MILVUS_HOST} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] @@ -136,7 +136,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} LOGFLAG: ${LOGFLAG} restart: unless-stopped @@ -154,7 +154,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} MILVUS_HOST: ${host_ip} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MILVUS" @@ -173,7 +173,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 host_ip: ${host_ip} @@ -200,7 +200,7 @@ services: https_proxy: ${https_proxy} RERANK_TYPE: ${RERANK_TYPE} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 LOGFLAG: ${LOGFLAG} diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/set_env.sh b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/set_env.sh index 0c2b818df4..4d91fe236b 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/set_env.sh @@ -11,7 +11,7 @@ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090" export 
TEI_RERANKING_ENDPOINT="http://${ip_address}:8808" -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export EMBEDDING_SERVICE_HOST_IP=${ip_address} export RETRIEVER_SERVICE_HOST_IP=${ip_address} diff --git a/DocIndexRetriever/tests/README.md b/DocIndexRetriever/tests/README.md index be057c4239..98b2476b8f 100644 --- a/DocIndexRetriever/tests/README.md +++ b/DocIndexRetriever/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test diff --git a/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh b/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh index dde5d84ef9..37c477b2ad 100644 --- a/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh +++ b/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh @@ -39,16 +39,7 @@ function build_docker_images() { function start_services() { echo "Starting Docker Services...." cd $WORKPATH/docker_compose/intel/cpu/xeon - export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006" - export REDIS_URL="redis://${ip_address}:6379" - export INDEX_NAME="rag-redis" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export MEGA_SERVICE_HOST_IP=${ip_address} - export EMBEDDING_SERVICE_HOST_IP=${ip_address} - export RETRIEVER_SERVICE_HOST_IP=${ip_address} - export host_ip=${ip_address} - export LOGFLAG=true + source ./set_env.sh # Start Docker Containers docker compose -f compose_without_rerank.yaml up -d From 4d0b5c43470ec82702ccf01adc663a1741cdf394 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Fri, 6 Jun 2025 16:36:44 +0800 Subject: [PATCH 23/44] update secrets token name for EdgeCraftRag, FinanceAgent, GraphRAG and HybridRAG (#2037) Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- EdgeCraftRAG/README.md | 2 +- EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml | 2 +- .../docker_compose/intel/gpu/arc/compose_gradio.yaml | 2 +- EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml | 2 +- EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh | 2 +- EdgeCraftRAG/tests/README.md | 2 +- FinanceAgent/docker_compose/intel/hpu/gaudi/compose.yaml | 2 +- FinanceAgent/docker_compose/intel/set_env.sh | 2 +- FinanceAgent/tests/test_compose_on_gaudi.sh | 2 +- GraphRAG/README.md | 2 +- GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh | 5 ++--- GraphRAG/tests/README.md | 2 +- 12 files changed, 13 insertions(+), 14 deletions(-) diff --git a/EdgeCraftRAG/README.md b/EdgeCraftRAG/README.md index 9a6216bdb6..93546869da 100755 --- a/EdgeCraftRAG/README.md +++ b/EdgeCraftRAG/README.md @@ -96,7 +96,7 @@ Set up Additional Environment Variables and start with compose_vllm.yaml export LLM_MODEL=#your model id export VLLM_SERVICE_PORT=8008 export vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}" -export HUGGINGFACEHUB_API_TOKEN=#your HF token +export HF_TOKEN=#your HF token docker compose -f compose_vllm.yaml up -d ``` diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml index 281dc16132..e4465e0e7f 100644 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml @@ -71,7 +71,7 @@ services: # HTTP_PROXY: ${https_proxy} # VLLM_OPENVINO_DEVICE: GPU # HF_ENDPOINT: 
${HF_ENDPOINT} - # HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + # HF_TOKEN: ${HF_TOKEN} # volumes: # - /dev/dri/by-path:/dev/dri/by-path # - $HOME/.cache/huggingface:/root/.cache/huggingface diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_gradio.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_gradio.yaml index 9204351fd4..f753a17460 100644 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_gradio.yaml +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_gradio.yaml @@ -71,7 +71,7 @@ services: # HTTP_PROXY: ${https_proxy} # VLLM_OPENVINO_DEVICE: GPU # HF_ENDPOINT: ${HF_ENDPOINT} - # HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + # HF_TOKEN: ${HF_TOKEN} # volumes: # - /dev/dri/by-path:/dev/dri/by-path # - $HOME/.cache/huggingface:/root/.cache/huggingface diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml index 1204e5f0b2..d1811a4aca 100644 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml @@ -72,7 +72,7 @@ services: https_proxy: ${https_proxy} VLLM_OPENVINO_DEVICE: GPU HF_ENDPOINT: ${HF_ENDPOINT} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} volumes: - ${HF_CACHE:-${HOME}/.cache}:/root/.cache devices: diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh b/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh index eef0ebd201..c70928a492 100644 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh @@ -13,5 +13,5 @@ export HOST_IP=${HOST_IP} export LLM_MODEL=${LLM_MODEL} export HF_ENDPOINT=${HF_ENDPOINT} export vLLM_ENDPOINT=${vLLM_ENDPOINT} -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export no_proxy="localhost, 127.0.0.1, 192.168.1.1" diff --git a/EdgeCraftRAG/tests/README.md b/EdgeCraftRAG/tests/README.md index 3b2f72e0c1..ec08f640b1 100644 --- a/EdgeCraftRAG/tests/README.md +++ b/EdgeCraftRAG/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test diff --git a/FinanceAgent/docker_compose/intel/hpu/gaudi/compose.yaml b/FinanceAgent/docker_compose/intel/hpu/gaudi/compose.yaml index e788c5899a..1edc6f0796 100644 --- a/FinanceAgent/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/FinanceAgent/docker_compose/intel/hpu/gaudi/compose.yaml @@ -11,7 +11,7 @@ x-common-environment: x-common-agent-environment: &common-agent-env <<: *common-env - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} llm_endpoint_url: ${LLM_ENDPOINT} model: ${LLM_MODEL_ID} REDIS_URL_VECTOR: ${REDIS_URL_VECTOR} diff --git a/FinanceAgent/docker_compose/intel/set_env.sh b/FinanceAgent/docker_compose/intel/set_env.sh index 16893f3ab5..c8a36fabb0 100644 --- a/FinanceAgent/docker_compose/intel/set_env.sh +++ b/FinanceAgent/docker_compose/intel/set_env.sh @@ -42,7 +42,7 @@ export EMBEDDING_MODEL_ID="${EMBEDDING_MODEL_ID:-BAAI/bge-base-en-v1.5}" export TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${TEI_EMBEDDER_PORT}" # Hugging Face API token -export HUGGINGFACEHUB_API_TOKEN="${HF_TOKEN}" +export HF_TOKEN="${HF_TOKEN}" # Recursion limits export RECURSION_LIMIT_WORKER="${RECURSION_LIMIT_WORKER:-12}" diff --git a/FinanceAgent/tests/test_compose_on_gaudi.sh b/FinanceAgent/tests/test_compose_on_gaudi.sh index 
d534ffa122..cb0f594422 100644 --- a/FinanceAgent/tests/test_compose_on_gaudi.sh +++ b/FinanceAgent/tests/test_compose_on_gaudi.sh @@ -38,7 +38,7 @@ export RECURSION_LIMIT_WORKER="${RECURSION_LIMIT_WORKER:-12}" export RECURSION_LIMIT_SUPERVISOR="${RECURSION_LIMIT_SUPERVISOR:-10}" # Hugging Face API token -export HUGGINGFACEHUB_API_TOKEN="${HF_TOKEN}" +export HF_TOKEN="${HF_TOKEN}" # LLM configuration export TEMPERATURE="${TEMPERATURE:-0.5}" diff --git a/GraphRAG/README.md b/GraphRAG/README.md index 0cdc3b5905..0870b3d829 100644 --- a/GraphRAG/README.md +++ b/GraphRAG/README.md @@ -33,7 +33,7 @@ To set up environment variables for deploying GraphRAG services, follow these st export NEO4J_PASSWORD=${your_neo4j_password} export PYTHONPATH=${path_to_comps} export OPENAI_KEY=${your_openai_api_key} #optional, when not provided will use smaller models TGI/TEI - export HUGGINGFACEHUB_API_TOKEN=${your_hf_token} #needed for TGI/TEI models + export HF_TOKEN=${your_hf_token} #needed for TGI/TEI models ``` 2. If you are in a proxy environment, also set the proxy-related environment variables: diff --git a/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh b/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh index 441ea183be..d5b7e64b5b 100644 --- a/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh @@ -5,14 +5,13 @@ # Remember to set your private variables mentioned in README -# host_ip, OPENAI_API_KEY, HUGGINGFACEHUB_API_TOKEN, proxies... +# host_ip, OPENAI_API_KEY, HF_TOKEN, proxies... pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null host_ip=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export TEI_EMBEDDER_PORT=11633 export LLM_ENDPOINT_PORT=11634 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" diff --git a/GraphRAG/tests/README.md b/GraphRAG/tests/README.md index daf4788df2..3f41f1851c 100644 --- a/GraphRAG/tests/README.md +++ b/GraphRAG/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test From fdbb0bf1e527b0f192d358af7a4095f690d6ab8f Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Fri, 6 Jun 2025 16:37:11 +0800 Subject: [PATCH 24/44] update secrets token name for ProductivitySuite, RerankFinetuning, SearchQnA and Translation (#2038) update secrets token name for ProductivitySuite, RerankFinetuning, SearchQnA and Translation Fix shellcheck issue Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .../docker_compose/intel/cpu/xeon/README.md | 6 +++--- .../docker_compose/intel/cpu/xeon/compose.yaml | 14 +++++++------- .../docker_compose/intel/cpu/xeon/set_env.sh | 3 ++- ProductivitySuite/tests/README.md | 2 +- RerankFinetuning/tests/test_compose_on_gaudi.sh | 1 + RerankFinetuning/tests/test_compose_on_xeon.sh | 1 + SearchQnA/docker_compose/amd/gpu/rocm/README.md | 4 ++-- SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh | 2 +- .../docker_compose/amd/gpu/rocm/set_env_vllm.sh | 2 +- SearchQnA/docker_compose/intel/cpu/xeon/README.md | 2 +- .../docker_compose/intel/cpu/xeon/compose.yaml | 8 ++++---- SearchQnA/docker_compose/intel/hpu/gaudi/README.md | 2 +- .../docker_compose/intel/hpu/gaudi/compose.yaml | 8 ++++---- SearchQnA/docker_compose/intel/set_env.sh | 2 +- SearchQnA/tests/README.md | 2 +- 
Translation/docker_compose/amd/gpu/rocm/set_env.sh | 2 +- .../docker_compose/amd/gpu/rocm/set_env_vllm.sh | 2 +- .../docker_compose/intel/cpu/xeon/compose.yaml | 4 ++-- .../docker_compose/intel/hpu/gaudi/compose.yaml | 4 ++-- Translation/docker_compose/intel/set_env.sh | 2 +- Translation/tests/README.md | 2 +- 21 files changed, 39 insertions(+), 36 deletions(-) diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md index 91921c8c23..55af17f1e8 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md @@ -108,12 +108,12 @@ Since the `compose.yaml` will consume some environment variables, you need to se export host_ip="External_Public_IP" ``` -**Export the value of your Huggingface API token to the `HUGGINGFACEHUB_API_TOKEN` environment variable** +**Export the value of your Huggingface API token to the `HF_TOKEN` environment variable** > Change the Your_Huggingface_API_Token below with tyour actual Huggingface API Token value ``` -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` **Append the value of the public IP address to the no_proxy list** @@ -129,7 +129,7 @@ export RERANK_MODEL_ID="BAAI/bge-reranker-base" export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export LLM_MODEL_ID_CODEGEN="meta-llama/CodeLlama-7b-hf" export INDEX_NAME="rag-redis" -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna" export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete" export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen" diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml index 00a16c1670..ddc8790951 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml @@ -29,7 +29,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} DATAPREP_TYPE: ${DATAPREP_TYPE} LOGFLAG: ${LOGFLAG} healthcheck: @@ -73,7 +73,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped @@ -90,7 +90,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 host_ip: ${host_ip} @@ -111,7 +111,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 host_ip: ${host_ip} @@ -167,7 +167,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] @@ -190,7 +190,7 
@@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: http://tgi_service_codegen:80 LLM_MODEL_ID: ${LLM_MODEL_ID_CODEGEN} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} restart: unless-stopped codegen-xeon-backend-server: @@ -303,7 +303,7 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: http://tgi-service:80 LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS:-1024} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS:-2048} DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME} diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh b/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh index a70561f28b..f88f72e444 100755 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 pushd "../../../../../" > /dev/null @@ -10,7 +11,7 @@ export RERANK_MODEL_ID="BAAI/bge-reranker-base" export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export LLM_MODEL_ID_CODEGEN="Intel/neural-chat-7b-v3-3" export INDEX_NAME="rag-redis" -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna" export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete" export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen" diff --git a/ProductivitySuite/tests/README.md b/ProductivitySuite/tests/README.md index a7bc0ab7ce..fdd2bce112 100644 --- a/ProductivitySuite/tests/README.md +++ b/ProductivitySuite/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test diff --git a/RerankFinetuning/tests/test_compose_on_gaudi.sh b/RerankFinetuning/tests/test_compose_on_gaudi.sh index dca0bc2512..db6c468b6f 100644 --- a/RerankFinetuning/tests/test_compose_on_gaudi.sh +++ b/RerankFinetuning/tests/test_compose_on_gaudi.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/RerankFinetuning/tests/test_compose_on_xeon.sh b/RerankFinetuning/tests/test_compose_on_xeon.sh index ca9faa0222..3b9cf0786f 100644 --- a/RerankFinetuning/tests/test_compose_on_xeon.sh +++ b/RerankFinetuning/tests/test_compose_on_xeon.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README.md b/SearchQnA/docker_compose/amd/gpu/rocm/README.md index 40533aac9f..4146dbbe92 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README.md @@ -50,7 +50,7 @@ To set up environment variables for deploying SearchQnA services, set up some pa export host_ip="External_Public_IP" # ip address of the node export GOOGLE_CSE_ID="your cse id" export GOOGLE_API_KEY="your google api key" -export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token" +export HF_TOKEN="Your_HuggingFace_API_Token" export http_proxy="Your_HTTP_Proxy" # http proxy if any export https_proxy="Your_HTTPs_Proxy" # https proxy if any export no_proxy=localhost,127.0.0.1,$host_ip # additional no proxies if needed @@ -64,7 +64,7 @@ source 
./set_env_vllm.sh export host_ip="External_Public_IP" # ip address of the node export GOOGLE_CSE_ID="your cse id" export GOOGLE_API_KEY="your google api key" -export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token" +export HF_TOKEN="Your_HuggingFace_API_Token" export http_proxy="Your_HTTP_Proxy" # http proxy if any export https_proxy="Your_HTTPs_Proxy" # https proxy if any export no_proxy=localhost,127.0.0.1,$host_ip # additional no proxies if needed diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh index 3d84e01fcf..faeca0ae51 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -8,7 +8,7 @@ export EXTERNAL_HOST_IP=${ip_address} export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5' export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID} -export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3' export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh index a891cce2a0..9ee0d24f79 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -8,7 +8,7 @@ export EXTERNAL_HOST_IP=${ip_address} export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5' export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID} -export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3' export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' diff --git a/SearchQnA/docker_compose/intel/cpu/xeon/README.md b/SearchQnA/docker_compose/intel/cpu/xeon/README.md index 742a6ae1cd..c59d5fade5 100644 --- a/SearchQnA/docker_compose/intel/cpu/xeon/README.md +++ b/SearchQnA/docker_compose/intel/cpu/xeon/README.md @@ -43,7 +43,7 @@ To set up environment variables for deploying SearchQnA services, set up some pa export host_ip="External_Public_IP" # ip address of the node export GOOGLE_CSE_ID="your cse id" export GOOGLE_API_KEY="your google api key" -export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token" +export HF_TOKEN="Your_HuggingFace_API_Token" export http_proxy="Your_HTTP_Proxy" # http proxy if any export https_proxy="Your_HTTPs_Proxy" # https proxy if any export no_proxy=localhost,127.0.0.1,$host_ip # additional no proxies if needed diff --git a/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml b/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml index 4503a645bb..dfc05a5b31 100644 --- a/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -35,7 +35,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} restart: unless-stopped web-retriever: @@ -87,7 +87,7 @@ services: https_proxy: ${https_proxy} RERANK_TYPE: ${RERANK_TYPE} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} restart: unless-stopped tgi-service: @@ -102,7 +102,7 @@ services: no_proxy: ${no_proxy} http_proxy: 
${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} healthcheck: test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"] @@ -125,7 +125,7 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} restart: unless-stopped searchqna-xeon-backend-server: diff --git a/SearchQnA/docker_compose/intel/hpu/gaudi/README.md b/SearchQnA/docker_compose/intel/hpu/gaudi/README.md index 611b4a3c44..4e4cedadc6 100644 --- a/SearchQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/SearchQnA/docker_compose/intel/hpu/gaudi/README.md @@ -43,7 +43,7 @@ To set up environment variables for deploying SearchQnA services, set up some pa export host_ip="External_Public_IP" # ip address of the node export GOOGLE_CSE_ID="your cse id" export GOOGLE_API_KEY="your google api key" -export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token" +export HF_TOKEN="Your_HuggingFace_API_Token" export http_proxy="Your_HTTP_Proxy" # http proxy if any export https_proxy="Your_HTTPs_Proxy" # https proxy if any export no_proxy=localhost,127.0.0.1,$host_ip # additional no proxies if needed diff --git a/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 5ff29a5d7a..6affd1fa69 100644 --- a/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -43,7 +43,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} restart: unless-stopped web-retriever: @@ -94,7 +94,7 @@ services: https_proxy: ${https_proxy} RERANK_TYPE: ${RERANK_TYPE} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} restart: unless-stopped tgi-service: @@ -108,7 +108,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all @@ -142,7 +142,7 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 LOGFLAG: ${LOGFLAG} diff --git a/SearchQnA/docker_compose/intel/set_env.sh b/SearchQnA/docker_compose/intel/set_env.sh index 45aaa7eb48..9680c13af0 100644 --- a/SearchQnA/docker_compose/intel/set_env.sh +++ b/SearchQnA/docker_compose/intel/set_env.sh @@ -8,7 +8,7 @@ popd > /dev/null export GOOGLE_CSE_ID=$GOOGLE_CSE_ID export GOOGLE_API_KEY=$GOOGLE_API_KEY -export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN +export HF_TOKEN=$HF_TOKEN export EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5 export TEI_EMBEDDING_ENDPOINT=http://${host_ip}:3001 export RERANK_MODEL_ID=BAAI/bge-reranker-base diff --git a/SearchQnA/tests/README.md b/SearchQnA/tests/README.md index 4dd235fbb6..652d025076 100644 --- a/SearchQnA/tests/README.md +++ b/SearchQnA/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export 
HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test diff --git a/Translation/docker_compose/amd/gpu/rocm/set_env.sh b/Translation/docker_compose/amd/gpu/rocm/set_env.sh index dc7dbe85ac..e0457d8395 100644 --- a/Translation/docker_compose/amd/gpu/rocm/set_env.sh +++ b/Translation/docker_compose/amd/gpu/rocm/set_env.sh @@ -9,7 +9,7 @@ export TRANSLATION_HOST_IP=${host_ip} export TRANSLATION_EXTERNAL_HOST_IP=${host_ip} export TRANSLATION_LLM_MODEL_ID="haoranxu/ALMA-13B" export TRANSLATION_TGI_LLM_ENDPOINT="http://${TRANSLATION_HOST_IP}:8008" -export TRANSLATION_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export TRANSLATION_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export TRANSLATION_MEGA_SERVICE_HOST_IP=${TRANSLATION_HOST_IP} export TRANSLATION_LLM_SERVICE_HOST_IP=${TRANSLATION_HOST_IP} export TRANSLATION_FRONTEND_SERVICE_IP=${TRANSLATION_HOST_IP} diff --git a/Translation/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/Translation/docker_compose/amd/gpu/rocm/set_env_vllm.sh index 4ebd939baa..772430ac0c 100644 --- a/Translation/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/Translation/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -11,7 +11,7 @@ export TRANSLATION_LLM_MODEL_ID="haoranxu/ALMA-13B" export TRANSLATION_VLLM_SERVICE_PORT=8088 export TRANSLATION_LLM_ENDPOINT="http://${HOST_IP}:${TRANSLATION_VLLM_SERVICE_PORT}" export TRANSLATION_LLM_PORT=9088 -export TRANSLATION_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export TRANSLATION_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export TRANSLATION_MEGA_SERVICE_HOST_IP=${HOST_IP} export TRANSLATION_LLM_SERVICE_HOST_IP=${HOST_IP} export TRANSLATION_FRONTEND_SERVICE_IP=${HOST_IP} diff --git a/Translation/docker_compose/intel/cpu/xeon/compose.yaml b/Translation/docker_compose/intel/cpu/xeon/compose.yaml index 4b77d84484..1c3d115741 100644 --- a/Translation/docker_compose/intel/cpu/xeon/compose.yaml +++ b/Translation/docker_compose/intel/cpu/xeon/compose.yaml @@ -11,7 +11,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 host_ip: ${host_ip} @@ -39,7 +39,7 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 restart: unless-stopped diff --git a/Translation/docker_compose/intel/hpu/gaudi/compose.yaml b/Translation/docker_compose/intel/hpu/gaudi/compose.yaml index 9516e60ce6..92661ab552 100644 --- a/Translation/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/Translation/docker_compose/intel/hpu/gaudi/compose.yaml @@ -11,7 +11,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all @@ -47,7 +47,7 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 restart: unless-stopped diff --git a/Translation/docker_compose/intel/set_env.sh b/Translation/docker_compose/intel/set_env.sh index 
37762fbd50..931ea2716a 100644 --- a/Translation/docker_compose/intel/set_env.sh +++ b/Translation/docker_compose/intel/set_env.sh @@ -9,7 +9,7 @@ popd > /dev/null export LLM_MODEL_ID="haoranxu/ALMA-13B" export TGI_LLM_ENDPOINT="http://${host_ip}:8008" -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/translation" diff --git a/Translation/tests/README.md b/Translation/tests/README.md index ece64cf149..7a544febb2 100644 --- a/Translation/tests/README.md +++ b/Translation/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test From b27a6d3f3da00fcda4c0aa2380f434c0f4a6953b Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Fri, 6 Jun 2025 16:37:39 +0800 Subject: [PATCH 25/44] update secrets token name for InstructionTuning, MultimodalQnA and WorkflowExecAgent (#2039) update secrets token name for InstructionTuning, MultimodalQnA and WorkflowExecAgent Fix shellcheck issue Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- InstructionTuning/tests/README.md | 2 +- MultimodalQnA/docker_compose/amd/gpu/rocm/README.md | 4 ++-- MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml | 2 +- .../docker_compose/intel/cpu/xeon/compose_milvus.yaml | 2 +- MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml | 2 +- .../docker_compose/intel/hpu/gaudi/compose_milvus.yaml | 2 +- MultimodalQnA/docker_compose/intel/set_env.sh | 2 +- MultimodalQnA/tests/README.md | 2 +- WorkflowExecAgent/README.md | 2 +- WorkflowExecAgent/tests/2_start_vllm_service.sh | 2 +- WorkflowExecAgent/tests/3_launch_and_validate_agent.sh | 2 +- WorkflowExecAgent/tests/README.md | 2 +- WorkflowExecAgent/tests/test_compose_vllm_on_xeon.sh | 1 + 13 files changed, 14 insertions(+), 13 deletions(-) diff --git a/InstructionTuning/tests/README.md b/InstructionTuning/tests/README.md index fd43a2b4a1..19d617b426 100644 --- a/InstructionTuning/tests/README.md +++ b/InstructionTuning/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test diff --git a/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md b/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md index 14e66d989a..f6bc0e8d1a 100644 --- a/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md +++ b/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md @@ -165,11 +165,11 @@ Use AMD GPU driver utilities to determine the correct `cardN` and `renderN` IDs #### Setting variables in the operating system environment: -##### Set variable HUGGINGFACEHUB_API_TOKEN: +##### Set variable HF_TOKEN: ```bash ### Replace the string 'your_huggingfacehub_token' with your HuggingFacehub repository access token. 
-export HUGGINGFACEHUB_API_TOKEN='your_huggingfacehub_token' +export HF_TOKEN='your_huggingfacehub_token' ``` #### Set variables value in set_env\*\*\*\*.sh file: diff --git a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml index 2f2318de07..3de373be5d 100644 --- a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -49,7 +49,7 @@ services: DATAPREP_MMR_PORT: ${DATAPREP_MMR_PORT} INDEX_NAME: ${INDEX_NAME} LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:${LVM_PORT}/v1/lvm" - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} MULTIMODAL_DATAPREP: true DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS" healthcheck: diff --git a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml index 250d2633a5..257f033745 100644 --- a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml +++ b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml @@ -91,7 +91,7 @@ services: MILVUS_HOST: ${MILVUS_HOST} COLLECTION_NAME: ${COLLECTION_NAME:-LangChainCollection} LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:${LVM_PORT}/v1/lvm" - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped embedding-multimodal-bridgetower: diff --git a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml index c3dcc9f8cc..ae68d329a3 100644 --- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -51,7 +51,7 @@ services: DATAPREP_MMR_PORT: ${DATAPREP_MMR_PORT} INDEX_NAME: ${INDEX_NAME} LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:${LVM_PORT}/v1/lvm" - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} MULTIMODAL_DATAPREP: true DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS" healthcheck: diff --git a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose_milvus.yaml b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose_milvus.yaml index 165760003c..4c1019785c 100644 --- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose_milvus.yaml +++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose_milvus.yaml @@ -89,7 +89,7 @@ services: MILVUS_HOST: ${MILVUS_HOST} COLLECTION_NAME: ${COLLECTION_NAME} LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:${LVM_PORT}/v1/lvm" - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} restart: unless-stopped embedding-multimodal-bridgetower-gaudi: image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower-gaudi:${TAG:-latest} diff --git a/MultimodalQnA/docker_compose/intel/set_env.sh b/MultimodalQnA/docker_compose/intel/set_env.sh index 8d31674a29..b2caa3ad0b 100755 --- a/MultimodalQnA/docker_compose/intel/set_env.sh +++ b/MultimodalQnA/docker_compose/intel/set_env.sh @@ -7,7 +7,7 @@ source .set_env.sh popd > /dev/null export host_ip=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} +export HF_TOKEN=${HF_TOKEN} export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip} export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip} export LVM_SERVICE_HOST_IP=${host_ip} diff --git a/MultimodalQnA/tests/README.md b/MultimodalQnA/tests/README.md index 279576500f..0aa33095cc 100644 --- a/MultimodalQnA/tests/README.md +++ 
b/MultimodalQnA/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test diff --git a/WorkflowExecAgent/README.md b/WorkflowExecAgent/README.md index 0a4b7f333e..402913775d 100644 --- a/WorkflowExecAgent/README.md +++ b/WorkflowExecAgent/README.md @@ -85,7 +85,7 @@ Configure `GenAIExamples/WorkflowExecAgent/docker_compose/.env` file with the fo ```sh export SDK_BASE_URL=${SDK_BASE_URL} export SERVING_TOKEN=${SERVING_TOKEN} -export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} +export HF_TOKEN=${HF_TOKEN} export llm_engine=${llm_engine} export llm_endpoint_url=${llm_endpoint_url} export ip_address=$(hostname -I | awk '{print $1}') diff --git a/WorkflowExecAgent/tests/2_start_vllm_service.sh b/WorkflowExecAgent/tests/2_start_vllm_service.sh index a058864c37..73c43d4b44 100644 --- a/WorkflowExecAgent/tests/2_start_vllm_service.sh +++ b/WorkflowExecAgent/tests/2_start_vllm_service.sh @@ -10,7 +10,7 @@ vllm_port=${vllm_port} [[ -z "$vllm_port" ]] && vllm_port=8084 model=mistralai/Mistral-7B-Instruct-v0.3 export WORKDIR=$WORKPATH/../../ -export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} function build_vllm_docker_image() { echo "Building the vllm docker images" diff --git a/WorkflowExecAgent/tests/3_launch_and_validate_agent.sh b/WorkflowExecAgent/tests/3_launch_and_validate_agent.sh index 5c9e6da583..3fa75920c3 100644 --- a/WorkflowExecAgent/tests/3_launch_and_validate_agent.sh +++ b/WorkflowExecAgent/tests/3_launch_and_validate_agent.sh @@ -12,7 +12,7 @@ export WORKDIR=$WORKPATH/../../ echo "WORKDIR=${WORKDIR}" export SDK_BASE_URL=${SDK_BASE_URL} export SERVING_TOKEN=${SERVING_TOKEN} -export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export llm_engine=vllm export ip_address=$(hostname -I | awk '{print $1}') export llm_endpoint_url=http://${ip_address}:${vllm_port} diff --git a/WorkflowExecAgent/tests/README.md b/WorkflowExecAgent/tests/README.md index 1dbaab6e93..9f20e96b5e 100644 --- a/WorkflowExecAgent/tests/README.md +++ b/WorkflowExecAgent/tests/README.md @@ -9,7 +9,7 @@ Configure necessary variables as listed below. Replace the variables according t ```sh export SDK_BASE_URL=${SDK_BASE_URL} export SERVING_TOKEN=${SERVING_TOKEN} -export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} +export HF_TOKEN=${HF_TOKEN} export workflow_id=${workflow_id} # workflow_id of the serving workflow export vllm_port=${vllm_port} # vllm serving port export ip_address=$(hostname -I | awk '{print $1}') diff --git a/WorkflowExecAgent/tests/test_compose_vllm_on_xeon.sh b/WorkflowExecAgent/tests/test_compose_vllm_on_xeon.sh index d1faa05a85..f9352214cc 100644 --- a/WorkflowExecAgent/tests/test_compose_vllm_on_xeon.sh +++ b/WorkflowExecAgent/tests/test_compose_vllm_on_xeon.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 From a797945041ba5dc8096801683149c8592a671cb6 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Fri, 6 Jun 2025 16:40:18 +0800 Subject: [PATCH 26/44] update secrets token name for DocSum. 
(#2036) Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- DocSum/docker_compose/amd/gpu/rocm/README.md | 2 +- DocSum/docker_compose/amd/gpu/rocm/set_env.sh | 2 +- DocSum/docker_compose/amd/gpu/rocm/set_env_vllm.sh | 2 +- DocSum/docker_compose/intel/cpu/xeon/compose.yaml | 6 +++--- DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml | 6 +++--- DocSum/docker_compose/intel/hpu/gaudi/compose.yaml | 6 +++--- DocSum/docker_compose/intel/hpu/gaudi/compose_tgi.yaml | 6 +++--- DocSum/docker_compose/intel/set_env.sh | 2 +- DocSum/tests/README.md | 2 +- 9 files changed, 17 insertions(+), 17 deletions(-) diff --git a/DocSum/docker_compose/amd/gpu/rocm/README.md b/DocSum/docker_compose/amd/gpu/rocm/README.md index da9d7d749f..1c765cbd81 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/README.md +++ b/DocSum/docker_compose/amd/gpu/rocm/README.md @@ -65,7 +65,7 @@ Set the values of the variables: Setting variables in the operating system environment: ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token" +export HF_TOKEN="Your_HuggingFace_API_Token" source ./set_env_*.sh # replace the script name with the appropriate one ``` diff --git a/DocSum/docker_compose/amd/gpu/rocm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm/set_env.sh index f597849987..771bce2297 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/set_env.sh +++ b/DocSum/docker_compose/amd/gpu/rocm/set_env.sh @@ -9,7 +9,7 @@ export DOCSUM_MAX_TOTAL_TOKENS="4096" export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export DOCSUM_TGI_SERVICE_PORT="8008" export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export DOCSUM_WHISPER_PORT="7066" export ASR_SERVICE_HOST_IP="${HOST_IP}" export DOCSUM_LLM_SERVER_PORT="9000" diff --git a/DocSum/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/DocSum/docker_compose/amd/gpu/rocm/set_env_vllm.sh index 800e502071..1c33250fde 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/DocSum/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 export HOST_IP=${ip_address} -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export DOCSUM_MAX_INPUT_TOKENS=2048 export DOCSUM_MAX_TOTAL_TOKENS=4096 export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml index d6aa67ced3..5489325d50 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml @@ -14,7 +14,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LLM_MODEL_ID: ${LLM_MODEL_ID} VLLM_TORCH_PROFILER_DIR: "/mnt" VLLM_CPU_KVCACHE_SPACE: 40 @@ -40,8 +40,8 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME} diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml index 
2343d726c7..b929900830 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml +++ b/DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml @@ -14,7 +14,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] @@ -39,8 +39,8 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME} diff --git a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml index 2efa09e890..739a41feba 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml @@ -13,7 +13,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none LLM_MODEL_ID: ${LLM_MODEL_ID} @@ -44,8 +44,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} LLM_ENDPOINT: ${LLM_ENDPOINT} diff --git a/DocSum/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/DocSum/docker_compose/intel/hpu/gaudi/compose_tgi.yaml index 6b922ebc68..987706b0ee 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/compose_tgi.yaml +++ b/DocSum/docker_compose/intel/hpu/gaudi/compose_tgi.yaml @@ -13,7 +13,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all @@ -48,8 +48,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} LLM_ENDPOINT: ${LLM_ENDPOINT} diff --git a/DocSum/docker_compose/intel/set_env.sh b/DocSum/docker_compose/intel/set_env.sh index 1d9a013375..a0271fb8f4 100644 --- a/DocSum/docker_compose/intel/set_env.sh +++ b/DocSum/docker_compose/intel/set_env.sh @@ -10,7 +10,7 @@ export host_ip=$(hostname -I | awk '{print $1}') # Example: host_ip="192.168.1.1 export no_proxy="${no_proxy},${host_ip}" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" export http_proxy=$http_proxy export https_proxy=$https_proxy -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export LLM_ENDPOINT_PORT=8008 export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" diff --git a/DocSum/tests/README.md b/DocSum/tests/README.md index 6d5f55c7f1..2d002f3485 100644 --- a/DocSum/tests/README.md +++ b/DocSum/tests/README.md @@ -3,7 +3,7 @@ ## Set the 
required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test From 81a88419e9d96518844c6004facad87c237e3b34 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Fri, 6 Jun 2025 16:40:37 +0800 Subject: [PATCH 27/44] update secrets token name for VideoQnA and VisualQnA (#2040) Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- VideoQnA/docker_compose/intel/cpu/xeon/README.md | 2 +- VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml | 4 ++-- VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh | 2 +- VideoQnA/tests/test_compose_on_xeon.sh | 1 - VisualQnA/docker_compose/amd/gpu/rocm/README.md | 4 ++-- VisualQnA/docker_compose/amd/gpu/rocm/set_env.sh | 2 +- VisualQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh | 2 +- VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml | 2 +- VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml | 2 +- VisualQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml | 2 +- VisualQnA/tests/README.md | 2 +- VisualQnA/tests/test_compose_tgi_on_gaudi.sh | 2 +- VisualQnA/tests/test_compose_tgi_on_xeon.sh | 2 +- 13 files changed, 14 insertions(+), 15 deletions(-) diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/README.md b/VideoQnA/docker_compose/intel/cpu/xeon/README.md index 96b1d97ec0..6f06577af4 100644 --- a/VideoQnA/docker_compose/intel/cpu/xeon/README.md +++ b/VideoQnA/docker_compose/intel/cpu/xeon/README.md @@ -151,7 +151,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export HF_TOKEN=${HF_TOKEN} -export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} +export HF_TOKEN=${HF_TOKEN} export INDEX_NAME="mega-videoqna" export LLM_DOWNLOAD="True" # Set to "False" before redeploy LVM server to avoid model download diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml b/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml index fd67f82eeb..4d9f7ffec4 100644 --- a/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -26,7 +26,7 @@ services: VDMS_PORT: ${VDMS_PORT} INDEX_NAME: ${INDEX_NAME} COLLECTION_NAME: ${INDEX_NAME} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} volumes: - videoqna-cache:/home/user/.cache healthcheck: @@ -64,7 +64,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} INDEX_NAME: ${INDEX_NAME} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_VDMS" VDMS_INDEX_NAME: ${INDEX_NAME} VDMS_HOST: ${VDMS_HOST} diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh b/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh index ada41f8ba9..307f849b88 100644 --- a/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ b/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh @@ -9,7 +9,7 @@ popd > /dev/null host_ip=$(hostname -I | awk '{print $1}') export HF_TOKEN=${HF_TOKEN} -export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} +export HF_TOKEN=${HF_TOKEN} export INDEX_NAME="mega-videoqna" export LLM_DOWNLOAD="True" # Set to "False" before redeploy LVM server to avoid model download diff --git a/VideoQnA/tests/test_compose_on_xeon.sh b/VideoQnA/tests/test_compose_on_xeon.sh index d4c1b5a3b5..a2306399b2 100755 --- a/VideoQnA/tests/test_compose_on_xeon.sh +++ b/VideoQnA/tests/test_compose_on_xeon.sh @@ -17,7 +17,6 @@ export host_ip=${ip_address} function setup_env() { export HF_TOKEN=${HF_TOKEN} - export 
HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export INDEX_NAME="mega-videoqna" export LLM_DOWNLOAD="True" # Set to "False" before redeploy LVM server to avoid model download diff --git a/VisualQnA/docker_compose/amd/gpu/rocm/README.md b/VisualQnA/docker_compose/amd/gpu/rocm/README.md index 1647b16b2a..08e93c2776 100644 --- a/VisualQnA/docker_compose/amd/gpu/rocm/README.md +++ b/VisualQnA/docker_compose/amd/gpu/rocm/README.md @@ -151,11 +151,11 @@ Use AMD GPU driver utilities to determine the correct `cardN` and `renderN` IDs #### Setting variables in the operating system environment: -##### Set variable HUGGINGFACEHUB_API_TOKEN: +##### Set variable HF_TOKEN: ```bash ### Replace the string 'your_huggingfacehub_token' with your HuggingFacehub repository access token. -export HUGGINGFACEHUB_API_TOKEN='your_huggingfacehub_token' +export HF_TOKEN='your_huggingfacehub_token' ``` #### Set variables value in set_env\*\*\*\*.sh file: diff --git a/VisualQnA/docker_compose/amd/gpu/rocm/set_env.sh b/VisualQnA/docker_compose/amd/gpu/rocm/set_env.sh index 1cdf88a262..48893357c3 100644 --- a/VisualQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ b/VisualQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -6,7 +6,7 @@ export HOST_IP=${host_ip} export EXTERNAL_HOST_IP=${host_ip} export VISUALQNA_TGI_SERVICE_PORT="8399" -export VISUALQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export VISUALQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export VISUALQNA_CARD_ID="card1" export VISUALQNA_RENDER_ID="renderD136" export LVM_MODEL_ID="Xkev/Llama-3.2V-11B-cot" diff --git a/VisualQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/VisualQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh index 68a1bb0b9d..b36390d840 100644 --- a/VisualQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/VisualQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -6,7 +6,7 @@ export HOST_IP=${host_ip} export EXTERNAL_HOST_IP=${host_ip} export VISUALQNA_VLLM_SERVICE_PORT="8081" -export VISUALQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export VISUALQNA_HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} export VISUALQNA_CARD_ID="card1" export VISUALQNA_RENDER_ID="renderD136" export VISUALQNA_LVM_MODEL_ID="Xkev/Llama-3.2V-11B-cot" diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml b/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml index 7c7d9c9317..47a99a6b0b 100644 --- a/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -13,7 +13,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} VLLM_TORCH_PROFILER_DIR: "/mnt" healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] diff --git a/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml index c1950a14d4..3430ac1052 100644 --- a/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -13,7 +13,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none LLM_MODEL_ID: ${LVM_MODEL_ID} diff --git a/VisualQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/VisualQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml index 251b4fce70..adfbc01543 100644 --- a/VisualQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml +++ 
b/VisualQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml @@ -17,7 +17,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} ENABLE_HPU_GRAPH: true LIMIT_HPU_GRAPH: true USE_FLASH_ATTENTION: true diff --git a/VisualQnA/tests/README.md b/VisualQnA/tests/README.md index 8d07371b51..664c9d7b44 100644 --- a/VisualQnA/tests/README.md +++ b/VisualQnA/tests/README.md @@ -3,7 +3,7 @@ ## Set the required environment variable ```bash -export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" +export HF_TOKEN="Your_Huggingface_API_Token" ``` ## Run test diff --git a/VisualQnA/tests/test_compose_tgi_on_gaudi.sh b/VisualQnA/tests/test_compose_tgi_on_gaudi.sh index b469166a8e..ba49821249 100644 --- a/VisualQnA/tests/test_compose_tgi_on_gaudi.sh +++ b/VisualQnA/tests/test_compose_tgi_on_gaudi.sh @@ -34,7 +34,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/hpu/gaudi - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export NGINX_PORT=80 export host_ip=${ip_address} source ./set_env.sh diff --git a/VisualQnA/tests/test_compose_tgi_on_xeon.sh b/VisualQnA/tests/test_compose_tgi_on_xeon.sh index 29a009904d..270b638350 100644 --- a/VisualQnA/tests/test_compose_tgi_on_xeon.sh +++ b/VisualQnA/tests/test_compose_tgi_on_xeon.sh @@ -34,7 +34,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export NGINX_PORT=80 export host_ip=${ip_address} source ./set_env.sh From d91de605b3c7688f27063487667759743217d68e Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Mon, 9 Jun 2025 09:14:51 +0800 Subject: [PATCH 28/44] Fix shellcheck issues and update secrets TOKEN name (#2043) Signed-off-by: ZePan110 Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .github/env/_build_image.sh | 1 + .set_env.sh | 1 + HybridRAG/docker_compose/intel/hpu/gaudi/compose.yaml | 10 +++++----- HybridRAG/docker_compose/intel/hpu/gaudi/set_env.sh | 3 +-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/env/_build_image.sh b/.github/env/_build_image.sh index d559137fed..b83b4b0f65 100644 --- a/.github/env/_build_image.sh +++ b/.github/env/_build_image.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/.set_env.sh b/.set_env.sh index 4480362d81..f0c6f4e400 100644 --- a/.set_env.sh +++ b/.set_env.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # diff --git a/HybridRAG/docker_compose/intel/hpu/gaudi/compose.yaml b/HybridRAG/docker_compose/intel/hpu/gaudi/compose.yaml index 2ae35cf5ed..d296cfeb00 100644 --- a/HybridRAG/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/HybridRAG/docker_compose/intel/hpu/gaudi/compose.yaml @@ -16,7 +16,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} INDEX_NAME: ${INDEX_NAME} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG:-False} HABANA_VISIBLE_DEVICES: all @@ -90,7 +90,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + 
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 container_name: tei-embedding-server @@ -120,7 +120,7 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped @@ -136,7 +136,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate @@ -152,7 +152,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LLM_MODEL_ID: ${LLM_MODEL_ID} VLLM_TORCH_PROFILER_DIR: "/mnt" VLLM_CPU_KVCACHE_SPACE: 40 diff --git a/HybridRAG/docker_compose/intel/hpu/gaudi/set_env.sh b/HybridRAG/docker_compose/intel/hpu/gaudi/set_env.sh index a828fb565d..c743b61494 100644 --- a/HybridRAG/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/HybridRAG/docker_compose/intel/hpu/gaudi/set_env.sh @@ -4,8 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 export host_ip=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" From 1e20459a170927313599a65846b5c7dda2c1b6e0 Mon Sep 17 00:00:00 2001 From: Zhu Yongbo Date: Mon, 9 Jun 2025 09:32:13 +0800 Subject: [PATCH 29/44] add new feature for EC-RAG (#2013) Signed-off-by: Yongbozzz Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .github/code_spell_ignore.txt | 3 +- EdgeCraftRAG/Dockerfile.server | 3 +- EdgeCraftRAG/README.md | 64 ++- EdgeCraftRAG/chatqna.py | 1 + .../docker_compose/intel/gpu/arc/compose.yaml | 32 +- .../intel/gpu/arc/compose_gradio.yaml | 4 +- .../intel/gpu/arc/compose_vllm.yaml | 94 ----- .../intel/gpu/arc/compose_vllm_multi-arc.yaml | 72 +++- .../docker_compose/intel/gpu/arc/set_env.sh | 11 + EdgeCraftRAG/docker_image_build/build.yaml | 5 - EdgeCraftRAG/edgecraftrag/VERSION | 1 + EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py | 44 +- EdgeCraftRAG/edgecraftrag/api/v1/data.py | 6 +- .../edgecraftrag/api/v1/knowledge_base.py | 136 +++++++ EdgeCraftRAG/edgecraftrag/api/v1/prompt.py | 58 +++ EdgeCraftRAG/edgecraftrag/api_schema.py | 14 +- EdgeCraftRAG/edgecraftrag/components/data.py | 2 +- .../edgecraftrag/components/generator.py | 41 +- .../edgecraftrag/components/knowledge_base.py | 51 +++ .../edgecraftrag/components/pipeline.py | 10 +- EdgeCraftRAG/edgecraftrag/context.py | 5 + .../controllers/knowledge_basemgr.py | 73 ++++ EdgeCraftRAG/edgecraftrag/requirements.txt | 0 EdgeCraftRAG/edgecraftrag/server.py | 4 +- EdgeCraftRAG/edgecraftrag/utils.py | 45 ++- EdgeCraftRAG/nginx/nginx.conf.template | 37 ++ ...vllm.json => test_pipeline_ipex_vllm.json} | 8 +- .../tests/test_compose_vllm_on_arc.sh | 52 +-- .../tests/test_pipeline_ipex_vllm.json | 44 ++ EdgeCraftRAG/ui/vue/.env.development | 4 +- EdgeCraftRAG/ui/vue/components.d.ts | 10 + EdgeCraftRAG/ui/vue/nginx.conf | 3 +- EdgeCraftRAG/ui/vue/package.json | 1 + EdgeCraftRAG/ui/vue/src/api/chatbot/index.ts | 25 +- 
.../ui/vue/src/api/knowledgeBase/index.ts | 76 ++++ EdgeCraftRAG/ui/vue/src/api/pipeline/index.ts | 8 +- EdgeCraftRAG/ui/vue/src/api/request.ts | 8 +- .../ui/vue/src/assets/iconFont/iconfont.css | 94 ++++- .../ui/vue/src/assets/iconFont/iconfont.js | 86 ++-- .../ui/vue/src/assets/iconFont/iconfont.json | 154 +++++++ .../ui/vue/src/assets/iconFont/iconfont.ttf | Bin 6480 -> 11444 bytes .../ui/vue/src/assets/iconFont/iconfont.woff | Bin 4332 -> 7448 bytes .../ui/vue/src/assets/iconFont/iconfont.woff2 | Bin 3596 -> 6272 bytes .../ui/vue/src/components/SvgIcon.vue | 2 +- EdgeCraftRAG/ui/vue/src/i18n/en.ts | 218 +++++++++- EdgeCraftRAG/ui/vue/src/i18n/index.ts | 8 +- EdgeCraftRAG/ui/vue/src/i18n/zh.ts | 238 ++++++++++- EdgeCraftRAG/ui/vue/src/layout/Header.vue | 70 +++- EdgeCraftRAG/ui/vue/src/layout/Main.vue | 35 +- EdgeCraftRAG/ui/vue/src/main.ts | 2 +- EdgeCraftRAG/ui/vue/src/store/theme.ts | 4 + EdgeCraftRAG/ui/vue/src/theme/ant.less | 3 + EdgeCraftRAG/ui/vue/src/theme/common.less | 80 ++++ EdgeCraftRAG/ui/vue/src/theme/layout.less | 8 +- EdgeCraftRAG/ui/vue/src/theme/markdown.less | 80 +++- EdgeCraftRAG/ui/vue/src/theme/variables.less | 70 +++- EdgeCraftRAG/ui/vue/src/utils/common.ts | 26 ++ .../ui/vue/src/utils/customRenderer.ts | 128 ++++++ EdgeCraftRAG/ui/vue/src/utils/mitt.ts | 8 + .../src/views/chatbot/components/Chatbot.vue | 275 ------------- .../views/chatbot/components/Chatbot/Chat.vue | 376 ++++++++++++++++++ .../components/{ => Chatbot}/ConfigDrawer.vue | 67 ++-- .../components/{ => Chatbot}/MessageItem.vue | 72 ++-- .../components/{ => Chatbot}/SseService.ts | 4 +- .../views/chatbot/components/Chatbot/index.ts | 8 + .../chatbot/components/Chatbot/index.vue | 91 +++++ .../src/views/chatbot/components/Header.vue | 118 ------ .../KnowledgeBase/KnowledgeDetial.vue | 295 ++++++++++++++ .../components/KnowledgeBase/UpdateDialog.vue | 162 ++++++++ .../chatbot/components/KnowledgeBase/index.ts | 6 + .../components/KnowledgeBase/index.vue | 302 ++++++++++++++ .../views/chatbot/components/UploadFile.vue | 276 ------------- .../vue/src/views/chatbot/components/index.ts | 11 +- .../ui/vue/src/views/chatbot/index.vue | 304 ++++++++++---- EdgeCraftRAG/ui/vue/src/views/chatbot/type.ts | 2 +- EdgeCraftRAG/ui/vue/src/views/error/404.vue | 5 +- .../pipeline/components/Configuration.vue | 54 --- .../pipeline/components/DetailDrawer.vue | 92 +++-- .../pipeline/components/ImportDialog.vue | 20 +- .../views/pipeline/components/QuickStart.vue | 21 +- .../src/views/pipeline/components/System.vue | 12 +- .../views/pipeline/components/SystemChart.vue | 32 +- .../src/views/pipeline/components/Table.vue | 38 +- .../components/UpdateDialog/Activated.vue | 10 +- .../components/UpdateDialog/Basic.vue | 15 +- .../components/UpdateDialog/CreateDialog.vue | 26 +- .../components/UpdateDialog/EditDialog.vue | 26 +- .../components/UpdateDialog/Generator.vue | 45 ++- .../components/UpdateDialog/Indexer.vue | 33 +- .../components/UpdateDialog/NodeParser.vue | 51 ++- .../components/UpdateDialog/PostProcessor.vue | 37 +- .../components/UpdateDialog/Retriever.vue | 37 +- .../src/views/pipeline/components/index.ts | 3 +- .../ui/vue/src/views/pipeline/enum.ts | 23 +- .../ui/vue/src/views/pipeline/index.vue | 10 +- 95 files changed, 3878 insertions(+), 1450 deletions(-) mode change 100644 => 100755 EdgeCraftRAG/Dockerfile.server mode change 100644 => 100755 EdgeCraftRAG/chatqna.py mode change 100644 => 100755 EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml mode change 100644 => 100755 
EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_gradio.yaml delete mode 100644 EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml mode change 100644 => 100755 EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm_multi-arc.yaml mode change 100644 => 100755 EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh create mode 100755 EdgeCraftRAG/edgecraftrag/VERSION mode change 100644 => 100755 EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py create mode 100755 EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py create mode 100644 EdgeCraftRAG/edgecraftrag/api/v1/prompt.py mode change 100644 => 100755 EdgeCraftRAG/edgecraftrag/components/data.py mode change 100644 => 100755 EdgeCraftRAG/edgecraftrag/components/generator.py create mode 100644 EdgeCraftRAG/edgecraftrag/components/knowledge_base.py create mode 100644 EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py mode change 100644 => 100755 EdgeCraftRAG/edgecraftrag/requirements.txt mode change 100644 => 100755 EdgeCraftRAG/edgecraftrag/utils.py create mode 100755 EdgeCraftRAG/nginx/nginx.conf.template rename EdgeCraftRAG/tests/configs/{test_pipeline_vllm.json => test_pipeline_ipex_vllm.json} (82%) create mode 100644 EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json create mode 100644 EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts create mode 100644 EdgeCraftRAG/ui/vue/src/utils/customRenderer.ts create mode 100644 EdgeCraftRAG/ui/vue/src/utils/mitt.ts delete mode 100644 EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot.vue create mode 100644 EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/Chat.vue rename EdgeCraftRAG/ui/vue/src/views/chatbot/components/{ => Chatbot}/ConfigDrawer.vue (75%) rename EdgeCraftRAG/ui/vue/src/views/chatbot/components/{ => Chatbot}/MessageItem.vue (77%) rename EdgeCraftRAG/ui/vue/src/views/chatbot/components/{ => Chatbot}/SseService.ts (92%) create mode 100644 EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/index.ts create mode 100644 EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/index.vue delete mode 100644 EdgeCraftRAG/ui/vue/src/views/chatbot/components/Header.vue create mode 100644 EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetial.vue create mode 100644 EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateDialog.vue create mode 100644 EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/index.ts create mode 100644 EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/index.vue delete mode 100644 EdgeCraftRAG/ui/vue/src/views/chatbot/components/UploadFile.vue delete mode 100644 EdgeCraftRAG/ui/vue/src/views/pipeline/components/Configuration.vue diff --git a/.github/code_spell_ignore.txt b/.github/code_spell_ignore.txt index 3c59d07a31..c72099bfd8 100644 --- a/.github/code_spell_ignore.txt +++ b/.github/code_spell_ignore.txt @@ -1,3 +1,4 @@ ModelIn modelin -pressEnter \ No newline at end of file +pressEnter +PromptIn \ No newline at end of file diff --git a/EdgeCraftRAG/Dockerfile.server b/EdgeCraftRAG/Dockerfile.server old mode 100644 new mode 100755 index 13efc304ca..f2bbf49252 --- a/EdgeCraftRAG/Dockerfile.server +++ b/EdgeCraftRAG/Dockerfile.server @@ -40,11 +40,10 @@ USER user WORKDIR /home/user/edgecraftrag RUN pip install --no-cache-dir --upgrade pip setuptools==70.0.0 && \ - pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt WORKDIR 
/home/user/
RUN git clone https://github.com/openvinotoolkit/openvino.genai.git genai
ENV PYTHONPATH="$PYTHONPATH:/home/user/genai/tools/llm_bench"
-
ENTRYPOINT ["python", "-m", "edgecraftrag.server"]
\ No newline at end of file
diff --git a/EdgeCraftRAG/README.md b/EdgeCraftRAG/README.md
index 93546869da..4b8603bbec 100755
--- a/EdgeCraftRAG/README.md
+++ b/EdgeCraftRAG/README.md
@@ -106,19 +106,53 @@ docker compose -f compose_vllm.yaml up -d
The Docker image can be pulled automatically; you can also pull the image manually:
```bash
-docker pull intelanalytics/ipex-llm-serving-xpu:latest
+docker pull intelanalytics/ipex-llm-serving-xpu:0.8.3-b18
+```
+
+Generate your nginx config file:
+
+```bash
+export HOST_IP=#your host ip
+export NGINX_PORT=8086 #set port for nginx
+# If you are running with 1 vllm container:
+export NGINX_PORT_0=8100 # you can change the port to your preference
+export NGINX_PORT_1=8100 # you can change the port to your preference
+# If you are running with 2 vllm containers:
+export NGINX_PORT_0=8100 # you can change the port to your preference
+export NGINX_PORT_1=8200 # you can change the port to your preference
+# Generate your nginx config file
+envsubst < GenAIExamples/EdgeCraftRAG/nginx/nginx.conf.template > /nginx.conf
+# set NGINX_CONFIG_PATH
+export NGINX_CONFIG_PATH="/nginx.conf"
```
Set up Additional Environment Variables and start with compose_vllm_multi-arc.yaml
```bash
+# For 1 vLLM container (1 DP) with multiple Intel Arc GPUs
+export vLLM_ENDPOINT="http://${HOST_IP}:${NGINX_PORT}"
+export LLM_MODEL_PATH=#your model path
export LLM_MODEL=#your model id
-export VLLM_SERVICE_PORT=8008
-export vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}"
+export CONTAINER_COUNT="single_container"
+export TENSOR_PARALLEL_SIZE=#your Intel Arc GPU number to do inference
+export SELECTED_XPU_0= # example for selecting 2 Arc GPUs: SELECTED_XPU_0=0,1
+```
+
+```bash
+# For 2 vLLM containers (2 DP) with multiple Intel Arc GPUs
+export vLLM_ENDPOINT="http://${HOST_IP}:${NGINX_PORT}"
export LLM_MODEL_PATH=#your model path
+export LLM_MODEL=#your model id
+export CONTAINER_COUNT="multi_container"
export TENSOR_PARALLEL_SIZE=#your Intel Arc GPU number to do inference
+export SELECTED_XPU_0=
+export SELECTED_XPU_1=
+```
+
+Start with compose_vllm_multi-arc.yaml:
-docker compose -f compose_vllm_multi-arc.yaml up -d
+```bash
+docker compose -f docker_compose/intel/gpu/arc/compose_vllm_multi-arc.yaml --profile ${CONTAINER_COUNT} up -d
```
### ChatQnA with LLM Example (Command Line)
@@ -355,8 +389,26 @@ curl -X PATCH http://${HOST_IP}:16010/v1/data/files/test.pdf -H "Content-Type: a
### System Prompt Management
-#### Use custom system prompt
+#### Get system prompt
+
+```bash
+curl -X GET http://${HOST_IP}:16010/v1/chatqna/prompt -H "Content-Type: application/json" | jq '.'
+```
+
+#### Update system prompt
+
+```bash
+curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt -H "Content-Type: application/json" -d '{"prompt":"This is a template prompt"}' | jq '.'
+```
+
+#### Reset system prompt
+
+```bash
+curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt/reset -H "Content-Type: application/json" | jq '.'
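# Optional sanity check (illustrative sketch): fetch the active prompt again to confirm the
# reset took effect. Note that a custom prompt supplied through the update endpoint above is
# expected to keep the "{context}" placeholder, which the server-side set_prompt() checks for.
curl -X GET http://${HOST_IP}:16010/v1/chatqna/prompt -H "Content-Type: application/json" | jq '.'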
+``` + +#### Use custom system prompt file ```bash -curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt -H "Content-Type: multipart/form-data" -F "file=@your_prompt_file.txt" +curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt-file -H "Content-Type: multipart/form-data" -F "file=@your_prompt_file.txt" ``` diff --git a/EdgeCraftRAG/chatqna.py b/EdgeCraftRAG/chatqna.py old mode 100644 new mode 100755 index bc6f0a6437..1073a66eb4 --- a/EdgeCraftRAG/chatqna.py +++ b/EdgeCraftRAG/chatqna.py @@ -43,6 +43,7 @@ def add_remote_service(self): async def handle_request(self, request: Request): input = await request.json() stream_opt = input.get("stream", False) + input["user"] = request.headers.get("sessionid", None) chat_request = ChatCompletionRequest.parse_obj(input) parameters = LLMParams( max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml old mode 100644 new mode 100755 index e4465e0e7f..46271b1c8e --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml @@ -18,6 +18,7 @@ services: - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache - ${PROMPT_PATH:-${PWD}}:/templates/custom + restart: always ports: - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010} devices: @@ -36,6 +37,7 @@ services: MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} + restart: always ports: - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011} depends_on: @@ -55,40 +57,12 @@ services: UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0} volumes: - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache + restart: always ports: - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082} - restart: always depends_on: - server - ecrag - # vllm-openvino-server: - # container_name: vllm-openvino-server - # image: opea/vllm-arc:latest - # ports: - # - ${VLLM_SERVICE_PORT:-8008}:80 - # environment: - # HTTPS_PROXY: ${https_proxy} - # HTTP_PROXY: ${https_proxy} - # VLLM_OPENVINO_DEVICE: GPU - # HF_ENDPOINT: ${HF_ENDPOINT} - # HF_TOKEN: ${HF_TOKEN} - # volumes: - # - /dev/dri/by-path:/dev/dri/by-path - # - $HOME/.cache/huggingface:/root/.cache/huggingface - # devices: - # - /dev/dri - # group_add: - # - ${VIDEOGROUPID:-44} - # - ${RENDERGROUPID:-109} - # entrypoint: /bin/bash -c "\ - # cd / && \ - # export VLLM_CPU_KVCACHE_SPACE=50 && \ - # export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \ - # python3 -m vllm.entrypoints.openai.api_server \ - # --model '${LLM_MODEL}' \ - # --max_model_len=1024 \ - # --host 0.0.0.0 \ - # --port 80" networks: default: driver: bridge diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_gradio.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_gradio.yaml old mode 100644 new mode 100755 index f753a17460..13c029687e --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_gradio.yaml +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_gradio.yaml @@ -18,6 +18,7 @@ services: - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache - ${PROMPT_PATH:-${PWD}}:/templates/custom + restart: always ports: - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010} devices: @@ -36,6 +37,7 @@ services: MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} 
PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} + restart: always ports: - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011} depends_on: @@ -55,9 +57,9 @@ services: UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0} volumes: - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache + restart: always ports: - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082} - restart: always depends_on: - server - ecrag diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml deleted file mode 100644 index d1811a4aca..0000000000 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - server: - image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest} - container_name: edgecraftrag-server - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_ENDPOINT: ${HF_ENDPOINT} - vLLM_ENDPOINT: ${vLLM_ENDPOINT} - ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false} - volumes: - - ${MODEL_PATH:-${PWD}}:/home/user/models - - ${DOC_PATH:-${PWD}}:/home/user/docs - - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache - - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache - - ${PROMPT_PATH:-${PWD}}:/templates/custom - ports: - - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010} - devices: - - /dev/dri:/dev/dri - group_add: - - ${VIDEOGROUPID:-44} - - ${RENDERGROUPID:-109} - ecrag: - image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest} - container_name: edgecraftrag - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011} - MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} - PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} - PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} - ports: - - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011} - depends_on: - - server - ui: - image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest} - container_name: edgecraftrag-ui - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011} - MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} - PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} - PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} - UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082} - UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0} - volumes: - - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache - ports: - - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082} - restart: always - depends_on: - - server - - ecrag - vllm-openvino-server: - container_name: vllm-openvino-server - image: ${REGISTRY:-opea}/vllm-arc:${TAG:-latest} - ports: - - ${VLLM_SERVICE_PORT:-8008}:80 - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - VLLM_OPENVINO_DEVICE: GPU - HF_ENDPOINT: ${HF_ENDPOINT} - HF_TOKEN: ${HF_TOKEN} - volumes: - - ${HF_CACHE:-${HOME}/.cache}:/root/.cache - devices: - - /dev/dri - group_add: - - ${VIDEOGROUPID:-44} - - ${RENDERGROUPID:-109} - entrypoint: /bin/bash -c "\ - cd / && \ - export VLLM_CPU_KVCACHE_SPACE=50 && \ - export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \ - python3 -m vllm.entrypoints.openai.api_server \ - --model '${LLM_MODEL}' \ - --max_model_len=4096 \ - --host 0.0.0.0 \ - 
--port 80" -networks: - default: - driver: bridge diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm_multi-arc.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm_multi-arc.yaml old mode 100644 new mode 100755 index d82f9c9747..5453ae1aa4 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm_multi-arc.yaml +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm_multi-arc.yaml @@ -19,6 +19,7 @@ services: - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache - ${PROMPT_PATH:-${PWD}}:/templates/custom + restart: always ports: - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010} devices: @@ -26,6 +27,9 @@ services: group_add: - ${VIDEOGROUPID:-44} - ${RENDERGROUPID:-109} + profiles: + - single_container + - multi_container ecrag: image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest} container_name: edgecraftrag @@ -37,10 +41,26 @@ services: MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} + restart: always ports: - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011} depends_on: - server + profiles: + - single_container + - multi_container + nginx: + image: nginx:latest + restart: always + ports: + - ${NGINX_PORT:-8086}:8086 + volumes: + - ${NGINX_CONFIG_PATH:-${PWD}}:/etc/nginx/nginx.conf + depends_on: + - server + profiles: + - single_container + - multi_container ui: image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest} container_name: edgecraftrag-ui @@ -56,18 +76,54 @@ services: UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0} volumes: - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache + restart: always ports: - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082} - restart: always depends_on: - server - ecrag - llm-serving-xpu: - container_name: ipex-llm-serving-xpu-container - image: intelanalytics/ipex-llm-serving-xpu:latest + profiles: + - single_container + - multi_container + llm-serving-xpu-0: + container_name: ipex-llm-serving-xpu-container-0 + image: intelanalytics/ipex-llm-serving-xpu:0.8.3-b18 + privileged: true + restart: always + ports: + - ${VLLM_SERVICE_PORT_0:-8100}:${VLLM_SERVICE_PORT_0:-8100} + group_add: + - video + - ${VIDEOGROUPID:-44} + - ${RENDERGROUPID:-109} + volumes: + - ${LLM_MODEL_PATH:-${PWD}}:/llm/models + devices: + - /dev/dri + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_ENDPOINT: ${HF_ENDPOINT} + MODEL_PATH: "/llm/models" + SERVED_MODEL_NAME: ${LLM_MODEL} + TENSOR_PARALLEL_SIZE: ${TENSOR_PARALLEL_SIZE:-1} + PORT: ${VLLM_SERVICE_PORT_0:-8100} + ZE_AFFINITY_MASK: ${SELECTED_XPU_0:-0} + shm_size: '32g' + entrypoint: /bin/bash -c "\ + cd /llm && \ + bash start-vllm-service.sh" + profiles: + - single_container + - multi_container + llm-serving-xpu-1: + container_name: ipex-llm-serving-xpu-container-1 + image: intelanalytics/ipex-llm-serving-xpu:0.8.3-b18 privileged: true + restart: always ports: - - ${VLLM_SERVICE_PORT:-8008}:8000 + - ${VLLM_SERVICE_PORT_1:-8200}:${VLLM_SERVICE_PORT_1:-8200} group_add: - video - ${VIDEOGROUPID:-44} @@ -84,10 +140,14 @@ services: MODEL_PATH: "/llm/models" SERVED_MODEL_NAME: ${LLM_MODEL} TENSOR_PARALLEL_SIZE: ${TENSOR_PARALLEL_SIZE:-1} - shm_size: '16g' + PORT: ${VLLM_SERVICE_PORT_1:-8200} + ZE_AFFINITY_MASK: ${SELECTED_XPU_1:-1} + shm_size: '32g' entrypoint: /bin/bash -c "\ cd /llm && \ bash start-vllm-service.sh" + profiles: + - multi_container 
networks: default: driver: bridge diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh b/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh old mode 100644 new mode 100755 index c70928a492..7655f09e8c --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh @@ -15,3 +15,14 @@ export HF_ENDPOINT=${HF_ENDPOINT} export vLLM_ENDPOINT=${vLLM_ENDPOINT} export HF_TOKEN=${HF_TOKEN} export no_proxy="localhost, 127.0.0.1, 192.168.1.1" +export UI_UPLOAD_PATH=${UI_UPLOAD_PATH} +export LLM_MODEL_PATH=${LLM_MODEL_PATH} +export NGINX_PORT_0=${NGINX_PORT_0} +export NGINX_PORT_1=${NGINX_PORT_1} +export VLLM_SERVICE_PORT_0=${VLLM_SERVICE_PORT_0} +export VLLM_SERVICE_PORT_1=${VLLM_SERVICE_PORT_1} +export TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE} +export NGINX_CONFIG_PATH=${NGINX_CONFIG_PATH} +export SELECTED_XPU_0=${SELECTED_XPU_0} +export SELECTED_XPU_1=${SELECTED_XPU_1} +export vLLM_ENDPOINT=${vLLM_ENDPOINT} diff --git a/EdgeCraftRAG/docker_image_build/build.yaml b/EdgeCraftRAG/docker_image_build/build.yaml index 18ad867c75..cde5d49778 100644 --- a/EdgeCraftRAG/docker_image_build/build.yaml +++ b/EdgeCraftRAG/docker_image_build/build.yaml @@ -30,8 +30,3 @@ services: dockerfile: ./ui/docker/Dockerfile.gradio extends: edgecraftrag image: ${REGISTRY:-opea}/edgecraftrag-ui-gradio:${TAG:-latest} - vllm-arc: - build: - context: GenAIComps - dockerfile: comps/third_parties/vllm/src/Dockerfile.intel_gpu - image: ${REGISTRY:-opea}/vllm-arc:${TAG:-latest} diff --git a/EdgeCraftRAG/edgecraftrag/VERSION b/EdgeCraftRAG/edgecraftrag/VERSION new file mode 100755 index 0000000000..40766d6bb4 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/VERSION @@ -0,0 +1 @@ +25.05-Release diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py old mode 100644 new mode 100755 index d0236c82e8..be7aee3d58 --- a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py @@ -5,6 +5,7 @@ from comps.cores.proto.api_protocol import ChatCompletionRequest from edgecraftrag.api_schema import RagOut from edgecraftrag.context import ctx +from edgecraftrag.utils import serialize_contexts, set_current_session from fastapi import FastAPI, File, HTTPException, UploadFile, status from fastapi.responses import StreamingResponse @@ -19,7 +20,7 @@ async def retrieval(request: ChatCompletionRequest): if nodeswithscore is not None: ret = [] for n in nodeswithscore: - ret.append((n.node.node_id, n.node.text, n.score)) + ret.append((n.node.node_id, n.node.text, round(float(n.score), 8))) return ret return None @@ -29,14 +30,16 @@ async def retrieval(request: ChatCompletionRequest): @chatqna_app.post(path="/v1/chatqna") async def chatqna(request: ChatCompletionRequest): try: + sessionid = request.user + set_current_session(sessionid) generator = ctx.get_pipeline_mgr().get_active_pipeline().generator if generator: request.model = generator.model_id if request.stream: - ret, retri_res = ctx.get_pipeline_mgr().run_pipeline(chat_request=request) + ret, contexts = ctx.get_pipeline_mgr().run_pipeline(chat_request=request) return ret else: - ret, retri_res = ctx.get_pipeline_mgr().run_pipeline(chat_request=request) + ret, contexts = ctx.get_pipeline_mgr().run_pipeline(chat_request=request) return str(ret) except Exception as e: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) @@ -46,7 +49,7 @@ async def chatqna(request: ChatCompletionRequest): 
@chatqna_app.post(path="/v1/ragqna") async def ragqna(request: ChatCompletionRequest): try: - res, retri_res = ctx.get_pipeline_mgr().run_pipeline(chat_request=request) + res, contexts = ctx.get_pipeline_mgr().run_pipeline(chat_request=request) if isinstance(res, GeneratedDoc): res = res.text elif isinstance(res, StreamingResponse): @@ -55,36 +58,9 @@ async def ragqna(request: ChatCompletionRequest): collected_data.append(chunk) res = "".join(collected_data) - ragout = RagOut(query=request.messages, contexts=[], response=str(res)) - for n in retri_res: - origin_text = n.node.get_text() - ragout.contexts.append(origin_text.strip()) - return ragout - except Exception as e: - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) - + serialized_contexts = serialize_contexts(contexts) -# Upload prompt file for LLM ChatQnA -@chatqna_app.post(path="/v1/chatqna/prompt") -async def load_prompt(file: UploadFile = File(...)): - try: - generator = ctx.get_pipeline_mgr().get_active_pipeline().generator - if generator: - content = await file.read() - prompt_str = content.decode("utf-8") - generator.set_prompt(prompt_str) - return "Set LLM Prompt Successfully" - except Exception as e: - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) - - -# Reset prompt for LLM ChatQnA -@chatqna_app.post(path="/v1/chatqna/prompt/reset") -async def reset_prompt(): - try: - generator = ctx.get_pipeline_mgr().get_active_pipeline().generator - if generator: - generator.reset_prompt() - return "Reset LLM Prompt Successfully" + ragout = RagOut(query=request.messages, contexts=serialized_contexts, response=str(res)) + return ragout except Exception as e: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/data.py b/EdgeCraftRAG/edgecraftrag/api/v1/data.py index 4a584de425..35a1ab3ff4 100755 --- a/EdgeCraftRAG/edgecraftrag/api/v1/data.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/data.py @@ -94,8 +94,8 @@ async def delete_file(name): # Upload & save a file from UI -@data_app.post(path="/v1/data/file") -async def upload_file(file: UploadFile = File(...)): +@data_app.post(path="/v1/data/file/{file_name}") +async def upload_file(file_name: str, file: UploadFile = File(...)): if ctx.get_pipeline_mgr().get_active_pipeline() is None: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Please activate pipeline and upload the file" @@ -103,7 +103,7 @@ async def upload_file(file: UploadFile = File(...)): try: # DIR for server to save files uploaded by UI UI_DIRECTORY = os.getenv("UI_TMPFILE_PATH", "/home/user/ui_cache") - UPLOAD_DIRECTORY = os.path.join(UI_DIRECTORY, "documents") + UPLOAD_DIRECTORY = os.path.join(UI_DIRECTORY, file_name) os.makedirs(UPLOAD_DIRECTORY, exist_ok=True) file_path = os.path.join(UPLOAD_DIRECTORY, file.filename) with open(file_path, "wb") as buffer: diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py b/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py new file mode 100755 index 0000000000..1a4a8e54a3 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py @@ -0,0 +1,136 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from edgecraftrag.api.v1.data import add_data +from edgecraftrag.api_schema import DataIn, KnowledgeBaseCreateIn +from edgecraftrag.context import ctx +from fastapi import FastAPI, HTTPException, status + +kb_app = FastAPI() + + +# Get all knowledge 
bases +@kb_app.get(path="/v1/knowledge") +async def get_all_knowledge_bases(): + try: + return ctx.knowledgemgr.get_all_knowledge_bases() + except Exception as e: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) + + +# Get the specified knowledge base. +@kb_app.get("/v1/knowledge/{knowledge_name}") +async def get_knowledge_base(knowledge_name: str): + kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) + return kb + + +# Create a new knowledge base +@kb_app.post(path="/v1/knowledge") +async def create_knowledge_base(knowledge: KnowledgeBaseCreateIn): + try: + kb = ctx.knowledgemgr.create_knowledge_base(knowledge) + if kb.active: + await update_knowledge_base_handler(kb.get_file_paths()) + return "Create knowledge base successfully" + except Exception as e: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) + + +# Delete the knowledge base by name +@kb_app.delete(path="/v1/knowledge/{knowledge_name}") +async def delete_knowledge_base(knowledge_name: str): + try: + return ctx.knowledgemgr.delete_knowledge_base(knowledge_name) + except Exception as e: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) + + +# Switch the active knowledge base +@kb_app.patch(path="/v1/knowledge/patch") +async def update_knowledge_base(knowledge: KnowledgeBaseCreateIn): + try: + kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge.name) + if knowledge.active is not None and knowledge.active != kb.active: + file_paths = kb.get_file_paths() if knowledge.active else None + await update_knowledge_base_handler(file_paths) + result = ctx.knowledgemgr.update_knowledge_base(knowledge) + return result + except Exception as e: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) + + +# Add a files to the knowledge base +@kb_app.post(path="/v1/knowledge/{knowledge_name}/files") +async def add_file_to_knowledge_base(knowledge_name, file_path: DataIn): + try: + kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) + if os.path.isdir(file_path.local_path): + for root, _, files in os.walk(file_path.local_path): + for file in files: + file_full_path = os.path.join(root, file) + if file_full_path not in kb.get_file_paths(): + kb.add_file_path(file_full_path) + else: + raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="File upload failed") + elif os.path.isfile(file_path.local_path) and file_path.local_path not in kb.get_file_paths(): + kb.add_file_path(file_path.local_path) + else: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="File upload failed") + + active_kb = ctx.knowledgemgr.get_active_knowledge_base() + if active_kb: + if active_kb.name == knowledge_name or active_kb.idx == knowledge_name: + await update_knowledge_base_handler(file_path, add_file=True) + + return "File upload successfully" + except ValueError as e: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) + + +# Remove a file from the knowledge base +@kb_app.delete(path="/v1/knowledge/{knowledge_name}/files") +async def remove_file_from_knowledge_base(knowledge_name, file_path: DataIn): + try: + kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) + if file_path.local_path in kb.get_file_paths(): + kb.remove_file_path(file_path.local_path) + else: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="File remove failure") + + file_path = kb.get_file_paths() + active_kb = ctx.knowledgemgr.get_active_knowledge_base() + if 
active_kb: + if active_kb.name == knowledge_name or active_kb.idx == knowledge_name: + await update_knowledge_base_handler(file_path) + return "File deleted successfully" + except ValueError as e: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) + + +# Update knowledge base data +async def update_knowledge_base_handler(file_path=None, add_file: bool = False): + if ctx.get_pipeline_mgr().get_active_pipeline() is None: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Please activate pipeline") + + if add_file and file_path: + return await add_data(file_path) + + elif file_path: + pl = ctx.get_pipeline_mgr().get_active_pipeline() + ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) + pl.indexer.reinitialize_indexer() + pl.update_indexer_to_retriever() + for file in file_path: + request = DataIn(local_path=file) + await add_data(request) + return "Done" + + else: + pl = ctx.get_pipeline_mgr().get_active_pipeline() + ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) + pl.indexer.reinitialize_indexer() + pl.update_indexer_to_retriever() + return "Done" diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py b/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py new file mode 100644 index 0000000000..86639a40a7 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py @@ -0,0 +1,58 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from edgecraftrag.api_schema import PromptIn +from edgecraftrag.context import ctx +from fastapi import FastAPI, File, HTTPException, UploadFile, status + +prompt_app = FastAPI() + + +# Upload prompt for LLM ChatQnA using file +@prompt_app.post(path="/v1/chatqna/prompt-file") +async def load_prompt_file(file: UploadFile = File(...)): + try: + generator = ctx.get_pipeline_mgr().get_active_pipeline().generator + if generator: + content = await file.read() + prompt_str = content.decode("utf-8") + generator.set_prompt(prompt_str) + return "Set LLM Prompt Successfully" + except Exception as e: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + +# Update prompt for LLM ChatQnA +@prompt_app.post(path="/v1/chatqna/prompt") +async def load_prompt(request: PromptIn): + try: + generator = ctx.get_pipeline_mgr().get_active_pipeline().generator + if generator: + prompt_str = request.prompt + generator.set_prompt(prompt_str) + return "Set LLM Prompt Successfully" + except Exception as e: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + +# Get prompt of LLM ChatQnA +@prompt_app.get(path="/v1/chatqna/prompt") +async def get_prompt(): + try: + generator = ctx.get_pipeline_mgr().get_active_pipeline().generator + if generator: + return generator.prompt + except Exception as e: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + +# Reset prompt for LLM ChatQnA +@prompt_app.post(path="/v1/chatqna/prompt/reset") +async def reset_prompt(): + try: + generator = ctx.get_pipeline_mgr().get_active_pipeline().generator + if generator: + generator.reset_prompt() + return "Reset LLM Prompt Successfully" + except Exception as e: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) diff --git a/EdgeCraftRAG/edgecraftrag/api_schema.py b/EdgeCraftRAG/edgecraftrag/api_schema.py index 7a8a493b1e..599d53115a 100644 --- a/EdgeCraftRAG/edgecraftrag/api_schema.py +++ b/EdgeCraftRAG/edgecraftrag/api_schema.py @@ -1,7 +1,7 @@ # Copyright (C) 2024 
Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from typing import Optional +from typing import Any, Optional from pydantic import BaseModel @@ -65,5 +65,15 @@ class FilesIn(BaseModel): class RagOut(BaseModel): query: str - contexts: Optional[list[str]] = None + contexts: Optional[dict[str, Any]] = None response: str + + +class PromptIn(BaseModel): + prompt: Optional[str] = None + + +class KnowledgeBaseCreateIn(BaseModel): + name: str + description: Optional[str] = None + active: Optional[bool] = None diff --git a/EdgeCraftRAG/edgecraftrag/components/data.py b/EdgeCraftRAG/edgecraftrag/components/data.py old mode 100644 new mode 100755 index e7fa19e7ad..34b568abde --- a/EdgeCraftRAG/edgecraftrag/components/data.py +++ b/EdgeCraftRAG/edgecraftrag/components/data.py @@ -54,7 +54,7 @@ def convert_text_to_documents(text) -> List[Document]: def convert_file_to_documents(file_path) -> List[Document]: from llama_index.core import SimpleDirectoryReader - supported_exts = [".pdf", ".txt", ".doc", ".docx", ".pptx", ".ppt", ".csv", ".md", ".html", ".rst"] + supported_exts = [".pdf", ".txt", ".doc", ".docx", ".pptx", ".ppt", ".csv", ".md", ".html", ".rst", ".epub"] if file_path.is_dir(): docs = SimpleDirectoryReader(input_dir=file_path, recursive=True, required_exts=supported_exts).load_data() elif file_path.is_file(): diff --git a/EdgeCraftRAG/edgecraftrag/components/generator.py b/EdgeCraftRAG/edgecraftrag/components/generator.py old mode 100644 new mode 100755 index d5a3e73ccd..e002e92a78 --- a/EdgeCraftRAG/edgecraftrag/components/generator.py +++ b/EdgeCraftRAG/edgecraftrag/components/generator.py @@ -8,8 +8,8 @@ import urllib.request from urllib.parse import urlparse -from comps import GeneratedDoc -from edgecraftrag.base import BaseComponent, CompType, GeneratorType, NodeParserType +from edgecraftrag.base import BaseComponent, CompType, GeneratorType, InferenceType, NodeParserType +from edgecraftrag.utils import concat_history, save_history from fastapi.responses import StreamingResponse from langchain_core.prompts import PromptTemplate from llama_index.llms.openai_like import OpenAILike @@ -95,13 +95,33 @@ def extract_unstructured_eles(retrieved_nodes=[], text_gen_context=""): return unstructured_str +async def local_stream_generator(lock, llm, prompt_str, unstructured_str): + async with lock: + response = llm.stream_complete(prompt_str) + collected_data = [] + for r in response: + collected_data.append(r.delta) + yield r.delta + await asyncio.sleep(0) + if unstructured_str: + collected_data.append(unstructured_str) + yield unstructured_str + res = "".join(collected_data) + save_history(res) + + async def stream_generator(llm, prompt_str, unstructured_str): response = llm.stream_complete(prompt_str) + collected_data = [] for r in response: + collected_data.append(r.delta) yield r.delta await asyncio.sleep(0) if unstructured_str: + collected_data.append(unstructured_str) yield unstructured_str + res = "".join(collected_data) + save_history(res) class QnAGenerator(BaseComponent): @@ -135,6 +155,8 @@ def __init__(self, llm_model, prompt_template_file, inference_type, **kwargs): self.model_id = llm_model else: self.model_id = llm_model().model_id + if self.inference_type == InferenceType.LOCAL: + self.lock = asyncio.Lock() def set_prompt(self, prompt): if "{context}" not in prompt: @@ -170,6 +192,7 @@ def run(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): # This could happen when User delete all LLMs through RESTful API raise ValueError("No LLM available, please load 
LLM") # query transformation + chat_request.messages = concat_history(chat_request.messages) text_gen_context, prompt_str = self.query_transform(chat_request, retrieved_nodes) generate_kwargs = dict( temperature=chat_request.temperature, @@ -186,14 +209,17 @@ def run(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): unstructured_str = extract_unstructured_eles(retrieved_nodes, text_gen_context) if chat_request.stream: return StreamingResponse( - stream_generator(self.llm(), prompt_str, unstructured_str), + local_stream_generator(self.lock, self.llm(), prompt_str, unstructured_str), media_type="text/event-stream", ) else: - return self.llm().complete(prompt_str) + result = self.llm().complete(prompt_str) + save_history(str(result.text)) + return result def run_vllm(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): # query transformation + chat_request.messages = concat_history(chat_request.messages) text_gen_context, prompt_str = self.query_transform(chat_request, retrieved_nodes) llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008") model_name = os.getenv("LLM_MODEL", self.model_id) @@ -216,10 +242,9 @@ def run_vllm(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): stream_generator(llm, prompt_str, unstructured_str), media_type="text/event-stream" ) else: - response = llm.complete(prompt_str) - response = response.text - - return GeneratedDoc(text=response, prompt=prompt_str) + result = llm.complete(prompt_str) + save_history(str(result)) + return result @model_serializer def ser_model(self): diff --git a/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py b/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py new file mode 100644 index 0000000000..259c4a463f --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py @@ -0,0 +1,51 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +from typing import Any, List, Optional + +from edgecraftrag.base import BaseComponent +from pydantic import model_serializer + + +class Knowledge(BaseComponent): + file_paths: Optional[List[str]] = [] + file_map: Optional[List[str]] = {} + description: Optional[str] = "None" + comp_type: str = "knowledge" + active: bool + + def _update_file_names(self) -> None: + self.file_map = {os.path.basename(path): path for path in self.file_paths if path is not None} + + def add_file_path(self, file_path: str) -> bool: + if file_path not in self.file_paths: + self.file_paths.append(file_path) + self._update_file_names() + return True + return False + + def remove_file_path(self, file_path: str) -> bool: + if file_path in self.file_paths: + self.file_paths.remove(file_path) + self._update_file_names() + return True + return False + + def get_file_paths(self) -> List[str]: + return self.file_paths + + def run(self, **kwargs) -> Any: + pass + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "name": self.name, + "comp_type": self.comp_type, + "file_map": self.file_map, + "description": self.description, + "active": self.active, + } + return set diff --git a/EdgeCraftRAG/edgecraftrag/components/pipeline.py b/EdgeCraftRAG/edgecraftrag/components/pipeline.py index e0a01eba96..a7dd8c4cce 100644 --- a/EdgeCraftRAG/edgecraftrag/components/pipeline.py +++ b/EdgeCraftRAG/edgecraftrag/components/pipeline.py @@ -221,11 +221,13 @@ async def timing_wrapper(): def run_test_generator_ben(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: benchmark_index, benchmark_data = 
pl.benchmark.init_benchmark_data() + contexts = {} start = time.perf_counter() query = chat_request.messages retri_res = pl.retriever.run(query=query) query_bundle = QueryBundle(query) benchmark_data[CompType.RETRIEVER] = time.perf_counter() - start + contexts[CompType.RETRIEVER] = retri_res start = time.perf_counter() if pl.postprocessor: @@ -236,6 +238,7 @@ def run_test_generator_ben(pl: Pipeline, chat_request: ChatCompletionRequest) -> ): processor.top_n = chat_request.top_n retri_res = processor.run(retri_res=retri_res, query_bundle=query_bundle) + contexts[CompType.POSTPROCESSOR] = retri_res benchmark_data[CompType.POSTPROCESSOR] = time.perf_counter() - start if pl.generator is None: @@ -260,12 +263,14 @@ def run_test_generator_ben(pl: Pipeline, chat_request: ChatCompletionRequest) -> benchmark_data[CompType.GENERATOR] = end - start pl.benchmark.insert_llm_data(benchmark_index, input_token_size) pl.benchmark.insert_benchmark_data(benchmark_data) - return ret, retri_res + return ret, contexts def run_test_generator(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: query = chat_request.messages + contexts = {} retri_res = pl.retriever.run(query=query) + contexts[CompType.RETRIEVER] = retri_res query_bundle = QueryBundle(query) if pl.postprocessor: @@ -276,6 +281,7 @@ def run_test_generator(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any ): processor.top_n = chat_request.top_n retri_res = processor.run(retri_res=retri_res, query_bundle=query_bundle) + contexts[CompType.POSTPROCESSOR] = retri_res if pl.generator is None: raise ValueError("No Generator Specified") @@ -286,4 +292,4 @@ def run_test_generator(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any ret = pl.generator.run_vllm(chat_request, retri_res, np_type) else: raise ValueError("LLM inference_type not supported") - return ret, retri_res + return ret, contexts diff --git a/EdgeCraftRAG/edgecraftrag/context.py b/EdgeCraftRAG/edgecraftrag/context.py index 3555ce4beb..4d013b9bd2 100644 --- a/EdgeCraftRAG/edgecraftrag/context.py +++ b/EdgeCraftRAG/edgecraftrag/context.py @@ -3,6 +3,7 @@ from edgecraftrag.controllers.compmgr import GeneratorMgr, IndexerMgr, NodeParserMgr, PostProcessorMgr, RetrieverMgr from edgecraftrag.controllers.filemgr import FilelMgr +from edgecraftrag.controllers.knowledge_basemgr import KnowledgeManager from edgecraftrag.controllers.modelmgr import ModelMgr from edgecraftrag.controllers.nodemgr import NodeMgr from edgecraftrag.controllers.pipelinemgr import PipelineMgr @@ -20,6 +21,7 @@ def __init__(self): self.modmgr = ModelMgr() self.genmgr = GeneratorMgr() self.filemgr = FilelMgr() + self.knowledgemgr = KnowledgeManager() def get_pipeline_mgr(self): return self.plmgr @@ -48,5 +50,8 @@ def get_generator_mgr(self): def get_file_mgr(self): return self.filemgr + def get_knowledge_mgr(self): + return self.knowledgemgr + ctx = Context() diff --git a/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py new file mode 100644 index 0000000000..091175f64f --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py @@ -0,0 +1,73 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, Dict, List, Optional + +from edgecraftrag.api_schema import KnowledgeBaseCreateIn +from edgecraftrag.base import BaseMgr +from edgecraftrag.components.knowledge_base import Knowledge +from fastapi import HTTPException, status + + +class KnowledgeManager(BaseMgr): + def 
__init__(self): + super().__init__() + self.active_knowledge_idx: Optional[str] = None + + def get_knowledge_base_by_name_or_id(self, name: str): + for _, kb in self.components.items(): + if kb.name == name or kb.idx == name: + return kb + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="knowledge base does not exist") + + def get_active_knowledge_base(self) -> Optional[Knowledge]: + if self.active_knowledge_idx: + return self.get_knowledge_base_by_name_or_id(self.active_knowledge_idx) + else: + return None + + def active_knowledge(self, knowledge: KnowledgeBaseCreateIn): + kb = self.get_knowledge_base_by_name_or_id(knowledge.name) + self.active_knowledge_idx = kb.idx if knowledge.active else None + + for idx, comp in self.components.items(): + if isinstance(comp, Knowledge): + comp.active = idx == self.active_knowledge_idx + return kb + + def create_knowledge_base(self, knowledge: KnowledgeBaseCreateIn) -> Knowledge: + for _, kb in self.components.items(): + if kb.name == knowledge.name: + raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="The knowledge base already exists.") + if knowledge.active is None: + knowledge.active = False + kb = Knowledge(name=knowledge.name, description=knowledge.description, active=knowledge.active) + self.add(kb) + if knowledge.active: + self.active_knowledge(knowledge) + return kb + + def delete_knowledge_base(self, name: str): + kb = self.get_knowledge_base_by_name_or_id(name) + if kb.idx == self.active_knowledge_idx: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Cannot delete a running knowledge base." + ) + self.remove(kb.idx) + return "Knowledge base removed successfully" + + def update_knowledge_base(self, knowledge) -> Knowledge: + kb = self.get_knowledge_base_by_name_or_id(knowledge.name) + + if knowledge.description is not None: + kb.description = knowledge.description + + if knowledge.active is not None and kb.active != knowledge.active: + kb = self.active_knowledge(knowledge) + return "Knowledge base update successfully" + + def get_all_knowledge_bases(self) -> List[Dict[str, Any]]: + kb_list = [] + for idx, kb in self.components.items(): + kb_list.append(kb) + return kb_list diff --git a/EdgeCraftRAG/edgecraftrag/requirements.txt b/EdgeCraftRAG/edgecraftrag/requirements.txt old mode 100644 new mode 100755 diff --git a/EdgeCraftRAG/edgecraftrag/server.py b/EdgeCraftRAG/edgecraftrag/server.py index cd8a7f8eab..becf0902f8 100644 --- a/EdgeCraftRAG/edgecraftrag/server.py +++ b/EdgeCraftRAG/edgecraftrag/server.py @@ -6,8 +6,10 @@ import uvicorn from edgecraftrag.api.v1.chatqna import chatqna_app from edgecraftrag.api.v1.data import data_app +from edgecraftrag.api.v1.knowledge_base import kb_app from edgecraftrag.api.v1.model import model_app from edgecraftrag.api.v1.pipeline import pipeline_app +from edgecraftrag.api.v1.prompt import prompt_app from edgecraftrag.api.v1.system import system_app from edgecraftrag.utils import UI_DIRECTORY from fastapi import FastAPI @@ -26,7 +28,7 @@ ) -sub_apps = [data_app, model_app, pipeline_app, chatqna_app, system_app] +sub_apps = [data_app, model_app, pipeline_app, chatqna_app, system_app, prompt_app, kb_app] for sub_app in sub_apps: for route in sub_app.routes: app.router.routes.append(route) diff --git a/EdgeCraftRAG/edgecraftrag/utils.py b/EdgeCraftRAG/edgecraftrag/utils.py old mode 100644 new mode 100755 index be83f47135..0572000f80 --- a/EdgeCraftRAG/edgecraftrag/utils.py +++ b/EdgeCraftRAG/edgecraftrag/utils.py @@ -3,7 +3,7 @@ import io 
import os -from typing import Iterator +from typing import Iterator, Optional from docx.text.paragraph import Paragraph from PIL import Image as Img @@ -29,3 +29,46 @@ def iter_elements(cls, paragraph: Paragraph, opts: DocxPartitionerOptions) -> It image.save(image_path) element_metadata = ElementMetadata(image_path=image_path) yield Image(text="IMAGE", metadata=element_metadata) + + +def serialize_node_with_score(node_with_score): + return { + "node": node_with_score.node.__dict__, + "score": node_with_score.score.item() if hasattr(node_with_score.score, "item") else node_with_score.score, + } + + +def serialize_contexts(contexts): + return {key: [serialize_node_with_score(node) for node in nodes] for key, nodes in contexts.items()} + + +_history_map = {} +_current_session_id: Optional[str] = None + + +def set_current_session(session_id: str) -> None: + global _current_session_id + _current_session_id = session_id if session_id not in (None, "", "None") else "default_session" + + +def get_current_session() -> Optional[str]: + return _current_session_id + + +def clear_history() -> None: + session_id = get_current_session() + if session_id in _history_map: + _history_map[session_id] = [] + + +def save_history(message: str) -> str: + session_id = get_current_session() + _history_map.setdefault(session_id, []).append(f"content: {message}") + return "History appended successfully" + + +def concat_history(message: str) -> str: + history_id = get_current_session() + _history_map.setdefault(history_id, []).append(f"user: {message}") + str_message = "".join(_history_map.get(history_id, [])) + return str_message[-6000:] if len(str_message) > 6000 else str_message diff --git a/EdgeCraftRAG/nginx/nginx.conf.template b/EdgeCraftRAG/nginx/nginx.conf.template new file mode 100755 index 0000000000..005420e386 --- /dev/null +++ b/EdgeCraftRAG/nginx/nginx.conf.template @@ -0,0 +1,37 @@ +worker_processes auto; +events { + worker_connections 1024; +} +http { + + upstream multi-arc-serving-container { + server ${HOST_IP}:${NGINX_PORT_0}; + server ${HOST_IP}:${NGINX_PORT_1}; + } + include /etc/nginx/mime.types; + default_type application/octet-stream; + client_max_body_size 50M; + sendfile on; + + keepalive_timeout 65; + keepalive_requests 1000; + server { + listen 8086; + server_name _; + location / { + root /usr/share/nginx/html; + index index.html index.htm; + add_header Cache-Control "no-cache"; + try_files $uri $uri/ /index.html; + } + location /v1/completions { + proxy_pass http://multi-arc-serving-container/v1/completions; + proxy_http_version 1.1; + proxy_set_header Connection ""; + } + + location ~ /\. 
{ + deny all; + } + } +} \ No newline at end of file diff --git a/EdgeCraftRAG/tests/configs/test_pipeline_vllm.json b/EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json similarity index 82% rename from EdgeCraftRAG/tests/configs/test_pipeline_vllm.json rename to EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json index 21d709943d..f49367612c 100644 --- a/EdgeCraftRAG/tests/configs/test_pipeline_vllm.json +++ b/EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json @@ -1,5 +1,5 @@ { - "name": "rag_test_vllm", + "name": "rag_test_local_llm", "node_parser": { "chunk_size": 400, "chunk_overlap": 48, @@ -31,14 +31,14 @@ } ], "generator": { + "inference_type": "vllm", "model": { "model_id": "Qwen/Qwen2-7B-Instruct", - "model_path": "./models/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights", + "model_path": "./models/Qwen/Qwen2-7B-Instruct/", "device": "auto", "weight": "INT4" }, - "prompt_path": "./default_prompt.txt", - "inference_type": "vllm" + "prompt_path": "./default_prompt.txt" }, "active": "True" } diff --git a/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh b/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh index 43df736fd2..c3fe7785e3 100755 --- a/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh +++ b/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh @@ -18,19 +18,26 @@ LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') HOST_IP=$ip_address -COMPOSE_FILE="compose_vllm.yaml" +COMPOSE_FILE="compose_vllm_multi-arc.yaml" EC_RAG_SERVICE_PORT=16010 -MODEL_PATH="/home/media/models" +MODEL_PATH="/home/media/qwen" # MODEL_PATH="$WORKPATH/models" DOC_PATH="$WORKPATH/tests" -UI_TMPFILE_PATH="$WORKPATH/tests" - -#HF_ENDPOINT=https://hf-mirror.com -LLM_MODEL="Qwen/Qwen2-7B-Instruct" -VLLM_SERVICE_PORT=8008 -vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}" - +UI_UPLOAD_PATH="$WORKPATH/tests" + +HF_ENDPOINT=https://hf-mirror.com +NGINX_PORT=8086 +NGINX_PORT_0=8100 +NGINX_PORT_1=8100 +VLLM_SERVICE_PORT_0=8100 +TENSOR_PARALLEL_SIZE=1 +SELECTED_XPU_0=0 +vLLM_ENDPOINT="http://${HOST_IP}:${NGINX_PORT}" +CONTAINER_COUNT="single_container" +LLM_MODEL=Qwen/Qwen2-7B-Instruct +LLM_MODEL_PATH=$MODEL_PATH +NGINX_CONFIG_PATH="$WORKPATH/nginx/nginx.conf" function build_docker_images() { opea_branch=${opea_branch:-"main"} @@ -41,30 +48,29 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s + echo "Pull intelanalytics/ipex-llm-serving-xpu image" + docker pull intelanalytics/ipex-llm-serving-xpu:0.8.3-b18 + echo "Build all the images with --no-cache, check docker_image_build.log for details..." docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log - echo "Build vllm_openvino image from GenAIComps..." - cd $WORKPATH && git clone --single-branch --branch "${opea_branch:-"main"}" https://github.com/opea-project/GenAIComps.git - cd GenAIComps/comps/third_parties/vllm/src/ - bash ./build_docker_vllm_openvino.sh gpu - docker images && sleep 1s } function start_services() { cd $WORKPATH/docker_compose/intel/gpu/arc source set_env.sh - + envsubst < $WORKPATH/nginx/nginx.conf.template > $WORKPATH/nginx/nginx.conf # Start Docker Containers - docker compose -f $COMPOSE_FILE up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f $COMPOSE_FILE --profile $CONTAINER_COUNT up -d > ${LOG_PATH}/start_services_with_compose.log + echo "ipex-llm-serving-xpu is booting, please wait." 
n=0 until [[ "$n" -ge 100 ]]; do - docker logs vllm-openvino-server > ${LOG_PATH}/vllm_service_start.log - if grep -q "metrics.py" ${LOG_PATH}/vllm_service_start.log; then + docker logs ipex-llm-serving-xpu-container-0 > ${LOG_PATH}/ipex-llm-serving-xpu-container.log 2>&1 + if grep -q "Starting vLLM API server on http://0.0.0.0:" ${LOG_PATH}/ipex-llm-serving-xpu-container.log; then break fi - sleep 5s + sleep 6s n=$((n+1)) done } @@ -112,7 +118,7 @@ function validate_rag() { "active" \ "pipeline" \ "edgecraftrag-server" \ - '@configs/test_pipeline_vllm.json' + '@configs/test_pipeline_ipex_vllm.json' # add data validate_services \ @@ -127,7 +133,7 @@ function validate_rag() { "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/chatqna" \ "1234567890" \ "query" \ - "vllm-openvino-server" \ + "ipex-llm-serving-xpu-container-0" \ '{"messages":"What is the test id?"}' } @@ -137,7 +143,7 @@ function validate_megaservice() { "${HOST_IP}:16011/v1/chatqna" \ "1234567890" \ "query" \ - "vllm-openvino-server" \ + "ipex-llm-serving-xpu-container-0" \ '{"messages":"What is the test id?"}' } @@ -148,7 +154,7 @@ function stop_docker() { function main() { - mkdir -p "$LOG_PATH" + mkdir -p $LOG_PATH echo "::group::stop_docker" stop_docker diff --git a/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json b/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json new file mode 100644 index 0000000000..f49367612c --- /dev/null +++ b/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json @@ -0,0 +1,44 @@ +{ + "name": "rag_test_local_llm", + "node_parser": { + "chunk_size": 400, + "chunk_overlap": 48, + "parser_type": "simple" + }, + "indexer": { + "indexer_type": "faiss_vector", + "embedding_model": { + "model_id": "BAAI/bge-small-en-v1.5", + "model_path": "./models/BAAI/bge-small-en-v1.5", + "device": "auto", + "weight": "INT4" + } + }, + "retriever": { + "retriever_type": "vectorsimilarity", + "retrieve_topk": 30 + }, + "postprocessor": [ + { + "processor_type": "reranker", + "top_n": 2, + "reranker_model": { + "model_id": "BAAI/bge-reranker-large", + "model_path": "./models/BAAI/bge-reranker-large", + "device": "auto", + "weight": "INT4" + } + } + ], + "generator": { + "inference_type": "vllm", + "model": { + "model_id": "Qwen/Qwen2-7B-Instruct", + "model_path": "./models/Qwen/Qwen2-7B-Instruct/", + "device": "auto", + "weight": "INT4" + }, + "prompt_path": "./default_prompt.txt" + }, + "active": "True" +} diff --git a/EdgeCraftRAG/ui/vue/.env.development b/EdgeCraftRAG/ui/vue/.env.development index c2237f95f5..d500c854a5 100644 --- a/EdgeCraftRAG/ui/vue/.env.development +++ b/EdgeCraftRAG/ui/vue/.env.development @@ -2,5 +2,5 @@ ENV = development # Local Api -VITE_API_URL = http://10.67.106.236:16010/ -VITE_CHATBOT_URL = http://10.67.106.236:16011/ \ No newline at end of file +VITE_API_URL = http://10.67.106.163:16010/ +VITE_CHATBOT_URL = http://10.67.106.163:16011/ \ No newline at end of file diff --git a/EdgeCraftRAG/ui/vue/components.d.ts b/EdgeCraftRAG/ui/vue/components.d.ts index 05af641f82..53696ff831 100644 --- a/EdgeCraftRAG/ui/vue/components.d.ts +++ b/EdgeCraftRAG/ui/vue/components.d.ts @@ -18,17 +18,25 @@ declare module 'vue' { AConfigProvider: typeof import('ant-design-vue/es')['ConfigProvider'] ADescriptions: typeof import('ant-design-vue/es')['Descriptions'] ADescriptionsItem: typeof import('ant-design-vue/es')['DescriptionsItem'] + ADivider: typeof import('ant-design-vue/es')['Divider'] ADrawer: typeof import('ant-design-vue/es')['Drawer'] + ADropdown: typeof import('ant-design-vue/es')['Dropdown'] + ADropdownButton: typeof 
import('ant-design-vue/es')['DropdownButton'] AEmpty: typeof import('ant-design-vue/es')['Empty'] AForm: typeof import('ant-design-vue/es')['Form'] AFormItem: typeof import('ant-design-vue/es')['FormItem'] + AImage: typeof import('ant-design-vue/es')['Image'] AInput: typeof import('ant-design-vue/es')['Input'] AInputNumber: typeof import('ant-design-vue/es')['InputNumber'] ALayout: typeof import('ant-design-vue/es')['Layout'] ALayoutContent: typeof import('ant-design-vue/es')['LayoutContent'] ALayoutHeader: typeof import('ant-design-vue/es')['LayoutHeader'] + ALayoutSider: typeof import('ant-design-vue/es')['LayoutSider'] + AMenu: typeof import('ant-design-vue/es')['Menu'] + AMenuItem: typeof import('ant-design-vue/es')['MenuItem'] AModal: typeof import('ant-design-vue/es')['Modal'] APagination: typeof import('ant-design-vue/es')['Pagination'] + APopover: typeof import('ant-design-vue/es')['Popover'] ARadio: typeof import('ant-design-vue/es')['Radio'] ARadioGroup: typeof import('ant-design-vue/es')['RadioGroup'] ARow: typeof import('ant-design-vue/es')['Row'] @@ -39,7 +47,9 @@ declare module 'vue' { ASteps: typeof import('ant-design-vue/es')['Steps'] ATable: typeof import('ant-design-vue/es')['Table'] ATag: typeof import('ant-design-vue/es')['Tag'] + ATextarea: typeof import('ant-design-vue/es')['Textarea'] ATooltip: typeof import('ant-design-vue/es')['Tooltip'] + AUpload: typeof import('ant-design-vue/es')['Upload'] AUploadDragger: typeof import('ant-design-vue/es')['UploadDragger'] FormTooltip: typeof import('./src/components/FormTooltip.vue')['default'] RouterLink: typeof import('vue-router')['RouterLink'] diff --git a/EdgeCraftRAG/ui/vue/nginx.conf b/EdgeCraftRAG/ui/vue/nginx.conf index e71fbbc109..e4d0d7fb4c 100644 --- a/EdgeCraftRAG/ui/vue/nginx.conf +++ b/EdgeCraftRAG/ui/vue/nginx.conf @@ -11,7 +11,7 @@ http { client_max_body_size 50M; sendfile on; - keepalive_timeout 65; + keepalive_timeout 90; server { listen 8082; @@ -28,6 +28,7 @@ http { proxy_pass http://server:16010; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_http_version 1.1; + proxy_read_timeout 180s; proxy_set_header Connection ""; } diff --git a/EdgeCraftRAG/ui/vue/package.json b/EdgeCraftRAG/ui/vue/package.json index 8a215ec138..516e870406 100644 --- a/EdgeCraftRAG/ui/vue/package.json +++ b/EdgeCraftRAG/ui/vue/package.json @@ -21,6 +21,7 @@ "js-cookie": "^3.0.5", "lodash": "^4.17.21", "marked": "^15.0.6", + "mitt": "^3.0.1", "pinia": "^3.0.2", "pinia-plugin-persistedstate": "^4.2.0", "qs": "^6.13.1", diff --git a/EdgeCraftRAG/ui/vue/src/api/chatbot/index.ts b/EdgeCraftRAG/ui/vue/src/api/chatbot/index.ts index 17bd6c27e7..f7946ad72d 100644 --- a/EdgeCraftRAG/ui/vue/src/api/chatbot/index.ts +++ b/EdgeCraftRAG/ui/vue/src/api/chatbot/index.ts @@ -17,17 +17,7 @@ export const requestChatbotConfig = (data: Object) => { data, showLoading: true, showSuccessMsg: true, - successMsg: "Configuration update successful !", - }); -}; - -export const requestFileDelete = (name: String) => { - return request({ - url: `/v1/data/files/${name}`, - method: "delete", - showLoading: true, - showSuccessMsg: true, - successMsg: "File deleted successfully !", + successMsg: "request.chatbot.updateSucc", }); }; @@ -37,16 +27,3 @@ export const getBenchmark = (name: String) => { method: "get", }); }; - -export const requestParsingFiles = (data: Object) => { - return request({ - url: `/v1/data`, - method: "post", - data, - showLoading: true, - showSuccessMsg: true, - successMsg: "Document uploaded and parsed successfully !", - }); 
-}; - -export const uploadFileUrl = `${import.meta.env.VITE_API_URL}v1/data/file`; diff --git a/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts b/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts new file mode 100644 index 0000000000..ca146d3651 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts @@ -0,0 +1,76 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import request from "../request"; + +export const getKnowledgeBaseList = () => { + return request({ + url: "/v1/knowledge", + method: "get", + showLoading: true, + }); +}; + +export const getKnowledgeBaseDetialById = (kbId: String) => { + return request({ + url: `/v1/knowledge/${kbId}`, + method: "get", + showLoading: true, + }); +}; + +export const requestKnowledgeBaseCreate = (data: Object) => { + return request({ + url: "/v1/knowledge", + method: "post", + data, + showLoading: true, + showSuccessMsg: true, + successMsg: "request.knowledge.createSucc", + }); +}; + +export const requestKnowledgeBaseUpdate = (data: Object) => { + return request({ + url: `/v1/knowledge/patch`, + method: "patch", + data, + showLoading: true, + showSuccessMsg: true, + successMsg: "request.knowledge.updateSucc", + }); +}; + +export const requestKnowledgeBaseDelete = (kbId: String) => { + return request({ + url: `/v1/knowledge/${kbId}`, + method: "delete", + showLoading: true, + showSuccessMsg: true, + successMsg: "request.knowledge.deleteSucc", + }); +}; + +export const requestKnowledgeBaseRelation = (kbId: String, data: Object) => { + return request({ + url: `/v1/knowledge/${kbId}/files`, + method: "post", + data, + showLoading: true, + showSuccessMsg: true, + successMsg: "request.knowledge.uploadSucc", + }); +}; + +export const requestFileDelete = (name: String, data: Object) => { + return request({ + url: `/v1/knowledge/${name}/files`, + method: "delete", + data, + showLoading: true, + showSuccessMsg: true, + successMsg: "request.knowledge.deleteFileSucc", + }); +}; + +export const uploadFileUrl = `${import.meta.env.VITE_API_URL}v1/data/file/`; diff --git a/EdgeCraftRAG/ui/vue/src/api/pipeline/index.ts b/EdgeCraftRAG/ui/vue/src/api/pipeline/index.ts index 959caa5676..82ae41d271 100644 --- a/EdgeCraftRAG/ui/vue/src/api/pipeline/index.ts +++ b/EdgeCraftRAG/ui/vue/src/api/pipeline/index.ts @@ -32,7 +32,7 @@ export const requestPipelineCreate = (data: Object) => { data, showLoading: true, showSuccessMsg: true, - successMsg: "Pipeline created successfully !", + successMsg: "request.pipeline.createSucc", }); }; @@ -43,7 +43,7 @@ export const requestPipelineUpdate = (name: String, data: Object) => { data, showLoading: true, showSuccessMsg: true, - successMsg: "Pipeline update successfully !", + successMsg: "request.pipeline.updateSucc", }); }; @@ -53,7 +53,7 @@ export const requestPipelineDelete = (name: String) => { method: "delete", showLoading: true, showSuccessMsg: true, - successMsg: "Pipeline deleted successfully !", + successMsg: "request.pipeline.deleteSucc", }); }; @@ -64,7 +64,7 @@ export const requestPipelineSwitchState = (name: String, data: Object) => { data, showLoading: true, showSuccessMsg: true, - successMsg: "Pipeline state switch successful !", + successMsg: "request.pipeline.switchSucc", }); }; diff --git a/EdgeCraftRAG/ui/vue/src/api/request.ts b/EdgeCraftRAG/ui/vue/src/api/request.ts index 95382588fe..91805dbab5 100644 --- a/EdgeCraftRAG/ui/vue/src/api/request.ts +++ b/EdgeCraftRAG/ui/vue/src/api/request.ts @@ -5,12 +5,13 @@ import { NextLoading } from "@/utils/loading"; import 
serviceManager from "@/utils/serviceManager"; import axios, { AxiosInstance } from "axios"; import qs from "qs"; +import i18n from "@/i18n"; const antNotification = serviceManager.getService("antNotification"); const service: AxiosInstance = axios.create({ baseURL: import.meta.env.VITE_API_URL, - timeout: 50000, + timeout: 600000, headers: { "Content-Type": "application/json" }, }); @@ -38,7 +39,8 @@ service.interceptors.response.use( if (NextLoading) NextLoading.done(); const res = response.data; if (config.showSuccessMsg) { - if (antNotification) antNotification("success", "Success", config.successMsg); + if (antNotification) + antNotification("success", i18n.global.t("common.success"), i18n.global.t(config.successMsg)); } return Promise.resolve(res); }, @@ -53,7 +55,7 @@ service.interceptors.response.use( } else { errorMessage = error.message; } - if (antNotification) antNotification("error", "Error", errorMessage); + if (antNotification) antNotification("error", i18n.global.t("common.error"), errorMessage); return Promise.reject(error); }, diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css index 0fd282ff5e..d2cdcac199 100644 --- a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css +++ b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css @@ -1,9 +1,9 @@ @font-face { font-family: "iconfont"; /* Project id 4784207 */ src: - url("iconfont.woff2?t=1739238081968") format("woff2"), - url("iconfont.woff?t=1739238081968") format("woff"), - url("iconfont.ttf?t=1739238081968") format("truetype"); + url("iconfont.woff2?t=1748479964596") format("woff2"), + url("iconfont.woff?t=1748479964596") format("woff"), + url("iconfont.ttf?t=1748479964596") format("truetype"); } .iconfont { @@ -14,6 +14,94 @@ -moz-osx-font-smoothing: grayscale; } +.icon-newChat:before { + content: "\e6c7"; +} + +.icon-chat:before { + content: "\ecb1"; +} + +.icon-knowledge:before { + content: "\e6f2"; +} + +.icon-system:before { + content: "\e799"; +} + +.icon-chatbot1:before { + content: "\e630"; +} + +.icon-lang-zh:before { + content: "\e6c5"; +} + +.icon-lang-en:before { + content: "\e609"; +} + +.icon-exit:before { + content: "\e6d9"; +} + +.icon-loading:before { + content: "\e61a"; +} + +.icon-success:before { + content: "\e8ca"; +} + +.icon-results:before { + content: "\e603"; +} + +.icon-rating:before { + content: "\e7b9"; +} + +.icon-chart-line:before { + content: "\e790"; +} + +.icon-export:before { + content: "\e619"; +} + +.icon-rename:before { + content: "\e618"; +} + +.icon-delete:before { + content: "\e664"; +} + +.icon-setting1:before { + content: "\e61b"; +} + +.icon-upload:before { + content: "\e617"; +} + +.icon-clear:before { + content: "\e765"; +} + +.icon-copy-success:before { + content: "\e666"; +} + +.icon-copy:before { + content: "\e660"; +} + +.icon-subway:before { + content: "\e6ed"; +} + .icon-stop:before { content: "\e904"; } diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js index 5c567a1196..79fad33e63 100644 --- a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js +++ b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js @@ -2,67 +2,67 @@ // SPDX-License-Identifier: Apache-2.0 (window._iconfont_svg_string_4784207 = - ''), - ((l) => { - var a = (t = (t = document.getElementsByTagName("script"))[t.length - 1]).getAttribute("data-injectcss"), - t = t.getAttribute("data-disable-injectsvg"); - if (!t) { - var h, + ''), + ((h) => { + var l = (a = (a = 
document.getElementsByTagName("script"))[a.length - 1]).getAttribute("data-injectcss"), + a = a.getAttribute("data-disable-injectsvg"); + if (!a) { + var c, + t, i, o, - e, - c, - v = function (a, t) { - t.parentNode.insertBefore(a, t); + v, + e = function (l, a) { + a.parentNode.insertBefore(l, a); }; - if (a && !l.__iconfont__svg__cssinject__) { - l.__iconfont__svg__cssinject__ = !0; + if (l && !h.__iconfont__svg__cssinject__) { + h.__iconfont__svg__cssinject__ = !0; try { document.write( "", ); - } catch (a) { - console && console.log(a); + } catch (l) { + console && console.log(l); } } - (h = function () { - var a, - t = document.createElement("div"); - (t.innerHTML = l._iconfont_svg_string_4784207), - (t = t.getElementsByTagName("svg")[0]) && - (t.setAttribute("aria-hidden", "true"), - (t.style.position = "absolute"), - (t.style.width = 0), - (t.style.height = 0), - (t.style.overflow = "hidden"), - (t = t), - (a = document.body).firstChild ? v(t, a.firstChild) : a.appendChild(t)); + (c = function () { + var l, + a = document.createElement("div"); + (a.innerHTML = h._iconfont_svg_string_4784207), + (a = a.getElementsByTagName("svg")[0]) && + (a.setAttribute("aria-hidden", "true"), + (a.style.position = "absolute"), + (a.style.width = 0), + (a.style.height = 0), + (a.style.overflow = "hidden"), + (a = a), + (l = document.body).firstChild ? e(a, l.firstChild) : l.appendChild(a)); }), document.addEventListener ? ~["complete", "loaded", "interactive"].indexOf(document.readyState) - ? setTimeout(h, 0) - : ((i = function () { - document.removeEventListener("DOMContentLoaded", i, !1), h(); + ? setTimeout(c, 0) + : ((t = function () { + document.removeEventListener("DOMContentLoaded", t, !1), c(); }), - document.addEventListener("DOMContentLoaded", i, !1)) + document.addEventListener("DOMContentLoaded", t, !1)) : document.attachEvent && - ((o = h), - (e = l.document), - (c = !1), - d(), - (e.onreadystatechange = function () { - "complete" == e.readyState && ((e.onreadystatechange = null), n()); + ((i = c), + (o = h.document), + (v = !1), + m(), + (o.onreadystatechange = function () { + "complete" == o.readyState && ((o.onreadystatechange = null), s()); })); } - function n() { - c || ((c = !0), o()); + function s() { + v || ((v = !0), i()); } - function d() { + function m() { try { - e.documentElement.doScroll("left"); - } catch (a) { - return void setTimeout(d, 50); + o.documentElement.doScroll("left"); + } catch (l) { + return void setTimeout(m, 50); } - n(); + s(); } })(window); diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json index f00702b38d..85f17b6ee4 100644 --- a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json +++ b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json @@ -5,6 +5,160 @@ "css_prefix_text": "icon-", "description": "", "glyphs": [ + { + "icon_id": "43508860", + "name": "newChat", + "font_class": "newChat", + "unicode": "e6c7", + "unicode_decimal": 59079 + }, + { + "icon_id": "6807699", + "name": "chat", + "font_class": "chat", + "unicode": "ecb1", + "unicode_decimal": 60593 + }, + { + "icon_id": "12237229", + "name": "knowledge", + "font_class": "knowledge", + "unicode": "e6f2", + "unicode_decimal": 59122 + }, + { + "icon_id": "25013769", + "name": "system", + "font_class": "system", + "unicode": "e799", + "unicode_decimal": 59289 + }, + { + "icon_id": "28670155", + "name": "chatbot", + "font_class": "chatbot1", + "unicode": "e630", + "unicode_decimal": 58928 + }, + { + "icon_id": "8358946", + 
"name": "lang-zh", + "font_class": "lang-zh", + "unicode": "e6c5", + "unicode_decimal": 59077 + }, + { + "icon_id": "26283816", + "name": "lang-en", + "font_class": "lang-en", + "unicode": "e609", + "unicode_decimal": 58889 + }, + { + "icon_id": "1786168", + "name": "exit", + "font_class": "exit", + "unicode": "e6d9", + "unicode_decimal": 59097 + }, + { + "icon_id": "40154691", + "name": "loading", + "font_class": "loading", + "unicode": "e61a", + "unicode_decimal": 58906 + }, + { + "icon_id": "20939277", + "name": "success", + "font_class": "success", + "unicode": "e8ca", + "unicode_decimal": 59594 + }, + { + "icon_id": "6820316", + "name": "results", + "font_class": "results", + "unicode": "e603", + "unicode_decimal": 58883 + }, + { + "icon_id": "36924379", + "name": "rating", + "font_class": "rating", + "unicode": "e7b9", + "unicode_decimal": 59321 + }, + { + "icon_id": "6151034", + "name": "chart-line", + "font_class": "chart-line", + "unicode": "e790", + "unicode_decimal": 59280 + }, + { + "icon_id": "43924556", + "name": "export", + "font_class": "export", + "unicode": "e619", + "unicode_decimal": 58905 + }, + { + "icon_id": "43924554", + "name": "rename", + "font_class": "rename", + "unicode": "e618", + "unicode_decimal": 58904 + }, + { + "icon_id": "2570142", + "name": "delete", + "font_class": "delete", + "unicode": "e664", + "unicode_decimal": 58980 + }, + { + "icon_id": "13253937", + "name": "setting", + "font_class": "setting1", + "unicode": "e61b", + "unicode_decimal": 58907 + }, + { + "icon_id": "43796752", + "name": "upload", + "font_class": "upload", + "unicode": "e617", + "unicode_decimal": 58903 + }, + { + "icon_id": "42194548", + "name": "clear", + "font_class": "clear", + "unicode": "e765", + "unicode_decimal": 59237 + }, + { + "icon_id": "1198529", + "name": "copy-success", + "font_class": "copy-success", + "unicode": "e666", + "unicode_decimal": 58982 + }, + { + "icon_id": "9080698", + "name": "copy", + "font_class": "copy", + "unicode": "e660", + "unicode_decimal": 58976 + }, + { + "icon_id": "796912", + "name": "地铁", + "font_class": "subway", + "unicode": "e6ed", + "unicode_decimal": 59117 + }, { "icon_id": "42853460", "name": "stop", diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.ttf b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.ttf index 38480000d31e32f85f015b1177cce49b56053dfe..92ef29764f71fcaf190bc315c5de41309afbe3fc 100644 GIT binary patch delta 5919 zcmbU_4R9OPneXkBcBPfHtCdz-Nw)R3mSjh^vAz0>lQ>So#au8n6Ci{TE;f!52b@@r z4J7TfYdUZPfzYH5ZJ~F$l)Ltz=`jT8-KFIy!zC##;U*Uvdi<28S8hchrBhnwaulua zd%F%KQ*MSiX}|aL?fX8x_x(Se?0x!%y(vBVxjPZU_alUcN4JgCe(w9?TL>|Kg^=T! 
"#666666", + default: () => "#808080", }, inherit: { type: Boolean, diff --git a/EdgeCraftRAG/ui/vue/src/i18n/en.ts index 39be2d81aa..8eac170d8a 100644 --- a/EdgeCraftRAG/ui/vue/src/i18n/en.ts +++ b/EdgeCraftRAG/ui/vue/src/i18n/en.ts @@ -6,21 +6,227 @@ export default { common: { update: "Update", edit: "Edit", + add: "Add", delete: "Delete", active: "Activate", deactivate: "Deactivate", cancel: "Cancel", confirm: "Confirm", save: "Save", - }, - setting: { - systemStatus: "System Status", - pipelines: "Pipelines", - create: "Create Pipeline", - config: "Configuration Log", + back: "Back", + next: "Next", + submit: "Submit", + prompt: "Prompt", + import: "Import", + chatbot: "Chatbot", jump: "Go to chatbot", + success: "Success", + error: "Error", + clear: "Clear", + uploadTip: "Click or drag file to this area to upload", + }, + system: { + title: "System Status", + cpu: "CPU Usage", + gpu: "GPU Usage", + disk: "Disk Usage", + memory: "Memory Usage", used: "Used", notUsed: "Not Used", + info: "System Information", + kernel: "Kernel", + processor: "Processor", + os: "OS", + time: "Current time", + }, + quickStart: { + title: "Quick Start", + first: "Step 1", + second: "Step 2", + step1: "Create Pipeline", + step1Tip: + "Build your RAG pipeline with customized settings to maximize the potential of AI information processing capability.", + step2: "Use the Chatbot", + step2Tip: + "Start engaging with the intelligent chatbot, which supports file uploads and information retrieval to assist you in completing tasks more efficiently.", + create: "Go Create", + }, + pipeline: { + pipelines: "Pipelines", + create: "Create Pipeline", + edit: "Edit Pipeline", + detail: "Pipeline Details", import: "Import Pipeline", + activated: "Activated", + inactive: "Inactive", + isActive: "Activated", + pipelineFormatTip: "Supports JSON format, with file size not exceeding 10M.", + importSuccTip: "Files upload successful!", + importErrTip: "Files upload failed!", + name: "Name", + id: "ID", + status: "Status", + operation: "Operation", + deactivateTip: "Are you sure you want to deactivate this pipeline?", + activeTip: "Are you sure you want to activate this pipeline?", + deleteTip: "Are you sure you want to delete this pipeline?", + notActivatedTip: "There is no available pipeline. 
Please create or activate it first.", + validErr: "Form validation failed !", + config: { + basic: "Basic", + nodeParser: "Node Parser", + nodeParserType: "Node parser type", + chunkSize: "Chunk size", + chunkOverlap: "Chunk overlap", + windowSize: "Window Size", + indexer: "Indexer", + indexerType: "Indexer Type", + embedding: "Embedding Model", + embeddingDevice: "Embedding run device", + retriever: "Retriever", + retrieverType: "Retriever Type", + topk: "Search top k", + postProcessor: "PostProcessor", + postProcessorType: "PostProcessor Type", + rerank: "Rerank Model", + rerankDevice: "Rerank run device", + generator: "Generator", + generatorType: "Generator Type", + llm: "LLM Inference Type", + language: "Large Language Model", + llmDevice: "LLM run device", + weights: "Weights", + local: "Local", + vllm: "Vllm", + }, + valid: { + nameValid1: "Please input name", + nameValid2: "Name should be between 2 and 30 characters", + nodeParserType: "Please select Node Parser Type", + chunkSizeValid1: "Please select Chunk Size", + chunkSizeValid2: "The value of Chunk Size cannot be less than Chunk Overlap", + chunkOverlapValid1: "Please select Chunk Overlap", + chunkOverlapValid2: "The value of Chunk Overlap cannot be greater than Chunk Size", + windowSize: "Please select Chunk Window Size", + indexerType: "Please select Indexer Type", + embedding: "Please select Embedding Model", + embeddingDevice: "Please select Embedding run device", + retrieverType: "Please select Retriever Type", + topk: "Please select Top k", + postProcessorType: "Please select PostProcessor Type", + rerank: "Please select Rerank Model", + rerankDevice: "Please select Rerank run device", + generatorType: "please select Generator Type", + language: "please select Large Language Model", + llmDevice: "please select LLM run device", + weights: "please select Weights", + }, + desc: { + name: "The name identifier of the pipeline", + nodeParserType: "Node parsing type when you use RAG", + chunkSize: "Size of each chunk for processing", + chunkOverlap: "Overlap size between chunks", + windowSize: "The number of sentences on each side of a sentence to capture", + indexerType: "The type of index structure responsible for building based on the parsed nodes", + embedding: "Embed the text data to represent it and build a vector index", + embeddingDevice: "The device used by the Embedding Model", + retrieverType: + "The retrieval type used when retrieving relevant nodes from the index according to the user's query", + topk: "The number of top k results to return", + postProcessorType: "Select postprocessors for post-processing of the context", + rerank: "Rerank Model", + rerankDevice: "Rerank run device", + generatorType: "Local inference generator or vllm generator", + language: "The large model used for generating dialogues", + llmDevice: "The device used by the LLM", + weights: "Model weight", + reranker: "The model for reranking.", + metadataReplace: "Used to replace the node content with a field from the node metadata.", + vectorsimilarity: "retrieval according to vector similarity", + autoMerge: "This retriever will try to merge context into parent context.", + bm25: "A BM25 retriever that uses the BM25 algorithm to retrieve nodes.", + faissVector: "Embeddings are stored within a Faiss index.", + vector: "Vector Store Index.", + simple: "Parse text with a preference for complete sentences.", + hierarchical: "Splits a document into a recursive hierarchy Nodes using a NodeParser.", + sentencewindow: + "Sentence window node parser. 
Splits a document into Nodes, with each node being a sentence. Each node contains a window from the surrounding sentences in the metadata.", + unstructured: "UnstructedNodeParser is a component that processes unstructured data.", + }, + }, + generation: { + title: "Generation Configuration", + retriever: "Retriever Configuration", + config: { + top_n: "Rerank top n", + temperature: "Temperature", + top_p: "Top-p (nucleus sampling)", + top_k: "Top-k", + penalty: "Repetition Penalty", + maxToken: "Max Token Number", + }, + desc: { + top_n: "Number of rerank results", + temperature: "Higher values produce more diverse outputs", + top_p: + "Sample from the smallest possible set of tokens whose cumulative probability exceeds top_p. Set to 1 to disable and sample from all tokens.", + top_k: `Sample from a shortlist of top-k tokens — 0 to + disable and sample from all tokens.`, + penalty: "Penalize repetition — 1.0 to disable.", + maxToken: "Set Max Output Token.", + }, + }, + chat: { + title: "Chat", + tip1: "Hi, I'm EC RAG ", + tip2: "How can I help you today?", + tip3: "Choosing the right knowledge base can help AI answer questions more accurately", + tip4: "Please enter your question...", + new: "New Chat", + rag: "EC RAG", + setting: "Pipeline Setting", + clear: "Clear Message", + }, + knowledge: { + title: "Knowledge Base", + total: "Total files: ", + upload: "Upload File", + create: "Create Knowledge Base", + edit: "Edit Knowledge Base", + deleteTip: "Are you sure delete this knowledge base?", + activeTip: "Are you sure activate this knowledge base?", + uploadTip: "Supports PDF, Word, TXT,Doc,Html,PPT formats, with a single file size not exceeding 200M", + notFileTip: "The knowledge base is empty. Go upload your files.", + name: "Name", + des: "Description", + activated: "Activated", + nameValid1: "Please input knowledge base name", + nameValid2: "Name should be between 2 and 30 characters", + desValid: "Please input knowledge base description", + activeValid: "Please select whether to activate", + uploadValid: "Single file size not exceeding 50M.", + deleteFileTip: "Are you sure delete this file?", + }, + request: { + pipeline: { + createSucc: "Pipeline created successfully !", + updateSucc: "Pipeline update successfully !", + deleteSucc: "Pipeline deleted successfully !", + switchSucc: "Pipeline state switch successful !", + }, + chatbot: { + updateSucc: "Configuration update successful !", + }, + knowledge: { + uploadSucc: "Document uploaded and parsed successfully !", + deleteFileSucc: "File deleted successfully !", + createSucc: "Knowledge Base created successfully !", + updateSucc: "Knowledge Base update successfully !", + deleteSucc: "Knowledge Base deleted successfully !", + }, + }, + error: { + notFoundTip: "Uh oh! 
It seems like you're lost", + back: "Go Home", }, }; diff --git a/EdgeCraftRAG/ui/vue/src/i18n/index.ts b/EdgeCraftRAG/ui/vue/src/i18n/index.ts index 6d2fab0c57..ac93f5bf47 100644 --- a/EdgeCraftRAG/ui/vue/src/i18n/index.ts +++ b/EdgeCraftRAG/ui/vue/src/i18n/index.ts @@ -9,13 +9,13 @@ import en from "./en"; import zh from "./zh"; const messages = { - "en-US": { ...en, ...enLocale }, - "zh-CN": { ...zh, ...zhLocale }, + en_US: { ...en, ...enLocale }, + zh_CN: { ...zh, ...zhLocale }, }; const i18n = createI18n({ - locale: Local.get("lang") || "en-US", - fallbackLocale: "en-US", + locale: Local.get("themeInfo")?.lang || "en_US", + fallbackLocale: "en_US", messages, }); diff --git a/EdgeCraftRAG/ui/vue/src/i18n/zh.ts b/EdgeCraftRAG/ui/vue/src/i18n/zh.ts index 39be2d81aa..a96515b7a3 100644 --- a/EdgeCraftRAG/ui/vue/src/i18n/zh.ts +++ b/EdgeCraftRAG/ui/vue/src/i18n/zh.ts @@ -4,23 +4,225 @@ export default { headerTitle: "Edge Craft RAG based Q&A Chatbot", common: { - update: "Update", - edit: "Edit", - delete: "Delete", - active: "Activate", - deactivate: "Deactivate", - cancel: "Cancel", - confirm: "Confirm", - save: "Save", - }, - setting: { - systemStatus: "System Status", - pipelines: "Pipelines", - create: "Create Pipeline", - config: "Configuration Log", - jump: "Go to chatbot", - used: "Used", - notUsed: "Not Used", - import: "Import Pipeline", + update: "更新", + edit: "编辑", + add: "新增", + delete: "删除", + active: "启用", + deactivate: "停用", + cancel: "取消", + confirm: "确认", + save: "保存", + back: "返回", + next: "下一步", + submit: "提交", + prompt: "提示", + import: "导入", + chatbot: "对话", + jump: "前往对话", + success: "成功", + error: "错误", + clear: "清除", + uploadTip: "点击或将文件拖到此区域进行上传", + }, + system: { + title: "系统状态", + cpu: "CPU使用率", + gpu: "GPU使用率", + disk: "磁盘使用率", + memory: "内存使用率", + used: "已使用", + notUsed: "未使用", + info: "系统信息", + kernel: "内核", + processor: "处理器", + os: "操作系统", + time: "当前时间", + }, + quickStart: { + title: "快速开始", + first: "步骤1", + second: "步骤2", + step1: "创建 Pipeline", + step1Tip: "定制您的 RAG 流程,释放 AI 信息处理的最大能力。", + step2: "前往对话", + step2Tip: "开始与智能聊天机器人互动,它支持文件上传和信息检索,帮助您更高效地完成任务。", + create: "去创建", + }, + pipeline: { + pipelines: "Pipeline", + create: "创建 Pipeline", + edit: "编辑 Pipeline", + detail: "Pipeline 详情", + import: "导入Pipeline", + isActive: "状态", + activated: "已启用", + inactive: "已停用", + pipelineFormatTip: "仅支持JSON格式,文件大小不超过10M", + importSuccTip: "Pipeline 配置导入成功!", + importErrTip: "Pipeline 配置导入失败!", + name: "名称", + id: "ID", + status: "状态", + operation: "操作", + deactivateTip: "您确定要停用该Pipeline吗?", + activeTip: "您确定要启用该Pipeline吗?", + deleteTip: "您确定要删除此Pipeline吗 ?此操作不可恢复。", + notActivatedTip: "当前无可用Pipeline,请先配置或激活。", + validErr: "表单验证失败!", + + config: { + basic: "基础", + nodeParser: "节点解析器", + nodeParserType: "节点解析器类型", + chunkSize: "分块大小", + chunkOverlap: "分块重叠部分大小", + windowSize: "句子上下文窗口大小", + indexer: "索引器", + indexerType: "索引器类型", + embedding: "嵌入模型", + embeddingDevice: "模型运行设备", + retriever: "检索器", + retrieverType: "检索器类型", + topk: "检索 top k", + postProcessor: "节点后处理器", + postProcessorType: "节点后处理器类型", + rerank: "重排模型", + rerankDevice: "模型运行设备", + generator: "生成器", + generatorType: "生成器类型", + llm: "推理类型", + language: "语言大模型", + llmDevice: "运行设备", + weights: "权重", + local: "本地", + vllm: "Vllm", + }, + valid: { + nameValid1: "请输入名称", + nameValid2: "请输入 2 到 30 个字符的名称", + nodeParserType: "请选择节点解析器类型", + chunkSizeValid1: "请选择分块大小", + chunkSizeValid2: "分块大小的值不能小于分块重叠值", + chunkOverlapValid1: "请选择分块重叠值", + chunkOverlapValid2: "分块重叠值不能大于分块大小的值", + windowSize: "请选择句子上下文窗口大小", + 
indexerType: "请选择索引器类型", + embedding: "请选择嵌入模型", + embeddingDevice: "请选择嵌入模型运行设备", + retrieverType: "请选择检索器类型", + topk: "请选择Top k", + postProcessorType: "请选择后处理器类型", + rerank: "请选择重排模型", + rerankDevice: "请选择重排模型运行设备", + generatorType: "请选择生成器类型", + language: "请选择大语言模型", + llmDevice: "请选择大语言模型运行设备", + weights: "请选择模型权重", + }, + desc: { + name: "Pipeline的名称标识,用于区分不同工作流", + nodeParserType: "RAG 处理时的文本拆分策略,支持简单句子、层次结构等解析方式", + chunkSize: "文本处理时的单块数据大小", + chunkOverlap: "相邻数据块的重叠部分大小,确保跨块语义连续性", + windowSize: "每个节点捕获的上下文句子窗口大小,用于增强语义完整性", + indexerType: "基于解析节点构建的索引结构类型", + embedding: "将文本转换为向量表示的过程", + embeddingDevice: "执行嵌入模型推理的硬件设备(CPU/GPU)", + retrieverType: "根据用户查询从索引中检索节点的算法类型", + topk: "检索时返回的最相关结果数量", + postProcessorType: "对检索结果进行后处理的组件类型", + rerank: "对检索结果进行二次排序的模型,提升答案相关性", + rerankDevice: "执行重排模型推理的硬件设备(CPU/GPU)", + generatorType: "回答生成方式的类型(本地部署模型或 vllm 高效推理框架)", + language: "用于生成自然语言回答的大模型(如 LLaMA、ChatGLM)", + llmDevice: "大语言模型推理的硬件设备(需匹配模型规模要求)", + weights: "大模型的权重", + reranker: "重排序的模型", + metadataReplace: "用来将节点元数据中的字段替换节点内容", + vectorsimilarity: "根据向量相似性进行检索", + autoMerge: "该检索器会尝试将上下文合并到父级上下文中", + bm25: "使用BM25算法检索节点的BM25检索器", + faissVector: "嵌入存储在Faiss索引中。", + vector: "矢量存储索引", + simple: "解析文本,优先选择完整的句子。", + hierarchical: "使用借点解析将文档分割成递归层次节点", + sentencewindow: "将文档分割成节点,每个节点代表一个句子。每个节点包含一个来自元数据中周围句子的窗口", + unstructured: "一个处理非结构化数据的组件", + }, + }, + generation: { + title: "生成器配置", + retriever: "检索器配置", + config: { + top_n: "Rerank top n", + temperature: "Temperature", + top_p: "Top-p (nucleus sampling)", + top_k: "Top-k", + penalty: "Repetition Penalty", + maxToken: "Max Token Number", + }, + desc: { + top_n: "重排后结果的数量", + temperature: "数值越高,输出越多样化", + top_p: "从累积概率超过 top_p 的最小标记集中采样,设为1则禁用并从所有标记取样。", + top_k: "从概率前k的 Token 中采样", + penalty: "抑制重复的系数,设为1.0表示禁用", + maxToken: "生成回答的最大Token数量", + }, + }, + + chat: { + title: "对话", + tip1: "您好,我是 EC RAG", + tip2: "请问需要什么帮助?", + tip3: "合理选择知识库有助于提升人工智能在问答任务中的准确性。", + tip4: "有什么问题?请在这里输入...", + new: "开启新对话", + rag: "EC RAG", + setting: "Pipeline 配置", + clear: "清除消息", + }, + knowledge: { + title: "知识库", + total: "文档总数: ", + upload: "上 传", + create: "新建知识库", + edit: "编辑知识库", + deleteTip: "您确定要删除此知识库吗?此操作不可恢复。", + activeTip: "您确定要激活此知识库吗?", + uploadTip: "支持 PDF、Word、TXT、Doc、HTML、PPT 格式,单个文件大小不超过 200M。", + notFileTip: "您还没有上传任何文件,点击“上传”按钮开始添加内容吧~", + name: "名称", + des: "描述", + activated: "激活状态", + nameValid1: "请输入知识库名称", + nameValid2: "请输入 2 到 30 个字符的名称", + desValid: "请输入知识库描述", + activeValid: "请选择是否启用该功能。", + uploadValid: "单个文件大小不得超过 50MB", + deleteFileTip: "您确定要删除此文档吗?此操作不可恢复。", + }, + request: { + pipeline: { + createSucc: "Pipeline 创建成功!", + updateSucc: "Pipeline 更新成功!", + deleteSucc: "Pipeline 删除成功!", + switchSucc: "Pipeline 状态切换成功!", + }, + chatbot: { + updateSucc: "配置更新成功!", + }, + knowledge: { + uploadSucc: "文档上传成功,内容已解析完毕。", + deleteFileSucc: "文档删除成功!", + createSucc: "知识库创建成功!", + updateSucc: "知识库更新成功!", + deleteSucc: " 知识库删除成功!", + }, + }, + error: { + notFoundTip: "Oops 好像走错地方啦~", + back: "首页", }, }; diff --git a/EdgeCraftRAG/ui/vue/src/layout/Header.vue b/EdgeCraftRAG/ui/vue/src/layout/Header.vue index c8cd68d69a..9b42e62194 100644 --- a/EdgeCraftRAG/ui/vue/src/layout/Header.vue +++ b/EdgeCraftRAG/ui/vue/src/layout/Header.vue @@ -4,10 +4,35 @@

{{ $t("headerTitle") }}

-
-
- Sun - Moon +
+ +
+
+ +
+
+ +
+
+
+ Sun + Moon +
@@ -17,12 +42,20 @@ import DarkIcon from "@/assets/svgs/dark-icon.svg"; import headerLog from "@/assets/svgs/header-log.svg"; import LightIcon from "@/assets/svgs/light-icon.svg"; +import SvgIcon from "@/components/SvgIcon.vue"; import { themeAppStore } from "@/store/theme"; +import { useI18n } from "vue-i18n"; +const { locale } = useI18n(); const themeStore = themeAppStore(); const emit = defineEmits(["change-theme"]); const isDark = ref(false); +const currentLanguage = computed(() => locale.value); +const handleLanguageChange = ({ key }: { key: string }) => { + locale.value = key; + themeStore.toggleLanguage(key); +}; const handleThemeChange = () => { isDark.value = !isDark.value; const theme = isDark.value ? "dark" : "light"; @@ -36,9 +69,20 @@ onMounted(() => { diff --git a/EdgeCraftRAG/ui/vue/src/layout/Main.vue b/EdgeCraftRAG/ui/vue/src/layout/Main.vue index 8fde16cd9c..8c2147cc3f 100644 --- a/EdgeCraftRAG/ui/vue/src/layout/Main.vue +++ b/EdgeCraftRAG/ui/vue/src/layout/Main.vue @@ -3,13 +3,42 @@
- + - \ No newline at end of file + diff --git a/EdgeCraftRAG/ui/vue/src/main.ts b/EdgeCraftRAG/ui/vue/src/main.ts index 180b48b4df..25e2b930cf 100644 --- a/EdgeCraftRAG/ui/vue/src/main.ts +++ b/EdgeCraftRAG/ui/vue/src/main.ts @@ -16,7 +16,7 @@ import { Local } from "@/utils/storage"; // setting dayjs language const setDayjsLocale = (locale: string) => { - if (locale === "en-US") { + if (locale === "en_US") { dayjs.locale("en"); } else { dayjs.locale("zh-cn"); diff --git a/EdgeCraftRAG/ui/vue/src/store/theme.ts b/EdgeCraftRAG/ui/vue/src/store/theme.ts index 9f99b7c531..072d41d975 100644 --- a/EdgeCraftRAG/ui/vue/src/store/theme.ts +++ b/EdgeCraftRAG/ui/vue/src/store/theme.ts @@ -6,6 +6,7 @@ import { defineStore } from "pinia"; export const themeAppStore = defineStore("theme", { state: () => ({ theme: "light", + lang: "en_US", }), persist: { key: "themeInfo", @@ -15,5 +16,8 @@ export const themeAppStore = defineStore("theme", { toggleTheme(type: string) { this.theme = type; }, + toggleLanguage(lang: string) { + this.lang = lang; + }, }, }); diff --git a/EdgeCraftRAG/ui/vue/src/theme/ant.less b/EdgeCraftRAG/ui/vue/src/theme/ant.less index 89f82ed7bf..c2427cb257 100644 --- a/EdgeCraftRAG/ui/vue/src/theme/ant.less +++ b/EdgeCraftRAG/ui/vue/src/theme/ant.less @@ -154,3 +154,6 @@ margin: 12px auto; } } +.intel-tooltip { + max-width: 60vw; +} diff --git a/EdgeCraftRAG/ui/vue/src/theme/common.less b/EdgeCraftRAG/ui/vue/src/theme/common.less index 5fd7f82488..5aa87f8500 100644 --- a/EdgeCraftRAG/ui/vue/src/theme/common.less +++ b/EdgeCraftRAG/ui/vue/src/theme/common.less @@ -11,11 +11,82 @@ align-items: center; justify-content: space-between; } +.flex-column { + display: flex; + flex-direction: column; +} +.flex-end { + display: flex; + align-items: center; + justify-content: flex-end; +} .vertical-center { display: flex; align-items: center; justify-content: center; } +.vertical-between { + display: flex; + flex-direction: column; + justify-content: space-between; + align-items: center; +} +.single-ellipsis { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} +.multi-ellipsis { + display: -webkit-box; + -webkit-line-clamp: 2; + -webkit-box-orient: vertical; + overflow: hidden; + text-overflow: ellipsis; +} +.icon-button { + width: 24px; + height: 24px; + font-size: 18px; + border-radius: 4px; + border: 1px solid var(--font-tip-color); + .vertical-center; + &.primary { + i { + color: var(--color-primary) !important; + } + &:hover { + background-color: var(--color-primaryBg); + border: 1px solid var(--color-primary); + } + } + &.success { + i { + color: var(--color-success) !important; + } + &:hover { + background-color: var(--color-successBg); + border: 1px solid var(--color-success); + } + } + &.warning { + i { + color: var(--color-warning) !important; + } + &:hover { + background-color: var(--color-warningBg); + border: 1px solid var(--color-warning); + } + } + &.error { + i { + color: var(--color-error) !important; + } + &:hover { + background-color: var(--color-errorBg); + border: 1px solid var(--color-error); + } + } +} .special-button-primary { background: linear-gradient( to bottom, @@ -106,6 +177,12 @@ padding: 16px 24px 24px !important; } } +.rename-dialog { + overflow: hidden; + .intel-modal-body { + padding: 16px 24px 0 !important; + } +} .centered-model { .ant-modal-confirm-btns { text-align: center; @@ -177,3 +254,6 @@ } .loopStyle(100); +.intel-dropdown { + z-index: 9999; +} diff --git a/EdgeCraftRAG/ui/vue/src/theme/layout.less 
b/EdgeCraftRAG/ui/vue/src/theme/layout.less index d27f45344d..3f8d58d5c0 100644 --- a/EdgeCraftRAG/ui/vue/src/theme/layout.less +++ b/EdgeCraftRAG/ui/vue/src/theme/layout.less @@ -82,18 +82,18 @@ div[aria-hidden="true"] { // 滚动条凹槽的颜色,还可以设置边框属性 &::-webkit-scrollbar-track-piece { - background-color: var(--bg-content-color); + background-color: var(--bg-scrollbar); } // 滚动条的宽度 &::-webkit-scrollbar { - width: 9px; - height: 9px; + width: 7px; + height: 7px; } // 滚动条的设置 &::-webkit-scrollbar-thumb { - background-color: var(--bg-scrollbar); + background-color: var(--color-scrollbar); background-clip: padding-box; min-height: 28px; border-radius: 3px; diff --git a/EdgeCraftRAG/ui/vue/src/theme/markdown.less b/EdgeCraftRAG/ui/vue/src/theme/markdown.less index 918650ff33..2a497ecac3 100644 --- a/EdgeCraftRAG/ui/vue/src/theme/markdown.less +++ b/EdgeCraftRAG/ui/vue/src/theme/markdown.less @@ -12,13 +12,16 @@ } code, tt { - background-color: var(--color-primaryBg); + background-color: var(--message-bg); border-radius: 6px; font-size: 85%; margin: 0; padding: 0.2em 0.4em; white-space: break-spaces; } + pre { + margin-bottom: 0; + } h1, h2, h3, @@ -69,4 +72,79 @@ img[align="left"] { padding-right: 20px; } + table { + width: 100%; + margin-top: 16px; + border: 1px solid var(--border-main-color); + border-collapse: collapse; + color: var(--font-main-color); + } + table th { + background-color: var(--table-th-bg); + font-weight: 600; + text-align: left; + } + + table td, + table th { + border: 1px solid var(--border-main-color); + padding: 6px 13px; + margin: 0; + } + table td { + background-color: var(--table-td-bg); + } + table td > :last-child { + margin-bottom: 0; + } + + table tr { + font-size: 14px; + border-top: 1px solid var(--border-main-color); + } + + blockquote { + color: var(--blockquote-color); + border-left: 3px solid var(--color-scrollbar); + margin: 8px 0px; + padding: 0px 10px; + } + .intel-highlighter { + width: 100%; + border-radius: 4px; + overflow: hidden; + margin-top: 16px; + margin-bottom: 12px; + .header-wrap { + align-items: center; + background-color: var(--code-header-bg); + color: var(--code-header-font); + display: flex; + align-items: center; + justify-content: space-between; + font-size: 14px; + height: 32px; + padding: 0 14px; + .copy-icon { + display: block; + color: var(--font-main-color); + cursor: pointer; + &:hover { + color: var(--color-primary); + } + } + .success-icon { + display: none; + color: var(--color-success); + } + } + .content-wrap { + overflow-x: auto; + background: var(--code-content-bg); + padding: 16px; + font-size: 13px; + margin-top: 0; + color: var(--font-main-color); + } + } } diff --git a/EdgeCraftRAG/ui/vue/src/theme/variables.less b/EdgeCraftRAG/ui/vue/src/theme/variables.less index e8914623af..42c05d8feb 100644 --- a/EdgeCraftRAG/ui/vue/src/theme/variables.less +++ b/EdgeCraftRAG/ui/vue/src/theme/variables.less @@ -3,43 +3,71 @@ sans-serif; --header-font-family: "IntelOneDisplayNormal"; --color-white: #ffffff; + --color-fuzzy-white: rgba(255, 255, 255, 0.7); --color-primary: #00377c; --color-primary-hover: #0054ae; + --color-primary-tip: #3b82f6; + --color-primary-second: #1677ff; --color-primaryBg: #e0eaff; + --color-second-primaryBg: #d4e1fd; + --message-bg: var(--color-second-primaryBg); --color-error: #ce0000; --color-error-hover: #ff5d52; + --color-errorBg: #ffa3a3; --color-info: #aaaaaa; + --color-infoBg: #ffffff; --color-success: #179958; - --color-warning: #faad14; - --color-big-icon: #111111; --color-successBg: #d6ffe8; + 
--color-second-successBg: #f0fdf4; + --color-warning: #faad14; + --color-second-warning: #854d0e; --color-warningBg: #feefd0; - --color-errorBg: #ffa3a3; - --color-infoBg: #ffffff; + --color-second-warningBg: #fefce8; + --color-big-icon: #111111; --bg-main-color: #f5f5f5; --bg-card-color: #f9f9f9; + --bg-second-card-bg: var(--color-white); --bg-loading-color: rgba(0, 0, 0, 0.45); --bg-content-color: var(--color-white); --font-main-color: #333333; --font-text-color: #595959; --font-info-color: #808080; --font-tip-color: #999999; - --bg-scrollbar: #dddddd; + --bg-scrollbar: #f5f5f5; + --color-scrollbar: #d5d5d5; --bg-scrollbar-hover: #bbbbbb; --bg-box-shadow: rgba(0, 0, 0, 0.05); + --bg-primary-shadow: rgba(0, 55, 124, 0.3); --bg-gradient-shadow: 0px 4px 6px -4px rgba(0, 0, 0, 0.1), 0px 10px 15px -3px rgba(0, 0, 0, 0.1); --menu-bg: var(--bg-main-color); + --input-bg: var(--color-white); --color-switch-theme: #e5e7eb; - + --think-done-icon: #356bfd; + --think-done-bg: linear-gradient(180deg, #f3f5fc 30%, #ffffff 100%); + --font-think-color: #5e5e5e; + --face-icon-bg: #a6a6a6; + --bg-switch: var(--color-primaryBg); //边框 --border-main-color: #e5e7eb; + --border-fuzzy-color: rgba(255, 255, 255, 0.1); + --border-warning: var(--color-warningBg); + --border-success: var(--color-successBg); + --border-primary: var(--color-second-primaryBg); //黑色按钮 --bg-black-color: #434343; --bg-black-hover-color: #595959; --bg-black-active-color: #262626; --border-black-color: #434343; + + //md显示 + --code-header-bg: #dddddd; + --code-header-font: #2c2c36; + --code-content-bg: #f0f0f0; + --table-th-bg: #dddddd; + --table-td-bg: #f0f0f0; + --blockquote-color: var(--font-info-color); } [data-theme="dark"] { @@ -51,26 +79,46 @@ --font-text-color: #e9e9e9; --font-info-color: #aeaeae; --font-tip-color: #aeaeae; - --bg-scrollbar: #dddddd; - --bg-scrollbar-hover: #bbbbbb; + --bg-scrollbar: #1e1e1e; + --color-scrollbar: #565656; + --bg-scrollbar-hover: #666666; --color-primary: #0054ae; --color-primary-hover: #1668dc; - --color-primaryBg: #95b5fa; - --bg-box-shadow: rgba(109, 153, 233, 0.05); + --color-primaryBg: #e0eaff; + --color-primary-second: #1677ff; + --color-second-primaryBg: #d4e1fd; + --bg-box-shadow: rgba(255, 255, 255, 0.1); --bg-gradient-shadow: 0px 4px 6px -4px rgba(255, 255, 255, 0.1), 0px 5px 8px 1px rgba(255, 255, 255, 0.1); --menu-bg: #3e3e3e; --color-big-icon: #ffffff; + --bg-second-card-bg: #111111; --color-switch-theme: var(--color-primary-hover); + --think-done-bg: linear-gradient(180deg, #32313a 30%, #2d2d2d 100%); + --font-think-color: #e0ecffcc; + --bg-switch: var(--bg-card-color); + --input-bg: var(--menu-bg); + --message-bg: var(--bg-card-color); //边框 - --border-main-color: #2b2b2b; + --border-main-color: #3b3b3b; + --border-warning: #f8e9ca; + --border-success: #d7f8e8; + --border-primary: #d5daf8; //黑色按钮 --bg-black-color: #434343; --bg-black-hover-color: #595959; --bg-black-active-color: #262626; --border-black-color: #434343; + + //md显示 + --code-header-bg: #585a73; + --code-header-font: #fafafc; + --code-content-bg: #2c2c36; + --table-th-bg: #585a73; + --table-td-bg: #2c2c36; + --blockquote-color: var(--bg-scrollbar-hover); } @use "ant-design-vue/es/style/themes/default.less"; diff --git a/EdgeCraftRAG/ui/vue/src/utils/common.ts b/EdgeCraftRAG/ui/vue/src/utils/common.ts index efdf142be9..e78100a06b 100644 --- a/EdgeCraftRAG/ui/vue/src/utils/common.ts +++ b/EdgeCraftRAG/ui/vue/src/utils/common.ts @@ -3,6 +3,7 @@ import { inject } from "vue"; import { customNotification } from "./notification"; 
+import { Local } from "./storage"; export const useNotification = () => { const customNotificationInjected = inject("customNotification"); @@ -19,3 +20,28 @@ export const formatDecimals = (num: number, decimalPlaces: number = 2) => { const factor = Math.pow(10, decimalPlaces); return Math.round(num * factor) / factor; }; + +export const formatCapitalize = (string: string, start: number = 0, length: number = 1) => { + const end = start + length; + const part1 = string.slice(0, start); + const part2 = string.slice(start, end).toUpperCase(); + const part3 = string.slice(end); + return part1 + part2 + part3; +}; + +export const getChatSessionId = (): string => { + const STORAGE_KEY = "chat_session_id"; + + const storedSessionId = Local.get(STORAGE_KEY); + if (storedSessionId) { + return storedSessionId; + } + const newSessionId = self.crypto?.randomUUID?.() || generateFallbackId(); + + Local.set(STORAGE_KEY, newSessionId); + return newSessionId; +}; + +const generateFallbackId = (): string => { + return `${Date.now()}_${Math.random().toString(36).substring(2, 9)}`; +}; diff --git a/EdgeCraftRAG/ui/vue/src/utils/customRenderer.ts b/EdgeCraftRAG/ui/vue/src/utils/customRenderer.ts new file mode 100644 index 0000000000..5a19ade40a --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/utils/customRenderer.ts @@ -0,0 +1,128 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import { marked } from "marked"; +import hljs from "highlight.js"; +import { formatCapitalize } from "./common"; +import ClipboardJS from "clipboard"; +import { message } from "ant-design-vue"; + +interface CodeRenderParams { + text: string; + lang?: string; +} + +class ClipboardManager { + private clipboard: ClipboardJS | null = null; + private observer: MutationObserver | null = null; + + constructor() { + this.autoInit(); + } + + private autoInit() { + if (typeof document === "undefined") return; + const init = () => { + this.init(".copy-btn"); + this.setupMutationObserver(); + }; + + if (document.readyState === "complete") { + init(); + } else { + document.addEventListener("DOMContentLoaded", init); + } + } + + private init(selector: string) { + this.destroy(); + + this.clipboard = new ClipboardJS(selector, { container: document.body }); + + this.clipboard.on("success", (e) => this.handleSuccess(e)); + this.clipboard.on("error", (e) => this.handleError(e)); + } + + private setupMutationObserver() { + this.observer = new MutationObserver((mutations) => { + const hasNewButtons = mutations.some((mutation) => + Array.from(mutation.addedNodes).some( + (node) => node instanceof HTMLElement && (node.matches(".copy-btn") || node.querySelector(".copy-btn")), + ), + ); + if (hasNewButtons) this.init(".copy-btn"); + }); + + this.observer.observe(document.body, { + childList: true, + subtree: true, + }); + } + + destroy() { + this.clipboard?.destroy(); + this.observer?.disconnect(); + this.clipboard = null; + this.observer = null; + } + + private handleSuccess(e: ClipboardJS.Event) { + e.clearSelection(); + message.success("Copy Successful !"); + const button = e.trigger as HTMLElement; + const copyIcon = button.querySelector(".copy-icon") as HTMLElement; + const successIcon = button.querySelector(".success-icon") as HTMLElement; + + copyIcon.style.display = "none"; + successIcon.style.display = "block"; + + let timeout = null; + if (timeout) clearTimeout(timeout); + + timeout = setTimeout(() => { + copyIcon.style.display = "block"; + successIcon.style.display = "none"; + }, 2000); + } + + private 
handleError(e: ClipboardJS.Event) { + message.error("Copy Failure !"); + } +} + +export const clipboardManager = new ClipboardManager(); + +const createCustomRenderer = () => { + const renderer = new marked.Renderer(); + + renderer.link = ({ href, title, text }) => { + return `${text}`; + }; + + renderer.code = ({ text, lang }: CodeRenderParams) => { + const language = hljs.getLanguage(lang || "") ? lang : "plaintext"; + const codeTitle = formatCapitalize(language || "Code"); + const codeHtml = hljs.highlight(text, { + language: language || "plaintext", + }).value; + const uniqueId = `code-${Date.now()}-${Math.random().toString(16).slice(2)}`; + + return ` +
+      <!-- omitted markup: .intel-highlighter wrapper containing a .header-wrap (language title ${codeTitle} plus a .copy-btn with .copy-icon / .success-icon targeting #${uniqueId}) and a .content-wrap pane rendering ${codeHtml} -->
+ `; + }; + + return renderer; +}; + +const CustomRenderer = createCustomRenderer(); +export default CustomRenderer; diff --git a/EdgeCraftRAG/ui/vue/src/utils/mitt.ts b/EdgeCraftRAG/ui/vue/src/utils/mitt.ts new file mode 100644 index 0000000000..f512797f56 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/utils/mitt.ts @@ -0,0 +1,8 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import mitt from "mitt"; + +const eventBus = mitt(); + +export default eventBus; diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot.vue deleted file mode 100644 index 34464506bd..0000000000 --- a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot.vue +++ /dev/null @@ -1,275 +0,0 @@ - - - - diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/Chat.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/Chat.vue new file mode 100644 index 0000000000..8fe8bc6f83 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/Chat.vue @@ -0,0 +1,376 @@ + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/ConfigDrawer.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/ConfigDrawer.vue similarity index 75% rename from EdgeCraftRAG/ui/vue/src/views/chatbot/components/ConfigDrawer.vue rename to EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/ConfigDrawer.vue index 635058ed6e..732818c1c1 100644 --- a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/ConfigDrawer.vue +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/ConfigDrawer.vue @@ -1,7 +1,7 @@ @@ -118,7 +131,7 @@ import { InfoCircleFilled } from "@ant-design/icons-vue"; import { FormInstance } from "ant-design-vue"; import { reactive, ref } from "vue"; -import { ConfigType } from "../type"; +import { ConfigType } from "../../type"; const props = defineProps({ drawerData: { @@ -162,7 +175,7 @@ const rules = reactive({ const sliderMarks = reactive({ top_n: { 1: "1", - 10: "10", + 30: "30", }, temperature: { 0: "0", @@ -178,7 +191,7 @@ const sliderMarks = reactive({ }, max_tokens: { 1: "1", - 8192: "8192", + 10240: "10240", }, }); const handleClose = () => { diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/MessageItem.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/MessageItem.vue similarity index 77% rename from EdgeCraftRAG/ui/vue/src/views/chatbot/components/MessageItem.vue rename to EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/MessageItem.vue index e1a828c165..03b64c53b0 100644 --- a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/MessageItem.vue +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/MessageItem.vue @@ -1,9 +1,13 @@