diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index a6745257..4eb8f1c2 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -182,11 +182,11 @@ jobs: colpali_pdf_dpi = 150 [morphik] - enable_colpali = true + enable_colpali = false mode = "self_hosted" use_local_env = true api_domain = "api.morphik.ai" - morphik_embedding_api_domain = "http://localhost:6000" + morphik_embedding_api_domain = ["http://localhost:6000"] colpali_mode = "local" [pdf_viewer] @@ -203,9 +203,47 @@ jobs: max_local_bytes = 1073741824 EOF + # Create a Docker network for the test + docker network create test-net + + # Start PostgreSQL container with pgvector + PG_CONTAINER=$(docker run -d --name postgres --network test-net \ + -e POSTGRES_USER=morphik \ + -e POSTGRES_PASSWORD=morphik \ + -e POSTGRES_DB=morphik \ + pgvector/pgvector:pg16) + + # Start Redis container + REDIS_CONTAINER=$(docker run -d --name redis --network test-net redis:7-alpine) + echo "Started Redis container: $REDIS_CONTAINER" + + echo "Started PostgreSQL container: $PG_CONTAINER" + + # Wait for PostgreSQL to be ready + pg_timeout=30 + pg_elapsed=0 + echo "Waiting for PostgreSQL to be ready..." + while [ $pg_elapsed -lt $pg_timeout ]; do + if docker exec postgres pg_isready -U morphik -d morphik > /dev/null 2>&1; then + echo "โœ… PostgreSQL is ready" + break + fi + sleep 1 + pg_elapsed=$((pg_elapsed + 1)) + done + + if [ $pg_elapsed -ge $pg_timeout ]; then + echo "โŒ PostgreSQL failed to start within ${pg_timeout} seconds" + docker logs postgres + docker rm -f postgres redis + docker network rm test-net + exit 1 + fi + # Start container in detached mode with config mounted - CONTAINER_ID=$(docker run -d -p 8000:8000 \ - -e POSTGRES_URI="postgresql://morphik:morphik@localhost:5432/morphik" \ + CONTAINER_ID=$(docker run -d --network test-net -p 8000:8000 \ + -e POSTGRES_URI="postgresql+asyncpg://morphik:morphik@postgres:5432/morphik" \ + -e PGPASSWORD="morphik" \ -v "$(pwd)/morphik.toml.test:/app/morphik.toml" \ "$IMAGE_TAG") @@ -235,6 +273,8 @@ jobs: docker logs "$CONTAINER_ID" docker stop "$CONTAINER_ID" docker rm "$CONTAINER_ID" + docker rm -f postgres redis + docker network rm test-net exit 1 fi @@ -247,11 +287,15 @@ jobs: docker logs "$CONTAINER_ID" docker stop "$CONTAINER_ID" docker rm "$CONTAINER_ID" + docker rm -f postgres redis + docker network rm test-net exit 1 fi # Clean up - echo "๐Ÿงน Cleaning up container" + echo "๐Ÿงน Cleaning up containers" docker stop "$CONTAINER_ID" docker rm "$CONTAINER_ID" + docker rm -f postgres redis + docker network rm test-net echo "โœ… Test completed successfully" diff --git a/core/config.py b/core/config.py index 6c4978dc..6b6388fe 100644 --- a/core/config.py +++ b/core/config.py @@ -372,6 +372,8 @@ def get_settings() -> Settings: api_domain = config["morphik"].get("api_domain", "api.morphik.ai") # morphik_embedding_api_domain is always a list of endpoints embedding_api_endpoints = config["morphik"].get("morphik_embedding_api_domain", [f"https://{api_domain}"]) + if isinstance(embedding_api_endpoints, str): + embedding_api_endpoints = [embedding_api_endpoints] secret_manager = config["morphik"].get("secret_manager", "env") settings_dict.update( diff --git a/core/embedding/litellm_embedding.py b/core/embedding/litellm_embedding.py index c5b47b3b..7449fb11 100644 --- a/core/embedding/litellm_embedding.py +++ b/core/embedding/litellm_embedding.py @@ -77,8 +77,8 @@ async def embed_documents(self, texts: List[str]) -> List[List[float]]: # Use a harmless placeholder; some LiteLLM providers demand a key even if backend ignores it model_params["api_key"] = get_settings().LITELLM_DUMMY_API_KEY - # Call LiteLLM - response = await litellm.aembedding(input=texts, **model_params) + # Call LiteLLM with retries for transient provider errors (e.g. OpenAI 500s) + response = await litellm.aembedding(input=texts, num_retries=3, **model_params) embeddings = [data["embedding"] for data in response.data]