Skip to content

Commit 9d22c08

Browse files
committed
Add langchain standard integration tests
1 parent d67221a commit 9d22c08

File tree

5 files changed

+1212
-240
lines changed

5 files changed

+1212
-240
lines changed

langchain/pyproject.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@ Issues = "https://github.com/vectorize-io/integrations-python/issues"
3333
dev = [
3434
"mypy>=1.17.1,<1.18",
3535
"pytest>=8.3.3",
36+
"pytest-asyncio>=0.26.0",
3637
"ruff>=0.12.7,<0.13",
38+
"langchain-tests>=0.3.20",
39+
"requests>=2.31.0",
40+
"types-requests>=2.31.0",
3741
]
3842

3943
[tool.ruff.lint]
@@ -73,3 +77,5 @@ packages = ["langchain_vectorize"]
7377
requires = ["hatchling"]
7478
build-backend = "hatchling.build"
7579

80+
[tool.pytest.ini_options]
81+
asyncio_mode = "auto"

langchain/tests/conftest.py

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
import json
2+
import logging
3+
import os
4+
import time
5+
from collections.abc import Iterator
6+
from pathlib import Path
7+
from typing import Literal
8+
9+
import pytest
10+
import requests
11+
from vectorize_client.api.ai_platform_connectors_api import AIPlatformConnectorsApi
12+
from vectorize_client.api.destination_connectors_api import DestinationConnectorsApi
13+
from vectorize_client.api.pipelines_api import PipelinesApi
14+
from vectorize_client.api.source_connectors_api import SourceConnectorsApi
15+
from vectorize_client.api.uploads_api import UploadsApi
16+
from vectorize_client.api_client import ApiClient
17+
from vectorize_client.configuration import Configuration
18+
from vectorize_client.exceptions import ApiException
19+
from vectorize_client.models.ai_platform_config_schema import AIPlatformConfigSchema
20+
from vectorize_client.models.ai_platform_type_for_pipeline import (
21+
AIPlatformTypeForPipeline,
22+
)
23+
from vectorize_client.models.create_source_connector_request import (
24+
CreateSourceConnectorRequest,
25+
)
26+
from vectorize_client.models.destination_connector_type_for_pipeline import (
27+
DestinationConnectorTypeForPipeline,
28+
)
29+
from vectorize_client.models.file_upload import FileUpload
30+
from vectorize_client.models.pipeline_ai_platform_connector_schema import (
31+
PipelineAIPlatformConnectorSchema,
32+
)
33+
from vectorize_client.models.pipeline_configuration_schema import (
34+
PipelineConfigurationSchema,
35+
)
36+
from vectorize_client.models.pipeline_destination_connector_schema import (
37+
PipelineDestinationConnectorSchema,
38+
)
39+
from vectorize_client.models.pipeline_source_connector_schema import (
40+
PipelineSourceConnectorSchema,
41+
)
42+
from vectorize_client.models.retrieve_documents_request import RetrieveDocumentsRequest
43+
from vectorize_client.models.schedule_schema import ScheduleSchema
44+
from vectorize_client.models.schedule_schema_type import ScheduleSchemaType
45+
from vectorize_client.models.source_connector_type import SourceConnectorType
46+
from vectorize_client.models.start_file_upload_to_connector_request import (
47+
StartFileUploadToConnectorRequest,
48+
)
49+
50+
logger = logging.getLogger(__name__)
51+
52+
53+
@pytest.fixture(scope="session")
def api_token() -> str:
    """Return the API token taken from the ``VECTORIZE_TOKEN`` variable.

    Raises:
        ValueError: If ``VECTORIZE_TOKEN`` is unset or empty.
    """
    value = os.environ.get("VECTORIZE_TOKEN")
    if value:
        return value
    msg = "Please set the VECTORIZE_TOKEN environment variable"
    raise ValueError(msg)
60+
61+
62+
@pytest.fixture(scope="session")
def org_id() -> str:
    """Return the organization id taken from the ``VECTORIZE_ORG`` variable.

    Raises:
        ValueError: If ``VECTORIZE_ORG`` is unset or empty.
    """
    value = os.environ.get("VECTORIZE_ORG")
    if value:
        return value
    msg = "Please set the VECTORIZE_ORG environment variable"
    raise ValueError(msg)
69+
70+
71+
@pytest.fixture(scope="session")
def environment() -> Literal["prod", "dev", "local", "staging"]:
    """Return the target environment named by ``VECTORIZE_ENV``.

    Falls back to ``"prod"`` when the variable is not set.

    Raises:
        ValueError: If the value is not one of ``prod``, ``dev``, ``local``
            or ``staging``.
    """
    selected = os.environ.get("VECTORIZE_ENV", "prod")
    if selected in ("prod", "dev", "local", "staging"):
        return selected  # type: ignore[return-value]
    msg = "Invalid VECTORIZE_ENV environment variable."
    raise ValueError(msg)
78+
79+
80+
@pytest.fixture(scope="session")
def api_client(api_token: str, environment: str) -> Iterator[ApiClient]:
    """Yield a session-wide ``ApiClient`` aimed at the selected environment.

    The client is used as a context manager, so it is closed once the test
    session is done with the fixture.

    Args:
        api_token: Token passed to the client configuration as the access
            token (and, for ``local``, also as the extra header value).
        environment: Environment name used to pick the API host.
    """
    known_hosts = {
        "prod": "https://api.vectorize.io/v1",
        "dev": "https://api-dev.vectorize.io/v1",
        "local": "http://localhost:3000/api",
    }
    # Any environment not listed above is routed to staging.
    host = known_hosts.get(environment, "https://api-staging.vectorize.io/v1")

    # Only the local environment gets the extra header name/value pair.
    is_local = environment == "local"
    header_name = "x-lambda-api-key" if is_local else None
    header_value = api_token if is_local else None

    configuration = Configuration(host=host, access_token=api_token, debug=True)
    with ApiClient(configuration, header_name, header_value) as client:
        yield client
101+
102+
103+
def _wait_for_documents(
    pipelines: PipelinesApi,
    org_id: str,
    pipeline_id: str,
    timeout: float = 180,
) -> None:
    """Poll ``retrieve_documents`` until it returns exactly two documents.

    Args:
        pipelines: Pipelines API used to issue the retrieval requests.
        org_id: Organization owning the pipeline.
        pipeline_id: Pipeline to query.
        timeout: Maximum number of seconds to keep polling.

    Raises:
        ApiException: If a retrieval fails with anything other than a 503.
        RuntimeError: If two documents are not returned within ``timeout``.
    """
    request = RetrieveDocumentsRequest(  # type: ignore[call-arg]
        question="query",
        num_results=2,
    )
    # A monotonic clock keeps the timeout immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while True:
        try:
            doc_response = pipelines.retrieve_documents(org_id, pipeline_id, request)
        except ApiException as e:
            # 503 responses are retried; anything else is propagated.
            # NOTE(review): this matches "503" anywhere in the exception text;
            # consider checking the exception's status code attribute instead.
            if "503" not in str(e):
                raise
        else:
            if len(doc_response.documents) == 2:
                return
        if time.monotonic() > deadline:
            msg = "Docs not retrieved in time"
            raise RuntimeError(msg)
        time.sleep(1)


@pytest.fixture(scope="session")
def pipeline_id(api_client: ApiClient, org_id: str) -> Iterator[str]:
    """Create a test pipeline fed by ``research.pdf`` and yield its id.

    The fixture creates a file-upload source connector, uploads the
    ``research.pdf`` file located next to this module, selects the first AI
    platform connector and destination connector whose type is
    ``"VECTORIZE"``, creates a manually scheduled pipeline from them and then
    waits until document retrieval returns two documents.

    The pipeline is deleted on teardown. Deletion also runs when the wait for
    documents fails, so a timed-out setup does not leave the pipeline behind.
    Deletion failures are logged, not raised.

    Args:
        api_client: Shared API client.
        org_id: Organization in which everything is created.

    Raises:
        RuntimeError: If no ``"VECTORIZE"`` AI platform or destination
            connector exists, or if documents are not retrievable in time.
    """
    pipelines = PipelinesApi(api_client)

    connectors_api = SourceConnectorsApi(api_client)
    response = connectors_api.create_source_connector(
        org_id,
        CreateSourceConnectorRequest(FileUpload(name="from api", type="FILE_UPLOAD")),
    )
    source_connector_id = response.connector.id
    logger.info("Created source connector %s", source_connector_id)

    uploads_api = UploadsApi(api_client)
    upload_response = uploads_api.start_file_upload_to_connector(
        org_id,
        source_connector_id,
        StartFileUploadToConnectorRequest(  # type: ignore[call-arg]
            name="research.pdf",
            content_type="application/pdf",
            metadata=json.dumps({"created-from-api": True}),
        ),
    )

    file_path = Path(__file__).parent / "research.pdf"
    with file_path.open("rb") as f:
        http_response = requests.put(
            upload_response.upload_url,
            data=f,
            headers={
                "Content-Type": "application/pdf",
            },
            timeout=60,
        )
    http_response.raise_for_status()
    logger.info("Upload successful")

    ai_platforms = AIPlatformConnectorsApi(api_client).get_ai_platform_connectors(
        org_id
    )
    # A bare next() would surface as "generator raised StopIteration" inside
    # this generator fixture, so look up with a default and fail explicitly.
    builtin_ai_platform = next(
        (c.id for c in ai_platforms.ai_platform_connectors if c.type == "VECTORIZE"),
        None,
    )
    if builtin_ai_platform is None:
        msg = "No VECTORIZE AI platform connector found"
        raise RuntimeError(msg)
    logger.info("Using AI platform %s", builtin_ai_platform)

    vector_databases = DestinationConnectorsApi(api_client).get_destination_connectors(
        org_id
    )
    builtin_vector_db = next(
        (c.id for c in vector_databases.destination_connectors if c.type == "VECTORIZE"),
        None,
    )
    if builtin_vector_db is None:
        msg = "No VECTORIZE destination connector found"
        raise RuntimeError(msg)
    logger.info("Using destination connector %s", builtin_vector_db)

    pipeline_response = pipelines.create_pipeline(
        org_id,
        PipelineConfigurationSchema(  # type: ignore[call-arg]
            source_connectors=[
                PipelineSourceConnectorSchema(
                    id=source_connector_id,
                    type=SourceConnectorType.FILE_UPLOAD,
                    config={},
                )
            ],
            destination_connector=PipelineDestinationConnectorSchema(
                id=builtin_vector_db,
                type=DestinationConnectorTypeForPipeline.VECTORIZE,
                config={},
            ),
            ai_platform_connector=PipelineAIPlatformConnectorSchema(
                id=builtin_ai_platform,
                type=AIPlatformTypeForPipeline.VECTORIZE,
                config=AIPlatformConfigSchema(),
            ),
            pipeline_name="Test pipeline",
            schedule=ScheduleSchema(type=ScheduleSchemaType.MANUAL),
        ),
    )
    pipeline_id = pipeline_response.data.id

    # From here on the pipeline exists, so every exit path must delete it.
    try:
        _wait_for_documents(pipelines, org_id, pipeline_id)
        logger.info("Created pipeline %s", pipeline_id)

        yield pipeline_id
    finally:
        try:
            pipelines.delete_pipeline(org_id, pipeline_id)
        except Exception:
            # Best-effort cleanup: record the failure without masking the
            # outcome of the tests or of the setup above.
            logger.exception("Failed to delete pipeline %s", pipeline_id)
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
from typing import Any, Literal
2+
3+
import pytest
4+
from langchain_core.retrievers import BaseRetriever
5+
from langchain_tests.integration_tests import RetrieversIntegrationTests
6+
7+
from langchain_vectorize import VectorizeRetriever
8+
9+
10+
class TestVectorizeRetrieverIntegration(RetrieversIntegrationTests):
    """Run ``RetrieversIntegrationTests`` against ``VectorizeRetriever``."""

    @pytest.fixture(autouse=True)
    def setup(
        self,
        environment: Literal["prod", "dev", "local", "staging"],
        api_token: str,
        org_id: str,
        pipeline_id: str,
    ) -> None:
        """Store the fixture values on the instance for the properties below."""
        self._pipeline_id = pipeline_id
        self._org_id = org_id
        self._api_token = api_token
        self._environment = environment

    @property
    def retriever_constructor(self) -> type[VectorizeRetriever]:
        """Return the retriever class under test."""
        return VectorizeRetriever

    @property
    def retriever_constructor_params(self) -> dict[str, Any]:
        """Return the keyword arguments built from the stored fixture values."""
        params: dict[str, Any] = {
            "environment": self._environment,
            "api_token": self._api_token,
            "organization": self._org_id,
            "pipeline_id": self._pipeline_id,
        }
        return params

    @property
    def retriever_query_example(self) -> str:
        """Return the sample query string."""
        return "What are you?"

    @pytest.mark.xfail(
        reason="VectorizeRetriever does not support k parameter in constructor"
    )
    def test_k_constructor_param(self) -> None:
        """Override marked as an expected failure; see the xfail reason."""
        raise NotImplementedError

    @pytest.mark.xfail(
        reason="VectorizeRetriever does not support k parameter in invoke"
    )
    def test_invoke_with_k_kwarg(self, retriever: BaseRetriever) -> None:
        """Override marked as an expected failure; see the xfail reason."""
        raise NotImplementedError

0 commit comments

Comments
 (0)