Skip to content

Commit e659bde

Browse files
authored
Support presigned URL for file upload (#102)
1 parent 7c7ff22 commit e659bde

File tree

5 files changed

+107
-17
lines changed

5 files changed

+107
-17
lines changed

vlmrun/client/files.py

Lines changed: 91 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,16 @@
22

33
from __future__ import annotations
44

5+
import requests
6+
import time
57
import hashlib
68
from pathlib import Path
7-
from typing import Optional, Union, List
9+
from typing import Optional, Union, List, Literal
810

911
from loguru import logger
1012
from vlmrun.client.base_requestor import APIRequestor
1113
from vlmrun.types.abstract import VLMRunProtocol
12-
from vlmrun.client.types import FileResponse
14+
from vlmrun.client.types import FileResponse, PresignedUrlResponse
1315

1416

1517
class Files:
def upload(
    self,
    file: Union[Path, str],
    purpose: str = "assistants",
    method: Literal["auto", "direct", "presigned-url"] = "auto",
    timeout: int = 3 * 60,
    expiration: int = 24 * 60 * 60,
    force: bool = False,
) -> FileResponse:
    """Upload a file, either directly or through a presigned URL.

    Args:
        file: Path to file to upload
        purpose: Purpose of file (default: "assistants")
        method: Method to use for upload (default: "auto").
            "direct" POSTs the file to the ``files`` endpoint.
            "presigned-url" requests a presigned URL, PUTs the file to
            it and then verifies the upload. "auto" picks
            "presigned-url" for files larger than 32MB and "direct"
            otherwise.
        timeout: Timeout in seconds for the file transfer, applied to
            both the direct POST and the presigned PUT
            (default: 3 minutes)
        expiration: Expiration time in seconds requested for the
            presigned URL (default: 24 hours)
        force: Force upload even if file already exists (default: False)

    Returns:
        FileResponse: Uploaded file object, or the cached one when the
        file is already known and ``force`` is False

    Raises:
        ValueError: If ``file`` is neither a ``str`` nor a ``Path``, or
            ``method`` is not one of the supported values.
        FileNotFoundError: If ``file`` does not exist on disk.
        TypeError: If the API answers with something other than a dict.
        Exception: If the upload or its verification is rejected.
    """
    if isinstance(file, str):
        file = Path(file)
    elif not isinstance(file, Path):
        raise ValueError(f"Invalid file path: {file}")

    # Check if the file exists
    if not file.exists():
        raise FileNotFoundError(f"File does not exist: {file}")

    # Check if the file already exists in the database
    cached_response: Optional[FileResponse] = self.get_cached_file(file)
    if cached_response and not force:
        return cached_response

    # If method is "auto", pick the transport from the file size
    if method == "auto":
        max_direct_bytes = 32 * 1024 * 1024  # 32MB
        if file.stat().st_size > max_direct_bytes:
            method = "presigned-url"
        else:
            method = "direct"
    logger.debug(f"Upload method [file={file}, method={method}]")

    if method == "presigned-url":
        # Generate a presigned URL for the file
        response, status_code, _ = self._requestor.request(
            method="POST",
            url="files/presigned-url",
            data={
                "filename": file.name,
                "purpose": purpose,
                "expiration": expiration,
            },
        )
        if not isinstance(response, dict):
            raise TypeError("Expected dict response")
        presigned = PresignedUrlResponse(**response)

        # PUT the file to the presigned URL. The URL itself is kept out of
        # the logs: anyone holding it can write to the target until it
        # expires.
        logger.debug(
            f"Uploading file to presigned URL [file={file}, id={presigned.id}]"
        )
        start_t = time.monotonic()
        with file.open("rb") as f:
            put_response = requests.put(
                presigned.url,
                headers={"Content-Type": presigned.content_type},
                data=f,  # file object is streamed rather than read into memory
                timeout=timeout,  # without it a stalled transfer never returns
            )
        elapsed = time.monotonic() - start_t
        logger.debug(
            f"PUT to presigned URL finished [file={file}, id={presigned.id}, "
            f"status={put_response.status_code}, time={elapsed:.1f}s]"
        )
        if put_response.status_code != 200:
            # Report what the storage endpoint answered, not the presigned
            # URL metadata, so the failure can actually be diagnosed.
            raise Exception(
                "Failed to upload file to presigned URL: "
                f"[status={put_response.status_code}, response={put_response.text}]"
            )

        # Verify the file upload
        verify_response, status_code, _ = self._requestor.request(
            method="GET",
            url=f"files/verify-upload/{presigned.id}",
        )
        if status_code != 200:
            raise Exception(f"Failed to verify file upload: {verify_response}")
        if not isinstance(verify_response, dict):
            raise TypeError("Expected dict response")
        return FileResponse(**verify_response)

    if method == "direct":
        logger.debug(f"Uploading file directly [file={file}]")
        # The request must be issued while the file handle is still open
        with file.open("rb") as f:
            response, status_code, _ = self._requestor.request(
                method="POST",
                url="files",
                params={"purpose": purpose},
                files={"file": (file.name, f)},
                timeout=timeout,
            )
        if status_code != 201:
            raise Exception(f"Failed to upload file directly: {response}")
        if not isinstance(response, dict):
            raise TypeError("Expected dict response")
        return FileResponse(**response)

    raise ValueError(f"Invalid upload method: {method}")
126204

127205
def get(self, file_id: str) -> FileResponse:
128206
"""Get file metadata.

vlmrun/client/hub.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,9 @@ def list_domains(self) -> List[HubDomainInfo]:
9696
except Exception as e:
9797
raise APIError(f"Failed to list domains: {str(e)}")
9898

99-
def get_schema(self, domain: str, gql_stmt: Optional[str] = None) -> HubSchemaResponse:
99+
def get_schema(
100+
self, domain: str, gql_stmt: Optional[str] = None
101+
) -> HubSchemaResponse:
100102
"""Get the JSON schema for a given domain.
101103
102104
Args:

vlmrun/client/types.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ class FileResponse(BaseModel):
3535
object: str = "file"
3636

3737

38+
class PresignedUrlResponse(BaseModel):
    """Response describing a presigned URL issued for a file upload.

    Every optional field defaults to ``None`` explicitly, so a response
    that omits one of them still validates regardless of how the
    installed pydantic version treats ``Optional`` without a default.
    """

    # Identifier of the pending upload; the client uses it to verify the
    # upload once the file has been sent.
    id: Optional[str] = None
    # Presigned URL the file is PUT to.
    url: Optional[str] = None
    # Presumably echoes the filename sent in the request -- TODO confirm.
    filename: Optional[str] = None
    # Presumably the URL lifetime in seconds, as requested -- TODO confirm.
    expiration: Optional[int] = None
    # Presumably the HTTP method to use against `url` -- TODO confirm.
    method: Optional[str] = None
    # Value the client sends as the Content-Type header of the upload.
    content_type: Optional[str] = None
    # Required timestamp; presumably when the URL was issued -- TODO confirm.
    created_at: datetime
46+
47+
3848
class CreditUsage(BaseModel):
3949
elements_processed: Optional[int] = None
4050
element_type: Optional[Literal["image", "page", "video", "audio"]] = None

vlmrun/common/image.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212

1313
def _open_image_with_exif(path: Union[str, Path]) -> Image.Image:
1414
"""Open an image and apply EXIF orientation if available.
15-
15+
1616
Args:
1717
path: Path to the image file
18-
18+
1919
Returns:
2020
PIL Image with EXIF orientation applied and converted to RGB
2121
"""

vlmrun/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.2.16"
1+
__version__ = "0.2.17"

0 commit comments

Comments
 (0)