|
2 | 2 |
|
3 | 3 | from __future__ import annotations
|
4 | 4 |
|
| 5 | +import requests |
| 6 | +import time |
5 | 7 | import hashlib
|
6 | 8 | from pathlib import Path
|
7 |
| -from typing import Optional, Union, List |
| 9 | +from typing import Optional, Union, List, Literal |
8 | 10 |
|
9 | 11 | from loguru import logger
|
10 | 12 | from vlmrun.client.base_requestor import APIRequestor
|
11 | 13 | from vlmrun.types.abstract import VLMRunProtocol
|
12 |
| -from vlmrun.client.types import FileResponse |
| 14 | +from vlmrun.client.types import FileResponse, PresignedUrlResponse |
13 | 15 |
|
14 | 16 |
|
15 | 17 | class Files:
|
@@ -89,40 +91,116 @@ def upload(
|
89 | 91 | self,
|
90 | 92 | file: Union[Path, str],
|
91 | 93 | purpose: str = "assistants",
|
| 94 | + method: Literal["auto", "direct", "presigned-url"] = "auto", |
92 | 95 | timeout: int = 3 * 60,
|
| 96 | + expiration: int = 24 * 60 * 60, |
| 97 | + force: bool = False, |
93 | 98 | ) -> FileResponse:
|
94 | 99 | """Upload a file.
|
95 | 100 |
|
96 | 101 | Args:
|
97 | 102 | file: Path to file to upload
|
98 | 103 | purpose: Purpose of file (default: assistants)
|
99 | 104 | timeout: Timeout in seconds for the direct upload request; not applied to presigned-URL uploads (default: 3 minutes)
|
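| 105 | + method: Upload method, one of "auto", "direct" or "presigned-url" (default: "auto"); "auto" uses a presigned URL for files larger than 32MB and a direct upload otherwise |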
| 106 | + expiration: Expiration time in seconds for the presigned URL (default: 24 hours) |
| 107 | + force: Upload even if a cached response for this file already exists (default: False) |
100 | 108 |
|
101 | 109 | Returns:
|
102 | 110 | FileResponse: Uploaded file object
|
103 | 111 | """
|
104 | 112 | if isinstance(file, str):
|
105 |
| - file = Path(file) |
| 113 | + try: |
| 114 | + file = Path(file) |
| 115 | + except Exception as exc: |
| 116 | + raise ValueError(f"Invalid file path: {file}") from exc |
| 117 | + elif not isinstance(file, Path): |
| 118 | + raise ValueError(f"Invalid file path: {file}") |
| 119 | + |
| 120 | + # Check if the file exists |
| 121 | + if not file.exists(): |
| 122 | + raise FileNotFoundError(f"File does not exist: {file}") |
106 | 123 |
|
107 | 124 | # Check if the file already exists in the database
|
108 | 125 | cached_response: Optional[FileResponse] = self.get_cached_file(file)
|
109 |
| - if cached_response: |
| 126 | + if cached_response and not force: |
110 | 127 | return cached_response
|
111 | 128 |
|
112 |
| - # Upload the file |
113 |
| - with open(file, "rb") as f: |
114 |
| - files = {"file": (file.name, f)} |
| 129 | + # If method is "auto", check if the file is too large to upload directly |
| 130 | + if method == "auto": |
| 131 | + if file.stat().st_size > 32 * 1024 * 1024: # 32MB |
| 132 | + method = "presigned-url" |
| 133 | + else: |
| 134 | + method = "direct" |
| 135 | + logger.debug(f"Upload method [file={file}, method={method}]") |
| 136 | + |
| 137 | + # If method is "presigned-url", generate a presigned URL for the file |
| 138 | + if method == "presigned-url": |
| 139 | + # Generate a presigned URL for the file |
115 | 140 | response, status_code, headers = self._requestor.request(
|
116 | 141 | method="POST",
|
117 |
| - url="files", |
118 |
| - params={"purpose": purpose}, |
119 |
| - files=files, |
120 |
| - timeout=timeout, |
| 142 | + url="files/presigned-url", |
| 143 | + data={ |
| 144 | + "filename": file.name, |
| 145 | + "purpose": purpose, |
| 146 | + "expiration": expiration, |
| 147 | + }, |
121 | 148 | )
|
122 |
| - |
123 | 149 | if not isinstance(response, dict):
|
124 | 150 | raise TypeError("Expected dict response")
|
125 |
| - return FileResponse(**response) |
| 151 | + response = PresignedUrlResponse(**response) |
| 152 | + |
| 153 | + # PUT the file to the presigned URL |
| 154 | + start_t = time.time() |
| 155 | + logger.debug( |
| 156 | + f"Uploading file to presigned URL [file={file}, id={response.id}, url={response.url}]" |
| 157 | + ) |
| 158 | + with file.open("rb") as f: |
| 159 | + put_response = requests.put( |
| 160 | + response.url, |
| 161 | + headers={"Content-Type": response.content_type}, |
| 162 | + data=f, |
| 163 | + ) |
| 164 | + status_code = put_response.status_code |
| 165 | + end_t = time.time() |
| 166 | + logger.debug( |
| 167 | + f"Uploaded file to presigned URL [file={file}, url={response.url}, time={end_t - start_t:.1f}s]" |
| 168 | + ) |
| 169 | + if status_code == 200: |
| 170 | + # Verify the file upload |
| 171 | + verify_response, status_code, headers = self._requestor.request( |
| 172 | + method="GET", |
| 173 | + url=f"files/verify-upload/{response.id}", |
| 174 | + ) |
| 175 | + if status_code == 200: |
| 176 | + return FileResponse(**verify_response) |
| 177 | + else: |
| 178 | + raise Exception(f"Failed to verify file upload: {verify_response}") |
| 179 | + else: |
| 180 | + raise Exception(f"Failed to upload file to presigned URL: {response}") |
| 181 | + |
| 182 | + # If method is "direct", upload the file directly |
| 183 | + elif method == "direct": |
| 184 | + logger.debug(f"Uploading file directly [file={file}]") |
| 185 | + # Upload the file |
| 186 | + with open(file, "rb") as f: |
| 187 | + files = {"file": (file.name, f)} |
| 188 | + response, status_code, headers = self._requestor.request( |
| 189 | + method="POST", |
| 190 | + url="files", |
| 191 | + params={"purpose": purpose}, |
| 192 | + files=files, |
| 193 | + timeout=timeout, |
| 194 | + ) |
| 195 | + if status_code == 201: |
| 196 | + if not isinstance(response, dict): |
| 197 | + raise TypeError("Expected dict response") |
| 198 | + return FileResponse(**response) |
| 199 | + else: |
| 200 | + raise Exception(f"Failed to upload file directly: {response}") |
| 201 | + |
| 202 | + else: |
| 203 | + raise ValueError(f"Invalid upload method: {method}") |
126 | 204 |
|
127 | 205 | def get(self, file_id: str) -> FileResponse:
|
128 | 206 | """Get file metadata.
|
|
0 commit comments