Skip to content

Commit 3a3a0fb

Browse files
andreaazzini and simedw authored
Pull images with API keys when token is missing (#89)
* Pull images with API keys when token is missing
* Added option during export for token
* Bump version number

Co-authored-by: Simon Edwardsson <[email protected]>
1 parent d7507f2 commit 3a3a0fb

File tree

6 files changed

+48
-18
lines changed

6 files changed

+48
-18
lines changed

darwin/cli.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ def run(args, parser):
6969
elif args.action == "report":
7070
f.dataset_report(args.dataset, args.granularity or "day")
7171
elif args.action == "export":
72-
f.export_dataset(args.dataset, args.annotation_class, args.name)
72+
f.export_dataset(args.dataset, args.include_url_token, args.annotation_class, args.name)
7373
elif args.action == "releases":
7474
f.dataset_list_releases(args.dataset)
7575
elif args.action == "pull":

darwin/cli_functions.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -199,7 +199,9 @@ def dataset_report(dataset_slug: str, granularity) -> Path:
199199
_error(f"Dataset '{dataset_slug}' does not exist.")
200200

201201

202-
def export_dataset(dataset_slug: str, annotation_class_ids: Optional[List] = None, name: Optional[str] = None):
202+
def export_dataset(
203+
dataset_slug: str, include_url_token: bool, annotation_class_ids: Optional[List] = None, name: Optional[str] = None
204+
):
203205
"""Create a new release for the dataset
204206
205207
Parameters
@@ -214,7 +216,7 @@ def export_dataset(dataset_slug: str, annotation_class_ids: Optional[List] = Non
214216
client = _load_client(offline=False)
215217
identifier = DatasetIdentifier.parse(dataset_slug)
216218
ds = client.get_remote_dataset(identifier)
217-
ds.export(annotation_class_ids=annotation_class_ids, name=name)
219+
ds.export(annotation_class_ids=annotation_class_ids, name=name, include_url_token=include_url_token)
218220
identifier.version = name
219221
print(f"Dataset {dataset_slug} successfully exported to {identifier}")
220222

darwin/dataset/download_manager.py

Lines changed: 23 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,14 @@
22
import json
33
import time
44
from pathlib import Path
5-
from typing import Optional
65

76
import requests
87

98
from darwin.utils import is_image_extension_allowed
109

1110

1211
def download_all_images_from_annotations(
12+
api_key: str,
1313
api_url: str,
1414
annotations_path: Path,
1515
images_path: Path,
@@ -21,6 +21,8 @@ def download_all_images_from_annotations(
2121
2222
Parameters
2323
----------
24+
api_key : str
25+
API Key of the current team
2426
api_url : str
2527
Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/')
2628
annotations_path : Path
@@ -73,17 +75,23 @@ def download_all_images_from_annotations(
7375
# Create the generator with the partial functions
7476
count = len(annotations_to_download_path)
7577
generator = lambda: (
76-
functools.partial(download_image_from_annotation, api_url, annotation_path, images_path, annotation_format)
78+
functools.partial(
79+
download_image_from_annotation, api_key, api_url, annotation_path, images_path, annotation_format
80+
)
7781
for annotation_path in annotations_to_download_path
7882
)
7983
return generator, count
8084

8185

82-
def download_image_from_annotation(api_url: str, annotation_path: Path, images_path: str, annotation_format: str):
86+
def download_image_from_annotation(
87+
api_key: str, api_url: str, annotation_path: Path, images_path: str, annotation_format: str
88+
):
8389
"""Helper function: dispatcher of functions to download an image given an annotation
8490
8591
Parameters
8692
----------
93+
api_key : str
94+
API Key of the current team
8795
api_url : str
8896
Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/')
8997
annotation_path : Path
@@ -94,20 +102,22 @@ def download_image_from_annotation(api_url: str, annotation_path: Path, images_p
94102
Format of the annotations. Currently only JSON is supported
95103
"""
96104
if annotation_format == "json":
97-
download_image_from_json_annotation(api_url, annotation_path, images_path)
105+
download_image_from_json_annotation(api_key, api_url, annotation_path, images_path)
98106
elif annotation_format == "xml":
99107
print("sorry can't let you do that dave")
100108
raise NotImplementedError
101109
# download_image_from_xml_annotation(annotation_path, images_path)
102110

103111

104-
def download_image_from_json_annotation(api_url: str, annotation_path: Path, image_path: str):
112+
def download_image_from_json_annotation(api_key: str, api_url: str, annotation_path: Path, image_path: str):
105113
"""
106114
Helper function: downloads an image given a .json annotation path
107115
and renames the json after the image filename
108116
109117
Parameters
110118
----------
119+
api_key : str
120+
API Key of the current team
111121
api_url : str
112122
Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/')
113123
annotation_path : Path
@@ -122,10 +132,10 @@ def download_image_from_json_annotation(api_url: str, annotation_path: Path, ima
122132
original_filename_suffix = Path(annotation["image"]["original_filename"]).suffix
123133
path = Path(image_path) / (annotation_path.stem + original_filename_suffix)
124134

125-
download_image(annotation["image"]["url"], path)
135+
download_image(annotation["image"]["url"], path, api_key)
126136

127137

128-
def download_image(url: str, path: Path, verbose: Optional[bool] = False):
138+
def download_image(url: str, path: Path, api_key: str):
129139
"""Helper function: downloads one image from url.
130140
131141
Parameters
@@ -134,17 +144,18 @@ def download_image(url: str, path: Path, verbose: Optional[bool] = False):
134144
Url of the image to download
135145
path : Path
136146
Path where to download the image, with filename
137-
verbose : bool
138-
Flag for the logging level
147+
api_key : str
148+
API Key of the current team
139149
"""
140150
if path.exists():
141151
return
142-
if verbose:
143-
print(f"Dowloading {path.name}")
144152
TIMEOUT = 60
145153
start = time.time()
146154
while True:
147-
response = requests.get(url, stream=True)
155+
if "token" in url:
156+
response = requests.get(url, stream=True)
157+
else:
158+
response = requests.get(url, headers={"Authorization": f"ApiKey {api_key}"}, stream=True)
148159
# Correct status: download image
149160
if response.status_code == 200:
150161
with open(str(path), "wb") as file:

darwin/dataset/remote_dataset.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -233,8 +233,12 @@ def pull(
233233
# No images will be downloaded
234234
return None, 0
235235

236+
team_config = self.client.config.get_team(self.team)
237+
api_key = team_config.get("api_key")
238+
236239
# Create the generator with the download instructions
237240
progress, count = download_all_images_from_annotations(
241+
api_key=api_key,
238242
api_url=self.client.url,
239243
annotations_path=annotations_dir,
240244
images_path=self.local_images_path,
@@ -295,7 +299,7 @@ def fetch_remote_classes(self):
295299
"annotation_classes"
296300
]
297301

298-
def export(self, name: str, annotation_class_ids: Optional[List[str]] = None):
302+
def export(self, name: str, annotation_class_ids: Optional[List[str]] = None, include_url_token: bool = False):
299303
"""Create a new release for the dataset
300304
301305
Parameters
@@ -304,10 +308,16 @@ def export(self, name: str, annotation_class_ids: Optional[List[str]] = None):
304308
Name of the release
305309
annotation_class_ids: List
306310
List of the classes to filter
311+
include_url_token: bool
312+
Whether the image URLs in the export should include a token enabling access without team membership
307313
"""
308314
if annotation_class_ids is None:
309315
annotation_class_ids = []
310-
payload = {"annotation_class_ids": annotation_class_ids, "name": name}
316+
payload = {
317+
"annotation_class_ids": annotation_class_ids,
318+
"name": name,
319+
"include_export_token": include_url_token,
320+
}
311321
self.client.post(
312322
f"/datasets/{self.dataset_id}/exports",
313323
payload=payload,

darwin/options.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,13 @@ def __init__(self):
9595
parser_export.add_argument("dataset", type=str, help="Remote dataset name to export.")
9696
parser_export.add_argument("name", type=str, help="Name with with the version gets tagged.")
9797
parser_export.add_argument("annotation_class", type=str, nargs="?", help="List of class filters")
98+
parser_export.add_argument(
99+
"--include-url-token",
100+
default=False,
101+
action="store_true",
102+
help="Each annotation file includes a url with an access token."
103+
"Warning, anyone with the url can access the images, even without being a team member",
104+
)
98105

99106
# Releases
100107
parser_dataset_version = dataset_action.add_parser("releases", help="Available version of a dataset.")

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name="darwin-py",
8-
version="0.5.1",
8+
version="0.5.2",
99
author="V7",
1010
author_email="[email protected]",
1111
description="Library and command line interface for darwin.v7labs.com",

0 commit comments

Comments
 (0)