Skip to content

Commit 87388d5

Browse files
authored
[MOD-603][external] Add annotation file item_id and annotation id. (#545)
* [MOD-603][external] Add annotation file item_id and annotation id. These fields are present in the darwin JSON v2 export, but they are not exposed by the relevant Python types. We need these for some upcoming work which reads darwin-py exports.
* [MOD-603][internal] Pass annotation ID through video annotations
* [MOD-603][internal] Move id to bottom for backwards compatibility
* [MOD-603][internal] Add tests for new IDs
1 parent 2103a94 commit 87388d5

File tree

3 files changed

+142
-7
lines changed

3 files changed

+142
-7
lines changed

darwin/datatypes.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,9 @@ class Annotation:
168168
#: Authorship of the annotation (reviewers)
169169
reviewers: Optional[List[AnnotationAuthor]] = None
170170

171+
#: The darwin ID of this annotation.
172+
id: Optional[str] = None
173+
171174
def get_sub(self, annotation_type: str) -> Optional[SubAnnotation]:
172175
"""
173176
Returns the first SubAnnotation that matches the given type.
@@ -219,6 +222,9 @@ class VideoAnnotation:
219222
#: Authorship of the annotation (reviewers)
220223
reviewers: Optional[List[AnnotationAuthor]] = None
221224

225+
#: The darwin ID of this annotation.
226+
id: Optional[str] = None
227+
222228
def get_data(
223229
self,
224230
only_keyframes: bool = True,
@@ -388,6 +394,9 @@ class AnnotationFile:
388394
# e.g. (1, 0, 'a')
389395
version: AnnotationFileVersion = field(default_factory=AnnotationFileVersion)
390396

397+
# The darwin ID of the item that these annotations belong to.
398+
item_id: Optional[str] = None
399+
391400
@property
392401
def full_path(self) -> str:
393402
"""

darwin/utils.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile:
436436
version=_parse_version(data),
437437
path=path,
438438
filename=item["name"],
439+
item_id=item.get("source_info", {}).get("item_id", None),
439440
dataset_name=item.get("source_info", {}).get("dataset", {}).get("name", None),
440441
annotation_classes=annotation_classes,
441442
annotations=annotations,
@@ -456,6 +457,7 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile:
456457
version=_parse_version(data),
457458
path=path,
458459
filename=item["name"],
460+
item_id=item.get("source_info", {}).get("item_id", None),
459461
dataset_name=item.get("source_info", {}).get("dataset", {}).get("name", None),
460462
annotation_classes=annotation_classes,
461463
annotations=annotations,
@@ -632,6 +634,8 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati
632634
print(f"[WARNING] Unsupported annotation type: '{annotation.keys()}'")
633635
return None
634636

637+
if "id" in annotation:
638+
main_annotation.id = annotation["id"]
635639
if "instance_id" in annotation:
636640
main_annotation.subs.append(dt.make_instance_id(annotation["instance_id"]["value"]))
637641
if "attributes" in annotation:
@@ -662,7 +666,9 @@ def _parse_darwin_video_annotation(annotation: dict) -> Optional[dt.VideoAnnotat
662666
keyframes: Dict[int, bool] = {}
663667
frames = {**annotation.get("frames", {}), **annotation.get("sections", {})}
664668
for f, frame in frames.items():
665-
frame_annotations[int(f)] = _parse_darwin_annotation({**frame, **{"name": name}})
669+
frame_annotations[int(f)] = _parse_darwin_annotation(
670+
{**frame, **{"name": name, "id": annotation.get("id", None)}}
671+
)
666672
keyframes[int(f)] = frame.get("keyframe", False)
667673

668674
if not frame_annotations:
@@ -675,6 +681,9 @@ def _parse_darwin_video_annotation(annotation: dict) -> Optional[dt.VideoAnnotat
675681
slot_names=parse_slot_names(annotation),
676682
)
677683

684+
if "id" in annotation:
685+
main_annotation.id = annotation["id"]
686+
678687
if "annotators" in annotation:
679688
main_annotation.annotators = _parse_annotators(annotation["annotators"])
680689

@@ -734,6 +743,7 @@ def split_video_annotation(annotation: dt.AnnotationFile) -> List[dt.AnnotationF
734743
frame_url,
735744
annotation.workview_url,
736745
annotation.seq,
746+
item_id=annotation.item_id,
737747
slots=annotation.slots,
738748
)
739749
)

tests/darwin/utils_test.py

Lines changed: 122 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def it_parses_darwin_images_correctly(tmp_path):
149149
assert annotation_file.path == import_file
150150
assert annotation_file.filename == "P49-RediPad-ProPlayLEFTY_442.jpg"
151151
assert annotation_file.dataset_name == None
152-
assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix='')
152+
assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix="")
153153

154154
assert len(annotation_file.annotations) == 2
155155
assert len(annotation_file.annotation_classes) == 2
@@ -236,7 +236,7 @@ def it_parses_darwin_videos_correctly(tmp_path):
236236
assert annotation_file.path == import_file
237237
assert annotation_file.filename == "above tractor.mp4"
238238
assert annotation_file.dataset_name == None
239-
assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix='')
239+
assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix="")
240240

241241
assert len(annotation_file.annotations) == 1
242242
assert len(annotation_file.annotation_classes) == 1
@@ -272,7 +272,7 @@ def it_parses_darwin_videos_correctly(tmp_path):
272272
)
273273
]
274274

275-
def it_parses_darwin_v2_correctly(tmp_path):
275+
def it_parses_darwin_v2_images_correctly(tmp_path):
276276
content = """
277277
{
278278
"version": "2.0",
@@ -355,19 +355,135 @@ def it_parses_darwin_v2_correctly(tmp_path):
355355
assert annotation_file.path == import_file
356356
assert annotation_file.filename == "item-0.jpg"
357357
assert annotation_file.dataset_name == "Dataset 0"
358-
assert annotation_file.version == dt.AnnotationFileVersion(major=2, minor=0, suffix='')
358+
assert annotation_file.item_id == "0185c280-bbad-6117-71a7-a6853a6e3f2e"
359+
assert annotation_file.version == dt.AnnotationFileVersion(major=2, minor=0, suffix="")
359360

360361
assert len(annotation_file.annotations) == 1
361362
assert len(annotation_file.annotation_classes) == 1
363+
assert annotation_file.annotations[0].id == "f8f5f235-bd47-47be-b4fe-07d49e0177a7"
362364
assert not annotation_file.is_video
363365
assert annotation_file.image_width == 123
364366
assert annotation_file.image_height == 456
365-
assert annotation_file.image_url == "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f"
366-
assert annotation_file.workview_url == "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e"
367+
assert (
368+
annotation_file.image_url
369+
== "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f"
370+
)
371+
assert (
372+
annotation_file.workview_url
373+
== "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e"
374+
)
367375
assert not annotation_file.seq
368376
assert not annotation_file.frame_urls
369377
assert annotation_file.remote_path == "/path-0/folder"
370378

379+
def it_parses_darwin_v2_videos_correctly(tmp_path):
380+
content = """
381+
{
382+
"version": "2.0",
383+
"schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json_2_0.schema.json",
384+
"item": {
385+
"name": "item-0.mp4",
386+
"path": "/path-0/folder",
387+
"source_info": {
388+
"dataset": {
389+
"name": "Dataset 0",
390+
"slug": "dataset-0",
391+
"dataset_management_url": "http://example.com/datasets/545/dataset-management"
392+
},
393+
"item_id": "0185c280-bbad-6117-71a7-a6853a6e3f2e",
394+
"team": {
395+
"name": "Team 0",
396+
"slug": "team-0"
397+
},
398+
"workview_url": "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e"
399+
},
400+
"slots": [
401+
{
402+
"type": "video",
403+
"slot_name": "0",
404+
"width": 123,
405+
"height": 456,
406+
"thumbnail_url": "http://example.com/fake-api-url/v2/teams/v7/files/71857eb3-6feb-428a-8fc6-0c8a895ea611/thumbnail",
407+
"frame_urls": [
408+
"http://example.com/fake-api-url/v2/teams/v7/files/71857eb3-6feb-428a-8fc6-0c8a895ea611/frames/1",
409+
"http://example.com/fake-api-url/v2/teams/v7/files/71857eb3-6feb-428a-8fc6-0c8a895ea611/frames/2"
410+
],
411+
"frame_count": 2,
412+
"source_files": [
413+
{
414+
"file_name": "file-0",
415+
"url": "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f"
416+
}
417+
]
418+
}
419+
]
420+
},
421+
"annotations": [
422+
{
423+
"frames": {
424+
"3": {
425+
"bounding_box": {
426+
"h": 2,
427+
"w": 1,
428+
"x": 1,
429+
"y": 1
430+
},
431+
"polygon": {
432+
"paths": [
433+
[
434+
{ "x": 1, "y": 1 },
435+
{ "x": 2, "y": 2 },
436+
{ "x": 1, "y": 3 }
437+
]
438+
]
439+
}
440+
}
441+
},
442+
"id": "f8f5f235-bd47-47be-b4fe-07d49e0177a7",
443+
"interpolate_algorithm": "linear-1.1",
444+
"interpolated": true,
445+
"name": "polygon",
446+
"ranges": [ [ 0, 1 ] ],
447+
"slot_names": [
448+
"1"
449+
]
450+
}
451+
]
452+
}
453+
"""
454+
455+
directory = tmp_path / "imports"
456+
directory.mkdir()
457+
import_file = directory / "darwin-file.json"
458+
import_file.write_text(content)
459+
460+
annotation_file: dt.AnnotationFile = parse_darwin_json(import_file, None)
461+
462+
assert annotation_file.path == import_file
463+
assert annotation_file.filename == "item-0.mp4"
464+
assert annotation_file.dataset_name == "Dataset 0"
465+
assert annotation_file.item_id == "0185c280-bbad-6117-71a7-a6853a6e3f2e"
466+
assert annotation_file.version == dt.AnnotationFileVersion(major=2, minor=0, suffix="")
467+
468+
assert len(annotation_file.annotations) == 1
469+
assert len(annotation_file.annotation_classes) == 1
470+
assert annotation_file.annotations[0].id == "f8f5f235-bd47-47be-b4fe-07d49e0177a7"
471+
assert list(annotation_file.annotations[0].frames.keys()) == [3]
472+
assert annotation_file.annotations[0].frames[3].id == "f8f5f235-bd47-47be-b4fe-07d49e0177a7"
473+
assert annotation_file.is_video
474+
assert annotation_file.image_width == 123
475+
assert annotation_file.image_height == 456
476+
assert (
477+
annotation_file.image_url
478+
== "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f"
479+
)
480+
assert (
481+
annotation_file.workview_url
482+
== "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e"
483+
)
484+
assert not annotation_file.seq
485+
assert len(annotation_file.frame_urls) == 2
486+
assert annotation_file.remote_path == "/path-0/folder"
371487

372488
def it_returns_None_if_no_annotations_exist(tmp_path):
373489
content = """

0 commit comments

Comments
 (0)