humanprotocol
diff --git a/packages/examples/cvat/exchange-oracle/src/core/annotation_meta.py b/packages/examples/cvat/exchange-oracle/src/core/annotation_meta.py
Lines changed: 1 addition & 0 deletions
diff --git a/packages/examples/cvat/exchange-oracle/src/core/config.py b/packages/examples/cvat/exchange-oracle/src/core/config.py
Lines changed: 3 additions & 2 deletions
diff --git a/packages/examples/cvat/exchange-oracle/src/cvat/api_calls.py b/packages/examples/cvat/exchange-oracle/src/cvat/api_calls.py
Lines changed: 28 additions & 19 deletions
diff --git a/packages/examples/cvat/exchange-oracle/src/handlers/job_creation.py b/packages/examples/cvat/exchange-oracle/src/handlers/job_creation.py
Lines changed: 36 additions & 32 deletions
diff --git a/packages/examples/cvat/exchange-oracle/src/handlers/job_export.py b/packages/examples/cvat/exchange-oracle/src/handlers/job_export.py
Lines changed: 1 addition & 0 deletions
diff --git a/packages/examples/cvat/recording-oracle/src/core/annotation_meta.py b/packages/examples/cvat/recording-oracle/src/core/annotation_meta.py
Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@
 
 class JobMeta(BaseModel):
     job_id: int
+    task_id: int
     annotation_filename: Path
     annotator_wallet_address: str
     assignment_id: str
 
@@ -180,8 +180,9 @@ class CvatConfig:
     cvat_task_creation_check_interval = int(os.environ.get("CVAT_TASK_CREATION_CHECK_INTERVAL", 5))
 
     # quality control settings
-    cvat_val_frames_per_job_count = int(os.environ.get("CVAT_VAL_FRAMES_PER_JOB_COUNT", 2))
     cvat_max_validation_checks = int(os.environ.get("CVAT_MAX_VALIDATION_CHECKS", 3))
+    "Maximum number of attempts to run a validation check on a job after completing annotation"
+
     cvat_iou_threshold = float(os.environ.get("CVAT_IOU_THRESHOLD", 0.8))
     cvat_oks_sigma = float(os.environ.get("CVAT_OKS_SIGMA", 0.1))
 
@@ -232,7 +233,7 @@ class FeaturesConfig:
     default_export_timeout = int(os.environ.get("DEFAULT_EXPORT_TIMEOUT", 60))
     "Timeout, in seconds, for annotations or dataset export waiting"
 
-    default_import_timeout = int(os.environ.get("DEFAULT_IMPORT_TIMEOUT", 60))
+    default_import_timeout = int(os.environ.get("DEFAULT_IMPORT_TIMEOUT", 60 * 60))
     "Timeout, in seconds, for waiting on GT annotations import"
 
     request_logging_enabled = to_bool(os.getenv("REQUEST_LOGGING_ENABLED", "0"))
 
@@ -26,6 +26,10 @@
 _NOTSET = object()
 
 
+class CVATException(Exception):
+    """Indicates that the CVAT API returned an unexpected response"""
+
+
 def _request_annotations(endpoint: Endpoint, cvat_id: int, format_name: str) -> bool:
     """
     Requests annotations export.
@@ -314,7 +318,7 @@ def create_task(
     project_id: int,
     name: str,
     *,
-    segment_size: int = Config.cvat_config.cvat_task_segment_size,
+    segment_size: int,
 ) -> models.TaskRead:
     logger = logging.getLogger("app")
     with get_api_client() as api_client:
@@ -325,7 +329,7 @@ def create_task(
             segment_size=segment_size,
         )
         try:
-            (task_info, response) = api_client.tasks_api.create(task_write_request)
+            (task_info, _) = api_client.tasks_api.create(task_write_request)
             return task_info
 
         except exceptions.ApiException as e:
@@ -351,6 +355,7 @@ def put_task_data(
     task_id: int,
     cloudstorage_id: int,
     *,
+    chunk_size: int,
     filenames: list[str] | None = None,
     sort_images: bool = True,
     validation_params: dict[str, str | float | list[str]] | None = None,
@@ -384,14 +389,11 @@ def put_task_data(
                 mode=models.ValidationMode("gt_pool"),
                 frames=gt_filenames,
                 frame_selection_method=models.FrameSelectionMethod("manual"),
-                frames_per_job_count=validation_params.get(
-                    "gt_frames_per_job_count",
-                    Config.cvat_config.cvat_val_frames_per_job_count,
-                ),
+                frames_per_job_count=validation_params["gt_frames_per_job_count"],
             )
 
         data_request = models.DataRequest(
-            chunk_size=Config.cvat_config.cvat_task_segment_size,
+            chunk_size=chunk_size,
             cloud_storage_id=cloudstorage_id,
             image_quality=Config.cvat_config.cvat_default_image_quality,
             use_cache=True,
@@ -614,9 +616,11 @@ def get_gt_job(task_id: int) -> models.JobRead:
     with get_api_client() as api_client:
         try:
             (paginated_jobs, _) = api_client.jobs_api.list(task_id=task_id, type="ground_truth")
-            assert (
-                len(paginated_jobs["results"]) == 1
-            ), f'CVAT returned {len(paginated_jobs["results"])} GT jobs'
+            if (gt_jobs_count := len(paginated_jobs["results"])) != 1:
+                raise CVATException(
+                    f"CVAT returned {gt_jobs_count} GT jobs for the task({task_id})"
+                )
+
             return paginated_jobs["results"][0]
         except (exceptions.ApiException, AssertionError) as ex:
             logger.exception(f"Exception when calling JobsApi.list(): {ex}\n")
@@ -631,7 +635,7 @@ def upload_gt_annotations(
     sleep_interval: int = 5,
     timeout: int | None = Config.features.default_import_timeout,
 ) -> None:
-    # FUTURE-TODO: use job.import_annotations when CVAT will support waiting timeout
+    # FUTURE-TODO: use job.import_annotations when CVAT supports a waiting timeout
     start_time = datetime.now(timezone.utc)
     logger = logging.getLogger("app")
 
@@ -653,7 +657,11 @@ def upload_gt_annotations(
             raise
 
         request_id = json.loads(response.data).get("rq_id")
-        assert request_id, "CVAT server have not returned rq_id in the response."
+        if not request_id:
+            raise CVATException(
+                "CVAT server has not returned rq_id in the response when "
+                f"uploading GT annotations to the {job_id} job"
+            )
 
         while True:
             try:
@@ -695,23 +703,24 @@ def get_quality_control_settings(task_id: int) -> models.QualitySettings:
     with get_api_client() as api_client:
         try:
             paginated_data, _ = api_client.quality_api.list_settings(task_id=task_id)
-            assert len(paginated_data["results"]) == 1, (
-                f'CVAT returned {len(paginated_data["results"])}'
-                "quality control settings associated with the task"
-            )
+            # Exactly one quality settings object is expected per task
+            if (settings_count := len(paginated_data["results"])) != 1:
+                raise CVATException(
+                    f"CVAT returned {settings_count} "
+                    f"quality control settings associated with the task({task_id})"
+                )
             return paginated_data["results"][0]
 
-        except (exceptions.ApiException, AssertionError) as e:
-            logger.exception(f"Exception when calling QualityApi.list_settings(): {e}\n")
+        except exceptions.ApiException as ex:
+            logger.exception(f"Exception when calling QualityApi.list_settings(): {ex}\n")
             raise
 
 
 def update_quality_control_settings(
     settings_id: int,
     *,
     target_metric_threshold: float,
-    max_validations_per_job: int = Config.cvat_config.cvat_max_validation_checks,
     target_metric: str = "accuracy",
+    max_validations_per_job: int = Config.cvat_config.cvat_max_validation_checks,
     iou_threshold: float = Config.cvat_config.cvat_iou_threshold,
     oks_sigma: float = Config.cvat_config.cvat_oks_sigma,
 ) -> None:
 
@@ -211,10 +211,10 @@ def _setup_gt_job(self, task_id: int, dataset_path: Path, format_name: str) -> N
         cvat_api.upload_gt_annotations(gt_job.id, dataset_path, format_name=format_name)
         cvat_api.finish_gt_job(gt_job.id)
 
-    def _setup_quality_settings(self, task_id: int, *, quality_threshold: float) -> None:
+    def _setup_quality_settings(self, task_id: int, **overrides) -> None:
+        """
+        Updates the quality control settings of the CVAT task.
+
+        The target metric threshold defaults to the manifest's min_quality value.
+        Keyword arguments in 'overrides' are forwarded to
+        cvat_api.update_quality_control_settings() and take precedence over the default.
+        """
         settings = cvat_api.get_quality_control_settings(task_id)
-        cvat_api.update_quality_control_settings(
-            settings.id, target_metric_threshold=quality_threshold
-        )
+        # Merge into a dict first: passing target_metric_threshold both explicitly and via
+        # **overrides would raise TypeError (duplicate keyword) instead of overriding it.
+        values = {"target_metric_threshold": self.manifest.validation.min_quality, **overrides}
+        cvat_api.update_quality_control_settings(settings.id, **values)
 
     @abstractmethod
@@ -292,7 +292,7 @@ def _get_gt_filenames(
 
         return list(matched_gt_filenames)
 
-    def split_dataset_per_task(
+    def _split_dataset_per_task(
         self,
         data_filenames: list[str],
         *,
@@ -324,7 +324,6 @@ def build(self):
         # Create task configuration
         gt_filenames = self._get_gt_filenames(gt_dataset, data_filenames, manifest=manifest)
         data_to_be_annotated = [f for f in data_filenames if f not in set(gt_filenames)]
-        segment_size = manifest.annotation.job_size or Config.cvat_config.cvat_task_segment_size
         label_configuration = make_label_configuration(manifest)
 
         self._upload_task_meta(gt_dataset)
@@ -346,7 +345,7 @@ def build(self):
         cvat_webhook = cvat_api.create_cvat_webhook(cvat_project.id)
 
         with SessionLocal.begin() as session:
-            segment_size = manifest.annotation.job_size or Config.cvat_config.cvat_task_segment_size
+            segment_size = manifest.annotation.job_size
             total_jobs = math.ceil(len(data_to_be_annotated) / segment_size)
 
             self.logger.info(
@@ -376,7 +375,7 @@ def build(self):
             db_service.get_project_by_id(session, project_id, for_update=True)  # lock the row
             db_service.add_project_images(session, cvat_project.id, data_filenames)
 
-            for data_subset in self.split_dataset_per_task(
+            for data_subset in self._split_dataset_per_task(
                 data_to_be_annotated,
                 subset_size=Config.cvat_config.cvat_max_jobs_per_task * segment_size,
             ):
@@ -388,13 +387,14 @@ def build(self):
                 )
                 db_service.get_task_by_id(session, task_id, for_update=True)  # lock the row
 
-                # Actual task creation in CVAT takes some time, so it's done in an async process.
                 # The task is fully created once 'update:task' or 'update:job' webhook is received.
                 cvat_api.put_task_data(
                     cvat_task.id,
                     cloud_storage.id,
                     filenames=data_subset,
                     sort_images=False,
+                    # use the same value for the chunk size as for the job size
+                    chunk_size=segment_size,
                     validation_params={
                         "gt_filenames": gt_filenames,  # include whole GT dataset into each task
                         "gt_frames_per_job_count": manifest.validation.val_size,
@@ -453,15 +453,12 @@ def _setup_gt_job_for_cvat_task(
             task_id=task_id, gt_dataset=gt_dataset, dm_export_format="datumaro"
         )
 
-    def _setup_quality_settings(self, task_id) -> None:
+    def _setup_quality_settings(self, task_id: int, **overrides) -> None:
+        """
+        Sets up quality settings, using the estimated mean GT bbox radius as the
+        default oks_sigma. Values passed in 'overrides' take precedence.
+        """
         assert self._mean_gt_bbox_radius_estimation is not _unset
 
-        settings = cvat_api.get_quality_control_settings(task_id)
-        cvat_api.update_quality_control_settings(
-            settings.id,
-            target_metric_threshold=self.manifest.validation.min_quality,
-            oks_sigma=self._mean_gt_bbox_radius_estimation,
-        )
+        quality_params = {"oks_sigma": self._mean_gt_bbox_radius_estimation, **overrides}
+        super()._setup_quality_settings(task_id, **quality_params)
 
 
 class BoxesFromPointsTaskBuilder(_TaskBuilderBase):
@@ -476,7 +473,7 @@ def __init__(self, manifest: TaskManifest, escrow_address: str, chain_id: int) -
         self._input_gt_dataset: _MaybeUnset[dm.Dataset] = _unset
         self._gt_dataset: _MaybeUnset[dm.Dataset] = _unset
         self._gt_roi_dataset: _MaybeUnset[dm.Dataset] = _unset
-        self._gt_filenames: _MaybeUnset[dm.Dataset] = _unset
+        self._gt_filenames: _MaybeUnset[Sequence[str]] = _unset
         self._points_dataset: _MaybeUnset[dm.Dataset] = _unset
 
         self._bbox_point_mapping: _MaybeUnset[boxes_from_points_task.BboxPointMapping] = _unset
@@ -1372,7 +1369,7 @@ def _roi_key(e):
                     roi_bytes,
                 )
 
-    def split_dataset_per_task(
+    def _split_dataset_per_task(
         self,
         data_filenames: list[str],
         *,
@@ -1435,9 +1432,7 @@ def _create_on_cvat(self):
         cvat_webhook = cvat_api.create_cvat_webhook(cvat_project.id)
 
         with SessionLocal.begin() as session:
-            segment_size = (
-                self.manifest.annotation.job_size or Config.cvat_config.cvat_task_segment_size
-            )
+            segment_size = self.manifest.annotation.job_size
             total_jobs = math.ceil(len(self._data_filenames_to_be_annotated) / segment_size)
             self.logger.info(
                 "Task creation for escrow '%s': will create %s assignments",
@@ -1475,7 +1470,7 @@ def _create_on_cvat(self):
                 ],
             )
 
-            for data_subset in self.split_dataset_per_task(
+            for data_subset in self._split_dataset_per_task(
                 self._data_filenames_to_be_annotated,
                 subset_size=Config.cvat_config.cvat_max_jobs_per_task * segment_size,
             ):
@@ -1495,13 +1490,13 @@ def _create_on_cvat(self):
                     for fn in self._gt_filenames
                 ]
 
-                # FUTURE-FIXME:
-                # Actual task creation in CVAT takes some time, so it's done in an async process.
                 cvat_api.put_task_data(
                     cvat_task.id,
                     cvat_cloud_storage.id,
                     filenames=filenames,
                     sort_images=False,
+                    # use the same value for the chunk size as for the job size
+                    chunk_size=segment_size,
                     validation_params={
                         "gt_filenames": gt_filenames,
                         "gt_frames_per_job_count": self.manifest.validation.val_size,
@@ -1568,6 +1563,10 @@ def __init__(self, manifest: TaskManifest, escrow_address: str, chain_id: int) -
         self._excluded_gt_info: _MaybeUnset[_ExcludedAnnotationsInfo] = _unset
         self._excluded_boxes_info: _MaybeUnset[_ExcludedAnnotationsInfo] = _unset
 
+        # Configuration / constants
+        self.job_size_mult = skeletons_from_boxes_task.DEFAULT_ASSIGNMENT_SIZE_MULTIPLIER
+        "Job size multiplier"
+
         # TODO: consider WebP if produced files are too big
         self.roi_file_ext = ".png"  # supposed to be lossless and reasonably compressing
         "File extension for RoI images, with leading dot (.) included"
@@ -2218,6 +2217,13 @@ def _mangle_filenames(self):
             roi_info.bbox_id: str(uuid.uuid4()) + self.roi_file_ext for roi_info in self._roi_infos
         }
 
+    @property
+    def _task_segment_size(self):
+        # Unlike other task types, here we use a grid of RoIs,
+        # so the absolute job size numbers from manifest are multiplied by the job size multiplier.
+        # Then, we add a percent of job tiles for validation, keeping the requested ratio.
+        return self.manifest.annotation.job_size * self.job_size_mult
+
     def _prepare_task_params(self):
         assert self._roi_infos is not _unset
         assert self._skeleton_bbox_mapping is not _unset
@@ -2233,9 +2239,8 @@ def _prepare_task_params(self):
 
         roi_info_by_id = {roi_info.bbox_id: roi_info for roi_info in self._roi_infos}
         self._roi_info_by_id = roi_info_by_id
-        segment_size = (
-            self.manifest.annotation.job_size or Config.cvat_config.cvat_task_segment_size
-        )
+
+        segment_size = self._task_segment_size
 
         for label_id, _ in enumerate(self.manifest.annotation.labels):
             label_gt_roi_ids = set(
@@ -2458,6 +2463,7 @@ def _task_params_label_key(ts):
         label_specs_by_skeleton = {
             skeleton_label_id: [
                 {
+                    # why not just use skeleton node?
                     "name": self.point_labels[(skeleton_label.name, skeleton_point)],
                     "type": "points",
                 }
@@ -2474,9 +2480,7 @@ def _task_params_label_key(ts):
         _params["bucket_host"] = "http://minio:9010"
         cvat_cloud_storage = cvat_api.create_cloudstorage(**_params)
 
-        segment_size = (
-            self.manifest.annotation.job_size or Config.cvat_config.cvat_task_segment_size
-        )
+        segment_size = self._task_segment_size
 
         total_jobs = sum(
             len(self.manifest.annotation.labels[tp.label_id].nodes)
@@ -2620,19 +2624,19 @@ def _task_params_label_key(ts):
                         )
                         db_service.get_task_by_id(session, task_id, for_update=True)  # lock the row
 
-                        # FUTURE-FIXME: now we must wait for the task to be created to set up GT
-                        # Actual task creation in CVAT takes some time,
-                        # so it's done in an async process.
                         # The task is fully created once 'update:task' or 'update:job'
                         # webhook is received.
                         cvat_api.put_task_data(
                             cvat_task.id,
                             cvat_cloud_storage.id,
                             filenames=point_label_filenames + gt_point_label_filenames,
                             sort_images=False,
+                            # use the same value for the chunk size as for the job size
+                            chunk_size=segment_size,
                             validation_params={
                                 "gt_filenames": gt_point_label_filenames,
-                                "gt_frames_per_job_count": self.manifest.validation.val_size,
+                                "gt_frames_per_job_count": self.manifest.validation.val_size
+                                * self.job_size_mult,
                             },
                         )
 
 
@@ -56,6 +56,7 @@ def prepare_annotation_metafile(
                 annotation_filename=job_annotations[job.cvat_id].filename,
                 annotator_wallet_address=job.latest_assignment.user_wallet_address,
                 assignment_id=job.latest_assignment.id,
+                task_id=job.cvat_task_id,
             )
             for job in jobs
         ]
 
@@ -8,6 +8,7 @@
 
 class JobMeta(BaseModel):
     job_id: int
+    task_id: int
     annotation_filename: Path
     annotator_wallet_address: str
     assignment_id: str
Original file line number	Diff line number	Diff line change
`@@ -56,6 +56,7 @@ def prepare_annotation_metafile(`
`56`	`56`	`annotation_filename=job_annotations[job.cvat_id].filename,`
`57`	`57`	`annotator_wallet_address=job.latest_assignment.user_wallet_address,`
`58`	`58`	`assignment_id=job.latest_assignment.id,`
	`59`	`+ task_id=job.cvat_task_id,`
`59`	`60`	`)`
`60`	`61`	`for job in jobs`
`61`	`62`	`]`