humanprotocol
diff --git a/‎packages/examples/cvat/recording-oracle/alembic/versions/76f0bc042477_update_gt_stats.py‎
Lines changed: 60 additions & 0 deletions b/‎packages/examples/cvat/recording-oracle/alembic/versions/76f0bc042477_update_gt_stats.py‎
Lines changed: 60 additions & 0 deletions
diff --git a/‎packages/examples/cvat/recording-oracle/src/.env.template‎
Lines changed: 4 additions & 3 deletions b/‎packages/examples/cvat/recording-oracle/src/.env.template‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎packages/examples/cvat/recording-oracle/src/core/config.py‎
Lines changed: 25 additions & 17 deletions b/‎packages/examples/cvat/recording-oracle/src/core/config.py‎
Lines changed: 25 additions & 17 deletions
diff --git a/‎packages/examples/cvat/recording-oracle/src/core/gt_stats.py‎
Lines changed: 6 additions & 5 deletions b/‎packages/examples/cvat/recording-oracle/src/core/gt_stats.py‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎packages/examples/cvat/recording-oracle/src/core/validation_errors.py‎
Lines changed: 13 additions & 0 deletions b/‎packages/examples/cvat/recording-oracle/src/core/validation_errors.py‎
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,60 @@
+"""Update GT stats with total_uses field
+
+Revision ID: 76f0bc042477
+Revises: 9d4367899f90
+Create Date: 2024-12-12 18:14:43.885249
+
+"""
+
+import sqlalchemy as sa
+from sqlalchemy import Column, ForeignKey, Integer, String, update
+from sqlalchemy.orm import declarative_base
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "76f0bc042477"
+down_revision = "9d4367899f90"
+branch_labels = None
+depends_on = None
+
+Base = declarative_base()
+
+
+class GtStats(Base):
+    """
+    Migration-local mapping of the "gt_stats" table.
+
+    It is used by upgrade() to build the UPDATE statement that fills total_uses.
+    Only the columns listed below are mapped; the "enabled" column added by this
+    revision is not declared here.
+    """
+
+    __tablename__ = "gt_stats"
+
+    # A composite primary key is used: (task_id, gt_frame_name)
+    task_id = Column(
+        String, ForeignKey("tasks.id", ondelete="CASCADE"), primary_key=True, nullable=False
+    )
+    gt_frame_name = Column(String, primary_key=True, nullable=False)
+
+    failed_attempts = Column(Integer, default=0, nullable=False)
+    accepted_attempts = Column(Integer, default=0, nullable=False)
+    # Column introduced by this revision (see upgrade())
+    total_uses = Column(Integer, default=0, nullable=False)
+
+
+def upgrade() -> None:
+    """
+    Add the "total_uses" and "enabled" columns to "gt_stats" and backfill total_uses.
+
+    Both columns are created as NOT NULL with a server default, total_uses is then set
+    to accepted_attempts + failed_attempts, and finally the server defaults are removed.
+    """
+    # ### commands auto generated by Alembic - please adjust! ###
+    # The server defaults give a value to the rows that already exist in the table,
+    # which the NOT NULL constraint requires.
+    op.add_column(
+        "gt_stats", sa.Column("total_uses", sa.Integer(), nullable=False, server_default="0")
+    )
+    op.add_column(
+        "gt_stats", sa.Column("enabled", sa.Boolean(), nullable=False, server_default="True")
+    )
+    # ### end Alembic commands ###
+
+    # Backfill: the UPDATE has no WHERE clause, so it is applied to every row
+    op.execute(
+        update(GtStats).values(total_uses=GtStats.accepted_attempts + GtStats.failed_attempts)
+    )
+
+    # Remove the server defaults that were set when the columns were added
+    op.alter_column("gt_stats", "total_uses", server_default=None)
+    op.alter_column("gt_stats", "enabled", server_default=None)
+
+
+def downgrade() -> None:
+    """Drop the "total_uses" and "enabled" columns from "gt_stats"."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column("gt_stats", "total_uses")
+    op.drop_column("gt_stats", "enabled")
+    # ### end Alembic commands ###
@@ -76,12 +76,13 @@ ENABLE_CUSTOM_CLOUD_HOST=
 
 # Validation
 
-DEFAULT_POINT_VALIDITY_RELATIVE_RADIUS=
-DEFAULT_OKS_SIGMA=
-GT_FAILURE_THRESHOLD=
+MIN_AVAILABLE_GT_THRESHOLD=
+MAX_USABLE_GT_SHARE=
 GT_BAN_THRESHOLD=
 UNVERIFIABLE_ASSIGNMENTS_THRESHOLD=
 MAX_ESCROW_ITERATIONS=
+WARMUP_ITERATIONS=
+MIN_WARMUP_PROGRESS=
 
 # Encryption
 PGP_PRIVATE_KEY=
 
@@ -165,43 +165,51 @@ class FeaturesConfig:
 
 
 class ValidationConfig:
-    default_point_validity_relative_radius = float(
-        os.environ.get("DEFAULT_POINT_VALIDITY_RELATIVE_RADIUS", 0.9)
-    )
-
-    default_oks_sigma = float(
-        os.environ.get("DEFAULT_OKS_SIGMA", 0.1)  # average value for COCO points
-    )
-    "Default OKS sigma for GT skeleton points validation. Valid range is (0; 1]"
+    min_available_gt_threshold = float(os.environ.get("MIN_AVAILABLE_GT_THRESHOLD", "0.3"))
+    """
+    The minimum share of available GT frames required to continue annotation attempts.
+    When there is not enough GT left, annotation stops.
+    """
 
-    gt_failure_threshold = float(os.environ.get("GT_FAILURE_THRESHOLD", 0.9))
+    max_gt_share = float(os.environ.get("MAX_USABLE_GT_SHARE", "0.05"))
     """
-    The maximum allowed fraction of failed assignments per GT sample,
-    before it's considered failed for the current validation iteration.
-    v = 0 -> any GT failure leads to image failure
-    v = 1 -> any GT failures do not lead to image failure
+    The maximum share of the dataset to be used for validation. If the available GT share is
+    greater than this number, the extra frames will not be used. It's recommended to keep this
+    value small enough for a faster convergence rate of the annotation process.
     """
 
-    gt_ban_threshold = int(os.environ.get("GT_BAN_THRESHOLD", 3))
+    gt_ban_threshold = float(os.environ.get("GT_BAN_THRESHOLD", "0.03"))
     """
-    The maximum allowed number of failures per GT sample before it's excluded from validation
+    The minimum allowed rating (annotation probability) per GT sample,
+    before it's considered bad and banned for further use.
     """
 
     unverifiable_assignments_threshold = float(
-        os.environ.get("UNVERIFIABLE_ASSIGNMENTS_THRESHOLD", 0.1)
+        os.environ.get("UNVERIFIABLE_ASSIGNMENTS_THRESHOLD", "0.1")
     )
     """
     The maximum allowed fraction of jobs with insufficient GT available for validation.
     Each such job will be accepted "blindly", as we can't validate the annotations.
     """
 
-    max_escrow_iterations = int(os.getenv("MAX_ESCROW_ITERATIONS", "0"))
+    max_escrow_iterations = int(os.getenv("MAX_ESCROW_ITERATIONS", "50"))
     """
     Maximum escrow annotation-validation iterations.
     After this, the escrow is finished automatically.
     Supposed only for testing. Use 0 to disable.
     """
 
+    warmup_iterations = int(os.getenv("WARMUP_ITERATIONS", "1"))
+    """
+    The number of initial escrow iterations used to check that the annotation speed is sufficient.
+    """
+
+    min_warmup_progress = float(os.getenv("MIN_WARMUP_PROGRESS", "10"))
+    """
+    Minimum percentage of accepted jobs in an escrow after the first WARMUP_ITERATIONS iterations.
+    If the value is lower, the escrow annotation is paused for manual investigation.
+    """
+
 
 class EncryptionConfig(_BaseConfig):
     pgp_passphrase = os.environ.get("PGP_PASSPHRASE", "")
 
@@ -6,12 +6,13 @@ class ValidationFrameStats:
     accumulated_quality: float = 0.0
     failed_attempts: int = 0
     accepted_attempts: int = 0
+    total_uses: int = 0
+    enabled: bool = True
 
     @property
-    def average_quality(self) -> float:
-        return self.accumulated_quality / ((self.failed_attempts + self.accepted_attempts) or 1)
+    def rating(self) -> float:
+        return (self.accepted_attempts + 1) / (self.total_uses + 1)
 
 
-_TaskIdValFrameIdPair = tuple[int, int]
-
-GtStats = dict[_TaskIdValFrameIdPair, ValidationFrameStats]
+GtKey = str
+GtStats = dict[GtKey, ValidationFrameStats]
@@ -13,3 +13,16 @@ def __str__(self) -> str:
 
 class LowAccuracyError(DatasetValidationError):
     # Marker subclass: adds no state or behavior of its own to DatasetValidationError
     pass
+
+
+class TooSlowAnnotationError(DatasetValidationError):
+    """
+    Validation error reporting that the escrow annotation progress is too small.
+
+    Stores the progress value and the iteration number passed to the constructor;
+    both are included in the string representation.
+    """
+
+    def __init__(self, current_progress: float, current_iteration: int) -> None:
+        # The base class is initialized without arguments; the message is built in __str__
+        super().__init__()
+        self.current_progress = current_progress
+        self.current_iteration = current_iteration
+
+    def __str__(self) -> str:
+        # current_progress is rendered with two decimals, followed by a "%" sign
+        return (
+            f"Escrow annotation progress is too small: {self.current_progress:.2f}% "
+            f"at the {self.current_iteration} iterations"
+        )