
Commit 0f5d039

Merge pull request #20 from midas-research/feat/job-overlap

overlap added while creating task and gt

2 parents: b81a78d + 859597a

File tree

11 files changed: +250 −110 lines

cvat/apps/dataset_manager/task.py
115 additions, 26 deletions

```diff
@@ -1031,6 +1031,7 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir):
     labels_list = list(labels_queryset.values())
 
     labels_mapping = {}
+    gt_jobs = []
 
     for label in labels_list:
         labels_mapping[label["id"]] = label
@@ -1044,36 +1045,128 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir):
         for attribute in attributes_list:
             labels_mapping[label["id"]]["attributes"][attribute["id"]] = attribute
 
-    slogger.glob.debug("JOB LABELS ATTRIBUTES")
-    slogger.glob.debug(json.dumps(attributes_list))
-
-
-    slogger.glob.debug("JOB LABELS")
-    slogger.glob.debug(json.dumps(labels_list))
-
-    # audio_file_path = os.path.join(temp_dir, str(job_id) + ".wav")
-    # with wave.open(audio_file_path, 'wb') as wave_file:
-    #     wave_file.setnchannels(1)
-    #     wave_file.setsampwidth(4)
-    #     wave_file.setframerate(44100)
-    #     wave_file.writeframes(concat_array)
-
     annotation_audio_chunk_file_paths = chunk_annotation_audio(concat_array, temp_dir, annotations)
 
-    for i, annotation in enumerate(annotations):
-        entry = {
-            "path": os.path.basename(annotation_audio_chunk_file_paths[i]),
+    # handle Gt jobs
+    if job_details.segment.type == "specific_frames":
+        frames = job_details.segment.frames
+        start_frame = frames[0]
+        segment_size = job_details.segment.task.segment_size
+        overlap = job_details.segment.task.overlap
+
+        def generate_jobs(included_frames: list[int], segment_size: int, overlap: int) -> list[dict]:
+            if not included_frames:
+                return []
+
+            jobs = []
+            start_frame = included_frames[0]
+
+            while start_frame + segment_size <= included_frames[-1]:
+                end_frame = start_frame + segment_size - 1
+
+                # Check if both start_frame and end_frame exist in included_frames
+                start_exists = start_frame in included_frames
+                end_exists = end_frame in included_frames
+
+                if start_exists and end_exists:
+                    jobs.append({
+                        "start_frame": start_frame,
+                        "end_frame": end_frame
+                    })
+
+                # Move start_frame back by the overlap for the next job
+                start_frame = end_frame - overlap + 1
+
+                # Find the next valid start frame
+                while start_frame not in included_frames and start_frame <= included_frames[-1]:
+                    start_frame += 1
+
+                # Break if we can't find a valid next start frame
+                if start_frame not in included_frames:
+                    break
+
+            # Handle the last section if necessary
+            last_start = start_frame
+            if last_start in included_frames and last_start < included_frames[-1]:
+                jobs.append({
+                    "start_frame": last_start,
+                    "end_frame": included_frames[-1]
+                })
+
+            return jobs
+
+        gt_jobs = generate_jobs(included_frames=frames, segment_size=segment_size, overlap=overlap)
+        # fetch all jobs of this task
+        task_jobs = Job.objects.filter(segment__task__id=job_details.segment.task_id).order_by('id')
+        start = 0
+        for job_index, job in enumerate(task_jobs):
+            for i, gt_job in enumerate(gt_jobs):
+                if job.segment.start_frame == gt_job['start_frame'] and job.segment.stop_frame == gt_job['end_frame']:
+                    diff_in_frame = gt_job['end_frame'] - gt_job['start_frame'] + 1
+                    duration = int(((job_details.segment.task.audio_total_duration/job_details.segment.task.data.size) * diff_in_frame)/1000)
+                    gt_jobs[i]['job_index'] = job_index
+                    gt_jobs[i]['start'] = start
+                    gt_jobs[i]['end'] = start + duration
+                    start = start + duration
+                    break
+
+    def process_annotations(annotations, gt_jobs, job_details, labels_mapping):
+        final_data = []
+
+        for i, annotation in enumerate(annotations):
+            start = annotation["points"][0]
+            end = annotation["points"][3]
+
+            if job_details.segment.type == "specific_frames":
+                overlapping_jobs = []
+                for gt_job in gt_jobs:
+                    if not (end <= gt_job['start'] or start >= gt_job['end']):
+                        overlapping_jobs.append(gt_job)
+
+                if len(overlapping_jobs) > 1:
+                    for job in overlapping_jobs:
+                        entry = create_entry(annotation, job_details, labels_mapping, i)
+                        entry['job_id'] = job['job_index']
+                        entry['start'] = 0 if start <= job['start'] else start - job['start']
+                        entry['end'] = job['end'] - job['start'] if end >= job['end'] else end - job['start']
+                        add_attributes(entry, annotation, labels_mapping)
+                        final_data.append(entry)
+                else:
+                    entry = create_entry(annotation, job_details, labels_mapping, i)
+                    for gt_job in gt_jobs:
+                        if gt_job['start'] <= start and gt_job['end'] >= end:
+                            entry['job_id'] = gt_job['job_index']
+                            entry['start'] = start - gt_job['start']
+                            entry['end'] = end - gt_job['start']
+                            break
+                    add_attributes(entry, annotation, labels_mapping)
+                    final_data.append(entry)
+            else:
+                # Handle normal jobs
+                entry = create_entry(annotation, job_details, labels_mapping, i)
+                entry['job_id'] = job_details.id
+                entry['start'] = start
+                entry['end'] = end
+                add_attributes(entry, annotation, labels_mapping)
+                final_data.append(entry)
+
+        return final_data
+
+    def create_entry(annotation, job_details, labels_mapping, index):
+        return {
+            "project_id": job_details.segment.task.project_id,
+            "task_id": job_details.segment.task_id,
+            "path": os.path.basename(annotation_audio_chunk_file_paths[index]),
             "sentence": annotation.get("transcript", ""),
             "age": annotation.get("age", ""),
             "gender": annotation.get("gender", ""),
             "accents": annotation.get("accent", ""),
             "locale": annotation.get("locale", ""),
             "emotion": annotation.get("emotion", ""),
             "label": labels_mapping[annotation["label_id"]]["name"],
-            "start": annotation["points"][0],
-            "end": annotation["points"][3]
         }
 
+    def add_attributes(entry, annotation, labels_mapping):
         attributes = annotation.get("attributes", [])
         for idx, attr in enumerate(attributes):
             annotation_attribute_id = attr.get("spec_id", "")
@@ -1085,12 +1178,7 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir):
             entry[f"attribute_{idx+1}_name"] = attribute_name
             entry[f"attribute_{idx+1}_value"] = attribute_val
 
-        final_data.append(entry)
-
-    slogger.glob.debug("JOB ANNOTATION DATA")
-    slogger.glob.debug(json.dumps(final_data))
-    slogger.glob.debug("All ANNOTATIONs DATA")
-    slogger.glob.debug(json.dumps(annotations))
+    final_data = process_annotations(annotations, gt_jobs, job_details, labels_mapping)
     return final_data, annotation_audio_chunk_file_paths
 
 def convert_annotation_data_format(data, format_name):
@@ -1247,7 +1335,8 @@ def export_audino_job(job_id, dst_file, format_name, server_url=None, save_image
     df = pd.DataFrame(final_data)
 
     # sorting by start column in ascending order
-    df = df.sort_values(by='start')
+    if 'job_id' in df.columns:
+        df = df.sort_values(by='job_id')
 
     # Saving the metadata file
     meta_data_file_path = os.path.join(temp_dir_base, str(job_id) + ".tsv")
```
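
The heart of this change is the `generate_jobs` helper: it slides a `segment_size`-frame window across the ground-truth frame list, stepping back by `overlap` frames between consecutive windows, and closes out any trailing partial window. A standalone sketch of that logic on made-up inputs (frames 0–19, `segment_size=8`, and `overlap=2` are illustrative values, not from the commit):

```python
# Standalone copy of the generate_jobs window logic added above, run on
# illustrative inputs (frames 0-19, segment_size=8, overlap=2 are made up).
def generate_jobs(included_frames, segment_size, overlap):
    if not included_frames:
        return []

    jobs = []
    start_frame = included_frames[0]

    while start_frame + segment_size <= included_frames[-1]:
        end_frame = start_frame + segment_size - 1
        # Only emit a window whose endpoints are both valid GT frames
        if start_frame in included_frames and end_frame in included_frames:
            jobs.append({"start_frame": start_frame, "end_frame": end_frame})

        # Step back by `overlap` before starting the next window
        start_frame = end_frame - overlap + 1
        while start_frame not in included_frames and start_frame <= included_frames[-1]:
            start_frame += 1
        if start_frame not in included_frames:
            break

    # Close out a trailing partial window
    if start_frame in included_frames and start_frame < included_frames[-1]:
        jobs.append({"start_frame": start_frame, "end_frame": included_frames[-1]})
    return jobs

print(generate_jobs(list(range(20)), segment_size=8, overlap=2))
# [{'start_frame': 0, 'end_frame': 7},
#  {'start_frame': 6, 'end_frame': 13},
#  {'start_frame': 12, 'end_frame': 19}]
```

Consecutive windows share exactly `overlap` frames (6–7 and 12–13 above), which is why `process_annotations` can emit one copy of an annotation per overlapping GT job, re-based to each job's local start time.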

cvat/apps/engine/frame_provider.py
2 additions, 2 deletions

```diff
@@ -156,8 +156,8 @@ def get_chunk_number(self, frame_number):
 
     def _validate_chunk_number(self, chunk_number):
         chunk_number_ = int(chunk_number)
-        if chunk_number_ < 0 or chunk_number_ >= math.ceil(self._db_data.size / self._db_data.chunk_size):
-            raise ValidationError('requested chunk does not exist')
+        # if chunk_number_ < 0 or chunk_number_ >= math.ceil(self._db_data.size / self._db_data.chunk_size):
+        #     raise ValidationError('requested chunk does not exist')
 
         return chunk_number_
```

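With overlapping chunks, `db_data.size` counts unique frames while the number of stored chunks grows with stride `chunk_size - overlap`, so the old bound `ceil(size / chunk_size)` undercounts and the check is disabled rather than adjusted. If a guard is wanted later, an overlap-aware bound might look like this (a sketch only; `max_chunk_count` is a hypothetical helper, not part of this commit):

```python
import math

# Hypothetical overlap-aware bound, NOT part of this commit: the number of
# chunks produced when each chunk holds `chunk_size` frames and consecutive
# chunks share `overlap` frames (stride between chunk starts = chunk_size - overlap).
def max_chunk_count(size: int, chunk_size: int, overlap: int) -> int:
    if size <= chunk_size:
        return 1
    stride = chunk_size - overlap
    return 1 + math.ceil((size - chunk_size) / stride)

# 10 unique frames, chunk_size=4, overlap=1 -> chunks [0-3], [3-6], [6-9]
assert max_chunk_count(10, 4, 1) == 3
```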
New file: 15 additions, 0 deletions

```diff
@@ -0,0 +1,15 @@
+package notifications
+
+default allow = false
+
+allow {
+    # Allow authenticated users to view their notifications
+    input.scope == "view"
+    input.auth.user.id != null
+}
+
+allow {
+    # Allow marking notifications as read
+    input.scope == "mark_as_read"
+    input.auth.user.id != null
+}
```
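
Both rules grant access when the request carries an authenticated user id and one of the two scopes. As a rough illustration of the input document these rules expect, queried through OPA's standard `/v1/data` REST API (the host, port, and policy path below are assumptions for illustration, not configuration shipped in this commit):

```python
import json
import urllib.request

# Illustrative query against OPA's standard /v1/data REST API; the URL is
# an assumption, not a setting from this commit.
OPA_URL = "http://localhost:8181/v1/data/notifications/allow"

payload = {"input": {"scope": "view", "auth": {"user": {"id": 42}}}}
request = urllib.request.Request(
    OPA_URL,
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(request) as response:
    print(json.load(response))  # {"result": true}: user 42 may view notifications
```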

cvat/apps/engine/serializers.py
18 additions, 15 deletions

```diff
@@ -612,6 +612,7 @@ class JobReadSerializer(serializers.ModelSerializer):
     target_storage = StorageSerializer(required=False, allow_null=True)
     source_storage = StorageSerializer(required=False, allow_null=True)
     task_flags = TaskFlagsSerializer(source="segment.task.flags", read_only=True)
+    overlap = serializers.ReadOnlyField(source="segment.task.overlap")
 
     class Meta:
         model = models.Job
@@ -620,7 +621,7 @@ class Meta:
             'start_frame', 'stop_frame', 'data_chunk_size', 'data_compressed_chunk_type',
             'created_date', 'updated_date', 'issues', 'labels', 'type', 'organization',
             'target_storage', 'source_storage', 'ai_audio_annotation_status',
-            'ai_audio_annotation_task_id', 'ai_audio_annotation_error_msg', 'task_flags')
+            'ai_audio_annotation_task_id', 'ai_audio_annotation_error_msg', 'task_flags', 'overlap')
         read_only_fields = fields
 
     def to_representation(self, instance):
@@ -701,6 +702,7 @@ def create(self, validated_data):
         size = task.data.size
         valid_frame_ids = task.data.get_valid_frame_indices()
         segment_size = task.segment_size
+        overlap = task.overlap
 
         frame_selection_method = validated_data.pop("frame_selection_method", None)
         if frame_selection_method == models.JobFrameSelectionMethod.RANDOM_UNIFORM:
@@ -712,26 +714,27 @@ def create(self, validated_data):
             )
 
         if task.data.original_chunk_type == DataChoice.AUDIO:
-            num_segments = size // segment_size
-            jobs_frame_list = []
-            for i in range(num_segments):
-                start = i * segment_size
-                end = (i+1) * segment_size - 1
-                array = [j for j in range(start,end+1)]
-                jobs_frame_list.append(array)
+            effective_increment = segment_size - overlap
 
-            # if there's a remainder, create the last array
-            if size % segment_size != 0:
-                start = num_segments * segment_size
-                end = size - 1
-                array = [j for j in range(start,end+1)]
+            # Create overlapping segments
+            jobs_frame_list = []
+            start = 0
+            while start < size:
+                end = min(start + segment_size - 1, size - 1)  # last frame does not exceed the total size
+                array = [j for j in range(start, end + 1)]
                 jobs_frame_list.append(array)
+                start += effective_increment  # Move to the next start position considering the overlap
 
-            #Random select from the list
+            # Randomly select from the list
             import math, random
-            random_jobs_no = math.ceil(frame_count / segment_size)
+
+            job_percent = math.ceil((frame_count*100)/size)
+            random_jobs_no = math.ceil((len(jobs_frame_list) * job_percent)/100)
             selected_jobs_frames = random.sample(jobs_frame_list, random_jobs_no)
+
+            # Flatten and sort the selected frames
             frames = sorted([item for sublist in selected_jobs_frames for item in sublist])
+
         else:
             seed = validated_data.pop("seed", None)
```
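Because windows now advance by `segment_size - overlap`, the number of candidate windows no longer equals `size // segment_size`, so the sampling quota switches to a percentage of the window list. A quick sketch of the arithmetic (all values below are made up for illustration):

```python
import math
import random

# Illustrative values only, not from the commit
size, segment_size, overlap, frame_count = 100, 20, 5, 30

effective_increment = segment_size - overlap        # 15
starts = list(range(0, size, effective_increment))  # [0, 15, 30, 45, 60, 75, 90]
windows = [(s, min(s + segment_size - 1, size - 1)) for s in starts]
print(windows)  # [(0, 19), (15, 34), ..., (90, 99)]: 7 windows, neighbors share 5 frames

job_percent = math.ceil((frame_count * 100) / size)             # 30
random_jobs_no = math.ceil((len(windows) * job_percent) / 100)  # ceil(2.1) = 3
selected = random.sample(windows, random_jobs_no)               # 3 windows sampled
```

Note the last window is clamped to `size - 1`, mirroring the `min(start + segment_size - 1, size - 1)` in the diff, so a short tail segment is still eligible for selection.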
cvat/apps/engine/task.py
26 additions, 3 deletions

```diff
@@ -179,7 +179,7 @@ def _segments():
     if segment_size == 0:
         raise ValueError("Segment size cannot be zero.")
 
-    overlap = 0
+    overlap = db_task.overlap
     segment_size = segment_step
     # if db_task.overlap is not None:
     #     overlap = min(db_task.overlap, segment_size // 2)
@@ -1060,9 +1060,12 @@ def get_audio_duration(file_path):
 
     db_task.audio_total_duration = None
 
+    num_frames_per_millisecond = 0
     # calculate chunk size if it isn't specified
     if MEDIA_TYPE == "audio":
         segment_duration = db_task.segment_duration if db_task.segment_duration is not None else 600000
+        overlap_duration = 5*1000
+
         db_task.audio_total_duration = get_audio_duration(details['source_path'][0])
         # db_task.data.audio_total_duration = 720000 #get_audio_duration(details['source_path'][0])
         total_audio_frames = extractor.get_total_frames()
@@ -1075,6 +1078,7 @@ def get_audio_duration(file_path):
 
         num_frames_per_segment_duration = num_frames_per_millisecond*segment_duration
         db_task.segment_size = int(round(num_frames_per_segment_duration))
+        db_task.overlap = int(round(num_frames_per_millisecond * overlap_duration))  # we want to hardcode overlap for audio
 
         # num_segments = max(1, int(math.ceil(db_task.audio_total_duration / segment_duration)))
 
@@ -1206,9 +1210,23 @@
             frame=frame, width=w, height=h)
         for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
     ])
+
     if db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
+        def generate_chunks_with_overlap(extractor, chunk_size, overlap):
+            chunk = []
+            chunk_idx = 0
+            for frame in extractor:
+                chunk.append(frame)
+                if len(chunk) == chunk_size + overlap:  # Full chunk including overlap
+                    yield chunk_idx, chunk[:chunk_size]  # Yield the main chunk
+                    chunk_idx += 1
+                    chunk = chunk[chunk_size - overlap:]  # Retain the overlap portion for the next chunk
+            if chunk:  # Yield remaining frames as the last chunk
+                yield chunk_idx, chunk
+
         counter = itertools.count()
-        generator = itertools.groupby(extractor, lambda _: next(counter) // db_data.chunk_size)
+        # generator = itertools.groupby(extractor, lambda _: next(counter) // db_data.chunk_size)
+        generator = generate_chunks_with_overlap(extractor, chunk_size=db_data.chunk_size, overlap=db_task.overlap)
         generator = ((idx, list(chunk_data)) for idx, chunk_data in generator)
 
         def save_chunks(
@@ -1262,8 +1280,13 @@ def process_results(img_meta: list[tuple[str, int, tuple[int, int]]]):
 
         futures = queue.Queue(maxsize=settings.CVAT_CONCURRENT_CHUNK_PROCESSING)
         with concurrent.futures.ThreadPoolExecutor(max_workers=2*settings.CVAT_CONCURRENT_CHUNK_PROCESSING) as executor:
+            seen_frames = set()  # To track unique frames
             for chunk_idx, chunk_data in generator:
-                db_data.size += len(chunk_data)
+                unique_frames = [frame for frame in chunk_data if frame not in seen_frames]
+                seen_frames.update(unique_frames)
+                db_data.size += len(unique_frames)
+
+                # db_data.size += len(chunk_data)
                 if futures.full():
                     process_results(futures.get().result())
                 futures.put(executor.submit(save_chunks, executor, chunk_idx, chunk_data))
```

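The new generator keeps a rolling buffer: once `chunk_size + overlap` frames accumulate it yields the first `chunk_size` of them, then retains the tail `chunk[chunk_size - overlap:]` (2 × `overlap` frames), so each yielded chunk shares exactly `overlap` frames with its successor. A self-contained run on a toy frame sequence, including the `seen_frames` dedup that keeps `db_data.size` counting unique frames (the numbers are illustrative):

```python
# Same buffering logic as the generator added above, run on a toy frame
# sequence; chunk_size=4 and overlap=1 are illustrative values.
def generate_chunks_with_overlap(extractor, chunk_size, overlap):
    chunk = []
    chunk_idx = 0
    for frame in extractor:
        chunk.append(frame)
        if len(chunk) == chunk_size + overlap:   # full chunk plus overlap buffered
            yield chunk_idx, chunk[:chunk_size]  # emit the main chunk
            chunk_idx += 1
            chunk = chunk[chunk_size - overlap:] # keep the tail for the next chunk
    if chunk:                                    # flush the remainder
        yield chunk_idx, chunk

seen_frames, size = set(), 0
for idx, chunk_data in generate_chunks_with_overlap(range(10), chunk_size=4, overlap=1):
    unique_frames = [f for f in chunk_data if f not in seen_frames]
    seen_frames.update(unique_frames)
    size += len(unique_frames)
    print(idx, chunk_data)
# 0 [0, 1, 2, 3]
# 1 [3, 4, 5, 6]
# 2 [6, 7, 8, 9]
print(size)  # 10 -- frames shared between chunks are only counted once
```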
cvat/apps/engine/views.py
6 additions, 6 deletions

```diff
@@ -701,12 +701,12 @@ def __call__(self, request, start: int, stop: int, db_data: Optional[Data]):
 
         try:
             if self.type == 'chunk':
-                start_chunk = frame_provider.get_chunk_number(start)
-                stop_chunk = frame_provider.get_chunk_number(stop)
-                # pylint: disable=superfluous-parens
-                if not (start_chunk <= self.number <= stop_chunk):
-                    raise ValidationError('The chunk number should be in the ' +
-                        f'[{start_chunk}, {stop_chunk}] range')
+                # start_chunk = frame_provider.get_chunk_number(start)
+                # stop_chunk = frame_provider.get_chunk_number(stop)
+                # # pylint: disable=superfluous-parens
+                # if not (start_chunk <= self.number <= stop_chunk):
+                #     raise ValidationError('The chunk number should be in the ' +
+                #         f'[{start_chunk}, {stop_chunk}] range')
 
                 # TODO: av.FFmpegError processing
                 if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
```

cvat/apps/notifications/permissions.py
4 additions, 2 deletions

```diff
@@ -10,7 +10,6 @@ class Scopes(StrEnum):
     @classmethod
     def create(cls, request, view, obj, iam_context):
         permissions = []
-
         for scope in cls.get_scopes(request, view, obj):
             perm = cls.create_base_perm(request, view, scope, iam_context, obj)
             permissions.append(perm)
@@ -34,4 +33,7 @@ def get_scopes(request, view, obj):
         return []
 
     def get_resource(self):
-        return None
+        return {
+            'type': 'notifications',
+            'user_id': self.user_id,
+        }
```

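`get_resource` now contributes a resource object to the permission context instead of `None`. Assuming CVAT's usual pattern of packaging scope, auth context, and resource into the OPA input (the exact payload layout below is inferred from the Rego rules above, not shown in this diff), the evaluated document would look roughly like:

```python
# Rough shape of the OPA input this permission would produce; the layout is
# an inference from the Rego policy above, not code from this commit.
opa_input = {
    "input": {
        "scope": "view",                # or "mark_as_read"
        "auth": {"user": {"id": 42}},   # the authenticated user
        "resource": {                   # value returned by get_resource()
            "type": "notifications",
            "user_id": 42,
        },
    }
}
```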
cvat/apps/notifications/views.py
5 additions, 2 deletions

```diff
@@ -230,9 +230,12 @@ def FetchUserNotifications(self, request: Request):
         if paginated_notifications is None:
             return Response(
                 {
-                    "success": False,
+                    "success": True,
                     "message": "No notifications available on this page.",
-                    "data": None,
+                    "data": {
+                        "unread" : 0,
+                        "notifications": []
+                    },
                     "error": None
                 },
                 status = status.HTTP_400_BAD_REQUEST
```
