
Commit 0f5d039

Merge pull request #20 from midas-research/feat/job-overlap

overlap added while creating task and gt

2 parents: b81a78d + 859597a

File tree

11 files changed: +250 −110 lines

cvat/apps/dataset_manager/task.py
115 additions, 26 deletions

```diff
@@ -1031,6 +1031,7 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir):
     labels_list = list(labels_queryset.values())
 
     labels_mapping = {}
+    gt_jobs = []
 
     for label in labels_list:
         labels_mapping[label["id"]] = label
@@ -1044,36 +1045,128 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir):
         for attribute in attributes_list:
             labels_mapping[label["id"]]["attributes"][attribute["id"]] = attribute
 
-    slogger.glob.debug("JOB LABELS ATTRIBUTES")
-    slogger.glob.debug(json.dumps(attributes_list))
-
-
-    slogger.glob.debug("JOB LABELS")
-    slogger.glob.debug(json.dumps(labels_list))
-
-    # audio_file_path = os.path.join(temp_dir, str(job_id) + ".wav")
-    # with wave.open(audio_file_path, 'wb') as wave_file:
-    #     wave_file.setnchannels(1)
-    #     wave_file.setsampwidth(4)
-    #     wave_file.setframerate(44100)
-    #     wave_file.writeframes(concat_array)
-
     annotation_audio_chunk_file_paths = chunk_annotation_audio(concat_array, temp_dir, annotations)
 
-    for i, annotation in enumerate(annotations):
-        entry = {
-            "path": os.path.basename(annotation_audio_chunk_file_paths[i]),
+    # handle Gt jobs
+    if job_details.segment.type == "specific_frames":
+        frames = job_details.segment.frames
+        start_frame = frames[0]
+        segment_size = job_details.segment.task.segment_size
+        overlap = job_details.segment.task.overlap
+
+        def generate_jobs(included_frames: list[int], segment_size: int, overlap: int) -> list[dict]:
+            if not included_frames:
+                return []
+
+            jobs = []
+            start_frame = included_frames[0]
+
+            while start_frame + segment_size <= included_frames[-1]:
+                end_frame = start_frame + segment_size - 1
+
+                # Check if both start_frame and end_frame exist in included_frames
+                start_exists = start_frame in included_frames
+                end_exists = end_frame in included_frames
+
+                if start_exists and end_exists:
+                    jobs.append({
+                        "start_frame": start_frame,
+                        "end_frame": end_frame
+                    })
+
+                # Move start_frame back by the overlap for the next job
+                start_frame = end_frame - overlap + 1
+
+                # Find the next valid start frame
+                while start_frame not in included_frames and start_frame <= included_frames[-1]:
+                    start_frame += 1
+
+                # Break if we can't find a valid next start frame
+                if start_frame not in included_frames:
+                    break
+
+            # Handle the last section if necessary
+            last_start = start_frame
+            if last_start in included_frames and last_start < included_frames[-1]:
+                jobs.append({
+                    "start_frame": last_start,
+                    "end_frame": included_frames[-1]
+                })
+
+            return jobs
+
+        gt_jobs = generate_jobs(included_frames=frames, segment_size=segment_size, overlap=overlap)
+        # fetch all jobs of this task
+        task_jobs = Job.objects.filter(segment__task__id=job_details.segment.task_id).order_by('id')
+        start = 0
+        for job_index, job in enumerate(task_jobs):
+            for i, gt_job in enumerate(gt_jobs):
+                if job.segment.start_frame == gt_job['start_frame'] and job.segment.stop_frame == gt_job['end_frame']:
+                    diff_in_frame = gt_job['end_frame'] - gt_job['start_frame'] + 1
+                    duration = int(((job_details.segment.task.audio_total_duration/job_details.segment.task.data.size) * diff_in_frame)/1000)
+                    gt_jobs[i]['job_index'] = job_index
+                    gt_jobs[i]['start'] = start
+                    gt_jobs[i]['end'] = start + duration
+                    start = start + duration
+                    break
+
+    def process_annotations(annotations, gt_jobs, job_details, labels_mapping):
+        final_data = []
+
+        for i, annotation in enumerate(annotations):
+            start = annotation["points"][0]
+            end = annotation["points"][3]
+
+            if job_details.segment.type == "specific_frames":
+                overlapping_jobs = []
+                for gt_job in gt_jobs:
+                    if not (end <= gt_job['start'] or start >= gt_job['end']):
+                        overlapping_jobs.append(gt_job)
+
+                if len(overlapping_jobs) > 1:
+                    for job in overlapping_jobs:
+                        entry = create_entry(annotation, job_details, labels_mapping, i)
+                        entry['job_id'] = job['job_index']
+                        entry['start'] = 0 if start <= job['start'] else start - job['start']
+                        entry['end'] = job['end'] - job['start'] if end >= job['end'] else end - job['start']
+                        add_attributes(entry, annotation, labels_mapping)
+                        final_data.append(entry)
+                else:
+                    entry = create_entry(annotation, job_details, labels_mapping, i)
+                    for gt_job in gt_jobs:
+                        if gt_job['start'] <= start and gt_job['end'] >= end:
+                            entry['job_id'] = gt_job['job_index']
+                            entry['start'] = start - gt_job['start']
+                            entry['end'] = end - gt_job['start']
+                            break
+                    add_attributes(entry, annotation, labels_mapping)
+                    final_data.append(entry)
+            else:
+                # Handle normal jobs
+                entry = create_entry(annotation, job_details, labels_mapping, i)
+                entry['job_id'] = job_details.id
+                entry['start'] = start
+                entry['end'] = end
+                add_attributes(entry, annotation, labels_mapping)
+                final_data.append(entry)
+
+        return final_data
+
+    def create_entry(annotation, job_details, labels_mapping, index):
+        return {
+            "project_id": job_details.segment.task.project_id,
+            "task_id": job_details.segment.task_id,
+            "path": os.path.basename(annotation_audio_chunk_file_paths[index]),
             "sentence": annotation.get("transcript", ""),
             "age": annotation.get("age", ""),
             "gender": annotation.get("gender", ""),
             "accents": annotation.get("accent", ""),
             "locale": annotation.get("locale", ""),
             "emotion": annotation.get("emotion", ""),
             "label": labels_mapping[annotation["label_id"]]["name"],
-            "start": annotation["points"][0],
-            "end": annotation["points"][3]
         }
 
+    def add_attributes(entry, annotation, labels_mapping):
         attributes = annotation.get("attributes", [])
         for idx, attr in enumerate(attributes):
             annotation_attribute_id = attr.get("spec_id", "")
@@ -1085,12 +1178,7 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir):
             entry[f"attribute_{idx+1}_name"] = attribute_name
             entry[f"attribute_{idx+1}_value"] = attribute_val
 
-        final_data.append(entry)
-
-    slogger.glob.debug("JOB ANNOTATION DATA")
-    slogger.glob.debug(json.dumps(final_data))
-    slogger.glob.debug("All ANNOTATIONs DATA")
-    slogger.glob.debug(json.dumps(annotations))
+    final_data = process_annotations(annotations, gt_jobs, job_details, labels_mapping)
     return final_data, annotation_audio_chunk_file_paths
 
 def convert_annotation_data_format(data, format_name):
@@ -1247,7 +1335,8 @@ def export_audino_job(job_id, dst_file, format_name, server_url=None, save_image
     df = pd.DataFrame(final_data)
 
     # sorting by start column in ascending order
-    df = df.sort_values(by='start')
+    if 'job_id' in df.columns:
+        df = df.sort_values(by='job_id')
 
     # Saving the metadata file
     meta_data_file_path = os.path.join(temp_dir_base, str(job_id) + ".tsv")
```
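
The heart of this change is the `generate_jobs` helper: it slides a `segment_size`-frame window across the ground-truth frame list, stepping back by `overlap` frames between consecutive windows, and closes out any trailing partial window. A standalone sketch of that logic on made-up inputs (frames 0–19, `segment_size=8`, and `overlap=2` are illustrative values, not from the commit):

```python
# Standalone copy of the generate_jobs window logic added above, run on
# illustrative inputs (frames 0-19, segment_size=8, overlap=2 are made up).
def generate_jobs(included_frames, segment_size, overlap):
    if not included_frames:
        return []

    jobs = []
    start_frame = included_frames[0]

    while start_frame + segment_size <= included_frames[-1]:
        end_frame = start_frame + segment_size - 1
        # Only emit a window whose endpoints are both valid GT frames
        if start_frame in included_frames and end_frame in included_frames:
            jobs.append({"start_frame": start_frame, "end_frame": end_frame})

        # Step back by `overlap` before starting the next window
        start_frame = end_frame - overlap + 1
        while start_frame not in included_frames and start_frame <= included_frames[-1]:
            start_frame += 1
        if start_frame not in included_frames:
            break

    # Close out a trailing partial window
    if start_frame in included_frames and start_frame < included_frames[-1]:
        jobs.append({"start_frame": start_frame, "end_frame": included_frames[-1]})
    return jobs

print(generate_jobs(list(range(20)), segment_size=8, overlap=2))
# [{'start_frame': 0, 'end_frame': 7},
#  {'start_frame': 6, 'end_frame': 13},
#  {'start_frame': 12, 'end_frame': 19}]
```

Consecutive windows share exactly `overlap` frames (6–7 and 12–13 above), which is why `process_annotations` can emit one copy of an annotation per overlapping GT job, re-based to each job's local start time.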

cvat/apps/engine/frame_provider.py
2 additions, 2 deletions

```diff
@@ -156,8 +156,8 @@ def get_chunk_number(self, frame_number):
 
     def _validate_chunk_number(self, chunk_number):
         chunk_number_ = int(chunk_number)
-        if chunk_number_ < 0 or chunk_number_ >= math.ceil(self._db_data.size / self._db_data.chunk_size):
-            raise ValidationError('requested chunk does not exist')
+        # if chunk_number_ < 0 or chunk_number_ >= math.ceil(self._db_data.size / self._db_data.chunk_size):
+        #     raise ValidationError('requested chunk does not exist')
 
         return chunk_number_
```

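With overlapping chunks, `db_data.size` counts unique frames while the number of stored chunks grows with stride `chunk_size - overlap`, so the old bound `ceil(size / chunk_size)` undercounts and the check is disabled rather than adjusted. If a guard is wanted later, an overlap-aware bound might look like this (a sketch only; `max_chunk_count` is a hypothetical helper, not part of this commit):

```python
import math

# Hypothetical overlap-aware bound, NOT part of this commit: the number of
# chunks produced when each chunk holds `chunk_size` frames and consecutive
# chunks share `overlap` frames (stride between chunk starts = chunk_size - overlap).
def max_chunk_count(size: int, chunk_size: int, overlap: int) -> int:
    if size <= chunk_size:
        return 1
    stride = chunk_size - overlap
    return 1 + math.ceil((size - chunk_size) / stride)

# 10 unique frames, chunk_size=4, overlap=1 -> chunks [0-3], [3-6], [6-9]
assert max_chunk_count(10, 4, 1) == 3
```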
New file: 15 additions, 0 deletions

```diff
@@ -0,0 +1,15 @@
+package notifications
+
+default allow = false
+
+allow {
+    # Allow authenticated users to view their notifications
+    input.scope == "view"
+    input.auth.user.id != null
+}
+
+allow {
+    # Allow marking notifications as read
+    input.scope == "mark_as_read"
+    input.auth.user.id != null
+}
```
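
Both rules grant access when the request carries an authenticated user id and one of the two scopes. As a rough illustration of the input document these rules expect, queried through OPA's standard `/v1/data` REST API (the host, port, and policy path below are assumptions for illustration, not configuration shipped in this commit):

```python
import json
import urllib.request

# Illustrative query against OPA's standard /v1/data REST API; the URL is
# an assumption, not a setting from this commit.
OPA_URL = "http://localhost:8181/v1/data/notifications/allow"

payload = {"input": {"scope": "view", "auth": {"user": {"id": 42}}}}
request = urllib.request.Request(
    OPA_URL,
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(request) as response:
    print(json.load(response))  # {"result": true}: user 42 may view notifications
```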

cvat/apps/engine/serializers.py
18 additions, 15 deletions

```diff
@@ -612,6 +612,7 @@ class JobReadSerializer(serializers.ModelSerializer):
     target_storage = StorageSerializer(required=False, allow_null=True)
     source_storage = StorageSerializer(required=False, allow_null=True)
     task_flags = TaskFlagsSerializer(source="segment.task.flags", read_only=True)
+    overlap = serializers.ReadOnlyField(source="segment.task.overlap")
 
     class Meta:
         model = models.Job
@@ -620,7 +621,7 @@ class Meta:
             'start_frame', 'stop_frame', 'data_chunk_size', 'data_compressed_chunk_type',
             'created_date', 'updated_date', 'issues', 'labels', 'type', 'organization',
             'target_storage', 'source_storage', 'ai_audio_annotation_status',
-            'ai_audio_annotation_task_id', 'ai_audio_annotation_error_msg', 'task_flags')
+            'ai_audio_annotation_task_id', 'ai_audio_annotation_error_msg', 'task_flags', 'overlap')
         read_only_fields = fields
 
     def to_representation(self, instance):
@@ -701,6 +702,7 @@ def create(self, validated_data):
         size = task.data.size
         valid_frame_ids = task.data.get_valid_frame_indices()
         segment_size = task.segment_size
+        overlap = task.overlap
 
         frame_selection_method = validated_data.pop("frame_selection_method", None)
         if frame_selection_method == models.JobFrameSelectionMethod.RANDOM_UNIFORM:
@@ -712,26 +714,27 @@ def create(self, validated_data):
             )
 
         if task.data.original_chunk_type == DataChoice.AUDIO:
-            num_segments = size // segment_size
-            jobs_frame_list = []
-            for i in range(num_segments):
-                start = i * segment_size
-                end = (i+1) * segment_size - 1
-                array = [j for j in range(start,end+1)]
-                jobs_frame_list.append(array)
+            effective_increment = segment_size - overlap
 
-            # if there's a remainder, create the last array
-            if size % segment_size != 0:
-                start = num_segments * segment_size
-                end = size - 1
-                array = [j for j in range(start,end+1)]
+            # Create overlapping segments
+            jobs_frame_list = []
+            start = 0
+            while start < size:
+                end = min(start + segment_size - 1, size - 1)  # last frame does not exceed the total size
+                array = [j for j in range(start, end + 1)]
                 jobs_frame_list.append(array)
+                start += effective_increment  # Move to the next start position considering the overlap
 
-            #Random select from the list
+            # Randomly select from the list
             import math, random
-            random_jobs_no = math.ceil(frame_count / segment_size)
+
+            job_percent = math.ceil((frame_count*100)/size)
+            random_jobs_no = math.ceil((len(jobs_frame_list) * job_percent)/100)
             selected_jobs_frames = random.sample(jobs_frame_list, random_jobs_no)
+
+            # Flatten and sort the selected frames
             frames = sorted([item for sublist in selected_jobs_frames for item in sublist])
+
         else:
             seed = validated_data.pop("seed", None)
```
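Because windows now advance by `segment_size - overlap`, the number of candidate windows no longer equals `size // segment_size`, so the sampling quota switches to a percentage of the window list. A quick sketch of the arithmetic (all values below are made up for illustration):

```python
import math
import random

# Illustrative values only, not from the commit
size, segment_size, overlap, frame_count = 100, 20, 5, 30

effective_increment = segment_size - overlap        # 15
starts = list(range(0, size, effective_increment))  # [0, 15, 30, 45, 60, 75, 90]
windows = [(s, min(s + segment_size - 1, size - 1)) for s in starts]
print(windows)  # [(0, 19), (15, 34), ..., (90, 99)]: 7 windows, neighbors share 5 frames

job_percent = math.ceil((frame_count * 100) / size)             # 30
random_jobs_no = math.ceil((len(windows) * job_percent) / 100)  # ceil(2.1) = 3
selected = random.sample(windows, random_jobs_no)               # 3 windows sampled
```

Note the last window is clamped to `size - 1`, mirroring the `min(start + segment_size - 1, size - 1)` in the diff, so a short tail segment is still eligible for selection.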
cvat/apps/engine/task.py
26 additions, 3 deletions

```diff
@@ -179,7 +179,7 @@ def _segments():
     if segment_size == 0:
         raise ValueError("Segment size cannot be zero.")
 
-    overlap = 0
+    overlap = db_task.overlap
     segment_size = segment_step
     # if db_task.overlap is not None:
     #     overlap = min(db_task.overlap, segment_size // 2)
@@ -1060,9 +1060,12 @@ def get_audio_duration(file_path):
 
     db_task.audio_total_duration = None
 
+    num_frames_per_millisecond = 0
     # calculate chunk size if it isn't specified
     if MEDIA_TYPE == "audio":
         segment_duration = db_task.segment_duration if db_task.segment_duration is not None else 600000
+        overlap_duration = 5*1000
+
         db_task.audio_total_duration = get_audio_duration(details['source_path'][0])
         # db_task.data.audio_total_duration = 720000 #get_audio_duration(details['source_path'][0])
         total_audio_frames = extractor.get_total_frames()
@@ -1075,6 +1078,7 @@ def get_audio_duration(file_path):
 
         num_frames_per_segment_duration = num_frames_per_millisecond*segment_duration
         db_task.segment_size = int(round(num_frames_per_segment_duration))
+        db_task.overlap = int(round(num_frames_per_millisecond * overlap_duration))  # we want to hardcode overlap for audio
 
         # num_segments = max(1, int(math.ceil(db_task.audio_total_duration / segment_duration)))
 
@@ -1206,9 +1210,23 @@
             frame=frame, width=w, height=h)
         for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
     ])
+
     if db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
+        def generate_chunks_with_overlap(extractor, chunk_size, overlap):
+            chunk = []
+            chunk_idx = 0
+            for frame in extractor:
+                chunk.append(frame)
+                if len(chunk) == chunk_size + overlap:  # Full chunk including overlap
+                    yield chunk_idx, chunk[:chunk_size]  # Yield the main chunk
+                    chunk_idx += 1
+                    chunk = chunk[chunk_size - overlap:]  # Retain the overlap portion for the next chunk
+            if chunk:  # Yield remaining frames as the last chunk
+                yield chunk_idx, chunk
+
         counter = itertools.count()
-        generator = itertools.groupby(extractor, lambda _: next(counter) // db_data.chunk_size)
+        # generator = itertools.groupby(extractor, lambda _: next(counter) // db_data.chunk_size)
+        generator = generate_chunks_with_overlap(extractor, chunk_size=db_data.chunk_size, overlap=db_task.overlap)
         generator = ((idx, list(chunk_data)) for idx, chunk_data in generator)
 
         def save_chunks(
@@ -1262,8 +1280,13 @@ def process_results(img_meta: list[tuple[str, int, tuple[int, int]]]):
 
         futures = queue.Queue(maxsize=settings.CVAT_CONCURRENT_CHUNK_PROCESSING)
         with concurrent.futures.ThreadPoolExecutor(max_workers=2*settings.CVAT_CONCURRENT_CHUNK_PROCESSING) as executor:
+            seen_frames = set()  # To track unique frames
             for chunk_idx, chunk_data in generator:
-                db_data.size += len(chunk_data)
+                unique_frames = [frame for frame in chunk_data if frame not in seen_frames]
+                seen_frames.update(unique_frames)
+                db_data.size += len(unique_frames)
+
+                # db_data.size += len(chunk_data)
                 if futures.full():
                     process_results(futures.get().result())
                 futures.put(executor.submit(save_chunks, executor, chunk_idx, chunk_data))
```

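The new generator keeps a rolling buffer: once `chunk_size + overlap` frames accumulate it yields the first `chunk_size` of them, then retains the tail `chunk[chunk_size - overlap:]` (2 × `overlap` frames), so each yielded chunk shares exactly `overlap` frames with its successor. A self-contained run on a toy frame sequence, including the `seen_frames` dedup that keeps `db_data.size` counting unique frames (the numbers are illustrative):

```python
# Same buffering logic as the generator added above, run on a toy frame
# sequence; chunk_size=4 and overlap=1 are illustrative values.
def generate_chunks_with_overlap(extractor, chunk_size, overlap):
    chunk = []
    chunk_idx = 0
    for frame in extractor:
        chunk.append(frame)
        if len(chunk) == chunk_size + overlap:   # full chunk plus overlap buffered
            yield chunk_idx, chunk[:chunk_size]  # emit the main chunk
            chunk_idx += 1
            chunk = chunk[chunk_size - overlap:] # keep the tail for the next chunk
    if chunk:                                    # flush the remainder
        yield chunk_idx, chunk

seen_frames, size = set(), 0
for idx, chunk_data in generate_chunks_with_overlap(range(10), chunk_size=4, overlap=1):
    unique_frames = [f for f in chunk_data if f not in seen_frames]
    seen_frames.update(unique_frames)
    size += len(unique_frames)
    print(idx, chunk_data)
# 0 [0, 1, 2, 3]
# 1 [3, 4, 5, 6]
# 2 [6, 7, 8, 9]
print(size)  # 10 -- frames shared between chunks are only counted once
```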
cvat/apps/engine/views.py
6 additions, 6 deletions

```diff
@@ -701,12 +701,12 @@ def __call__(self, request, start: int, stop: int, db_data: Optional[Data]):
 
         try:
             if self.type == 'chunk':
-                start_chunk = frame_provider.get_chunk_number(start)
-                stop_chunk = frame_provider.get_chunk_number(stop)
-                # pylint: disable=superfluous-parens
-                if not (start_chunk <= self.number <= stop_chunk):
-                    raise ValidationError('The chunk number should be in the ' +
-                        f'[{start_chunk}, {stop_chunk}] range')
+                # start_chunk = frame_provider.get_chunk_number(start)
+                # stop_chunk = frame_provider.get_chunk_number(stop)
+                # # pylint: disable=superfluous-parens
+                # if not (start_chunk <= self.number <= stop_chunk):
+                #     raise ValidationError('The chunk number should be in the ' +
+                #         f'[{start_chunk}, {stop_chunk}] range')
 
                 # TODO: av.FFmpegError processing
                 if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
```

cvat/apps/notifications/permissions.py
4 additions, 2 deletions

```diff
@@ -10,7 +10,6 @@ class Scopes(StrEnum):
     @classmethod
     def create(cls, request, view, obj, iam_context):
         permissions = []
-
         for scope in cls.get_scopes(request, view, obj):
             perm = cls.create_base_perm(request, view, scope, iam_context, obj)
             permissions.append(perm)
@@ -34,4 +33,7 @@ def get_scopes(request, view, obj):
         return []
 
     def get_resource(self):
-        return None
+        return {
+            'type': 'notifications',
+            'user_id': self.user_id,
+        }
```

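`get_resource` now contributes a resource object to the permission context instead of `None`. Assuming CVAT's usual pattern of packaging scope, auth context, and resource into the OPA input (the exact payload layout below is inferred from the Rego rules above, not shown in this diff), the evaluated document would look roughly like:

```python
# Rough shape of the OPA input this permission would produce; the layout is
# an inference from the Rego policy above, not code from this commit.
opa_input = {
    "input": {
        "scope": "view",                # or "mark_as_read"
        "auth": {"user": {"id": 42}},   # the authenticated user
        "resource": {                   # value returned by get_resource()
            "type": "notifications",
            "user_id": 42,
        },
    }
}
```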
cvat/apps/notifications/views.py
5 additions, 2 deletions

```diff
@@ -230,9 +230,12 @@ def FetchUserNotifications(self, request: Request):
         if paginated_notifications is None:
             return Response(
                 {
-                    "success": False,
+                    "success": True,
                     "message": "No notifications available on this page.",
-                    "data": None,
+                    "data": {
+                        "unread" : 0,
+                        "notifications": []
+                    },
                     "error": None
                 },
                 status = status.HTTP_400_BAD_REQUEST
```
