From 4d619704612a6b1ae4bb9494a251f3eff8ff2ab0 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Wed, 19 Mar 2025 16:31:16 +0530 Subject: [PATCH 01/22] email_report --- backend/dataset/admin.py | 2 +- backend/projects/utils.py | 8 ++++ backend/shoonya_backend/settings.py | 1 + backend/workspaces/tasks.py | 28 ++++++++++--- backend/workspaces/views.py | 62 +++++++++++++++++++++++++++++ 5 files changed, 95 insertions(+), 6 deletions(-) diff --git a/backend/dataset/admin.py b/backend/dataset/admin.py index 140171494..0ecdb9a6d 100644 --- a/backend/dataset/admin.py +++ b/backend/dataset/admin.py @@ -1,4 +1,4 @@ -import resource +# import resource from django.contrib import admin from import_export.admin import ImportExportActionModelAdmin from .resources import * diff --git a/backend/projects/utils.py b/backend/projects/utils.py index c2b877007..ba7e1bd01 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -199,6 +199,14 @@ def get_audio_segments_count(annotation_result): return count +def get_bounding_box_count(annotation_label_result): + count = 0 + for result in annotation_label_result: + if result["type"] == "rectangle": + count += 1 + + return count + def audio_word_count(annotation_result): word_count = 0 diff --git a/backend/shoonya_backend/settings.py b/backend/shoonya_backend/settings.py index 2915ce894..d37a1f5ea 100644 --- a/backend/shoonya_backend/settings.py +++ b/backend/shoonya_backend/settings.py @@ -38,6 +38,7 @@ ALLOWED_HOSTS = ["127.0.0.1", "localhost", "0.0.0.0", "*"] else: ALLOWED_HOSTS = [ + "127.0.0.1", "shoonya.ai4bharat.org", "0.0.0.0", "backend.shoonya.ai4bharat.org", diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 4720d6a6d..3e856e0d5 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -29,6 +29,7 @@ get_audio_transcription_duration, calculate_word_error_rate_between_two_audio_transcription_annotation, get_audio_segments_count, + get_bounding_box_count, ocr_word_count, ) from tasks.views import SentenceOperationViewSet @@ -765,7 +766,7 @@ def send_user_reports_mail_ws( ) email.send() - +# send_user_analysis_reports_mail_ws @shared_task(queue="reports") def send_project_analysis_reports_mail_ws( pk, @@ -1085,14 +1086,14 @@ def send_project_analysis_reports_mail_ws( "Average Word Error Rate A/R": round(avg_word_error_rate_ar, 2), "Average Word Error Rate R/S": round(avg_word_error_rate_rs, 2), "Project Progress": round(project_progress, 3), - } - + } + if project_type in get_audio_project_types(): del result["Annotated Tasks Word Count"] del result["Reviewed Tasks Word Count"] del result["Exported Tasks Word Count"] del result["SuperChecked Tasks Word Count"] - + elif is_translation_project or project_type in [ "SemanticTextualSimilarity_Scale5", "OCRTranscriptionEditing", @@ -1117,7 +1118,13 @@ def send_project_analysis_reports_mail_ws( del result["Total Raw Audio Duration"] del result["Average Word Error Rate A/R"] del result["Average Word Error Rate R/S"] - + + # for OCRTranscriptionEditing 2188 line + if project_type in "OCRTranscriptionEditing": + total_label_count = sum( + [get_bounding_box_count(each_anno.result) for each_anno in labeled_tasks] + ) + result["Total Label Count"] = total_label_count final_result.append(result) df = pd.DataFrame.from_dict(final_result) @@ -2176,6 +2183,13 @@ def send_user_analysis_reports_mail_ws( total_raw_duration = "0:00:00" avg_segment_duration = 0 avg_segments_per_task = 0 + + # for OCRTranscriptionEditing project type to 1097 line + if project_type == "OCRTranscriptionEditing": + total_label_count = 0 + for each_anno in labeled_annotations: + total_label_count += get_bounding_box_count(each_anno.result) + if project_type in get_audio_project_types(): total_duration_list = [] total_raw_duration_list = [] @@ -2257,6 +2271,10 @@ def send_user_analysis_reports_mail_ws( "Avg Segment Duration": round(avg_segment_duration, 2), "Average Segments Per Task": round(avg_segments_per_task, 2), } + # Add Total Label Count only for OCRTranscriptionEditing projects + if project_type == "OCRTranscriptionEditing": + result["Total Label Count"] = total_label_count + else: result = { "Annotator": name, diff --git a/backend/workspaces/views.py b/backend/workspaces/views.py index 2318b1866..4bd840d67 100644 --- a/backend/workspaces/views.py +++ b/backend/workspaces/views.py @@ -32,6 +32,7 @@ get_audio_transcription_duration, audio_word_count, get_audio_segments_count, + get_bounding_box_count, calculate_word_error_rate_between_two_audio_transcription_annotation, get_translation_dataset_project_types, convert_hours_to_seconds, @@ -612,6 +613,62 @@ def project_analytics(self, request, pk=None): total_word_error_rate_rs_list = [] total_word_error_rate_ar_list = [] total_raw_duration_list = [] + + # for OcrTranscriptionEditing project type 669 line + if project_type in "OCRTranscriptionEditing": + total_bounding_boxes = 0 + + for each_task in labeled_tasks: + try: + annotate_annotation = Annotation.objects.filter( + task=each_task, annotation_type=ANNOTATOR_ANNOTATION + )[0] + total_bounding_boxes += get_bounding_box_count(annotate_annotation.result) + except Exception: + pass + + for each_task in reviewed_tasks: + try: + review_annotation = Annotation.objects.filter( + task=each_task, annotation_type=REVIEWER_ANNOTATION + )[0] + total_bounding_boxes += get_bounding_box_count(review_annotation.result) + except Exception: + pass + + for each_task in exported_tasks: + try: + total_bounding_boxes += get_bounding_box_count(each_task.correct_annotation.result) + except Exception: + pass + + for each_task in superchecked_tasks: + try: + supercheck_annotation = Annotation.objects.filter( + task=each_task, annotation_type=SUPER_CHECKER_ANNOTATION + )[0] + total_bounding_boxes += get_bounding_box_count(supercheck_annotation.result) + except Exception: + pass + + result["Total Bounding Boxes"] = total_bounding_boxes + + # Remove unrelated fields for OCRTranscriptionEditing + fields_to_remove = [ + "Annotated Tasks Audio Duration", + "Reviewed Tasks Audio Duration", + "Exported Tasks Audio Duration", + "SuperChecked Tasks Audio Duration", + "Total Raw Audio Duration", + "Average Word Error Rate A/R", + "Average Word Error Rate R/S", + ] + + for field in fields_to_remove: + result.pop(field, None) # Using .pop() to avoid KeyErrors if a field doesn't exist + # End Here OcrTranscriptionEditing project type: 617 line + + if project_type in get_audio_project_types(): for each_task in labeled_tasks: try: @@ -1218,6 +1275,11 @@ def user_analytics(self, request, pk=None): pass total_word_count = sum(total_word_count_list) + + + + + elif "OCRTranscription" in project_type: total_word_count = 0 for each_anno in labeled_annotations: From e55ca77ab1e9c1fdb290a05153f04b70ff2e06de Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Thu, 20 Mar 2025 13:54:02 +0530 Subject: [PATCH 02/22] correct_payment_email --- backend/shoonya_backend/settings.py | 1 - backend/workspaces/tasks.py | 23 +++++++----- backend/workspaces/views.py | 58 +---------------------------- 3 files changed, 15 insertions(+), 67 deletions(-) diff --git a/backend/shoonya_backend/settings.py b/backend/shoonya_backend/settings.py index d37a1f5ea..2915ce894 100644 --- a/backend/shoonya_backend/settings.py +++ b/backend/shoonya_backend/settings.py @@ -38,7 +38,6 @@ ALLOWED_HOSTS = ["127.0.0.1", "localhost", "0.0.0.0", "*"] else: ALLOWED_HOSTS = [ - "127.0.0.1", "shoonya.ai4bharat.org", "0.0.0.0", "backend.shoonya.ai4bharat.org", diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 3e856e0d5..868fdde60 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -646,6 +646,8 @@ def send_user_reports_mail_ws( ws_anno_list = [] ws_reviewer_list = [] ws_superchecker_list = [] + total_bounding_boxes = 0 # Initialize bounding box count + for project in proj_objs: anno_list = project.annotators.all() reviewer_list = project.annotation_reviewers.all() @@ -668,6 +670,11 @@ def send_user_reports_mail_ws( ws_anno_list.extend(anno_ids) ws_reviewer_list.extend(reviewer_ids) ws_superchecker_list.extend(superchecker_ids) + + # If the project type is "OCRTranscriptionEditing", count bounding boxes + if project_type == "OCRTranscriptionEditing": + annotation_label_result = project.annotations.all().values("type") + total_bounding_boxes += get_bounding_box_count(annotation_label_result) ws_anno_list = list(set(ws_anno_list)) ws_reviewer_list = list(set(ws_reviewer_list)) @@ -756,7 +763,11 @@ def send_user_reports_mail_ws( else "" ) ) - + # Include bounding box count if project is OCRTranscriptionEditing + if project_type == "OCRTranscriptionEditing": + message += f"\nTotal Bounding Boxes: {total_bounding_boxes}" + + email = EmailMessage( f"{workspace.workspace_name}" + " Payment Reports", message, @@ -1118,14 +1129,6 @@ def send_project_analysis_reports_mail_ws( del result["Total Raw Audio Duration"] del result["Average Word Error Rate A/R"] del result["Average Word Error Rate R/S"] - - # for OCRTranscriptionEditing 2188 line - if project_type in "OCRTranscriptionEditing": - total_label_count = sum( - [get_bounding_box_count(each_anno.result) for each_anno in labeled_tasks] - ) - result["Total Label Count"] = total_label_count - final_result.append(result) df = pd.DataFrame.from_dict(final_result) @@ -2184,7 +2187,7 @@ def send_user_analysis_reports_mail_ws( avg_segment_duration = 0 avg_segments_per_task = 0 - # for OCRTranscriptionEditing project type to 1097 line + # for OCRTranscriptionEditing project type to 1121 line if project_type == "OCRTranscriptionEditing": total_label_count = 0 for each_anno in labeled_annotations: diff --git a/backend/workspaces/views.py b/backend/workspaces/views.py index 4bd840d67..8f0ffb268 100644 --- a/backend/workspaces/views.py +++ b/backend/workspaces/views.py @@ -613,61 +613,7 @@ def project_analytics(self, request, pk=None): total_word_error_rate_rs_list = [] total_word_error_rate_ar_list = [] total_raw_duration_list = [] - - # for OcrTranscriptionEditing project type 669 line - if project_type in "OCRTranscriptionEditing": - total_bounding_boxes = 0 - - for each_task in labeled_tasks: - try: - annotate_annotation = Annotation.objects.filter( - task=each_task, annotation_type=ANNOTATOR_ANNOTATION - )[0] - total_bounding_boxes += get_bounding_box_count(annotate_annotation.result) - except Exception: - pass - - for each_task in reviewed_tasks: - try: - review_annotation = Annotation.objects.filter( - task=each_task, annotation_type=REVIEWER_ANNOTATION - )[0] - total_bounding_boxes += get_bounding_box_count(review_annotation.result) - except Exception: - pass - - for each_task in exported_tasks: - try: - total_bounding_boxes += get_bounding_box_count(each_task.correct_annotation.result) - except Exception: - pass - - for each_task in superchecked_tasks: - try: - supercheck_annotation = Annotation.objects.filter( - task=each_task, annotation_type=SUPER_CHECKER_ANNOTATION - )[0] - total_bounding_boxes += get_bounding_box_count(supercheck_annotation.result) - except Exception: - pass - - result["Total Bounding Boxes"] = total_bounding_boxes - - # Remove unrelated fields for OCRTranscriptionEditing - fields_to_remove = [ - "Annotated Tasks Audio Duration", - "Reviewed Tasks Audio Duration", - "Exported Tasks Audio Duration", - "SuperChecked Tasks Audio Duration", - "Total Raw Audio Duration", - "Average Word Error Rate A/R", - "Average Word Error Rate R/S", - ] - - for field in fields_to_remove: - result.pop(field, None) # Using .pop() to avoid KeyErrors if a field doesn't exist - # End Here OcrTranscriptionEditing project type: 617 line - + if project_type in get_audio_project_types(): for each_task in labeled_tasks: @@ -1277,7 +1223,7 @@ def user_analytics(self, request, pk=None): total_word_count = sum(total_word_count_list) - + elif "OCRTranscription" in project_type: From 4cfe472e93ef9da2741ce679f2da55e78ff705d2 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Fri, 21 Mar 2025 10:08:19 +0530 Subject: [PATCH 03/22] user_ana_done --- backend/workspaces/views.py | 53 ++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/backend/workspaces/views.py b/backend/workspaces/views.py index 8f0ffb268..a16fcd113 100644 --- a/backend/workspaces/views.py +++ b/backend/workspaces/views.py @@ -63,6 +63,7 @@ get_supercheck_reports, ) from utils.filter_tasks_by_ann_type import filter_tasks_by_ann_type +logger = logging.getLogger(__name__) # Create your views here. @@ -1223,7 +1224,57 @@ def user_analytics(self, request, pk=None): total_word_count = sum(total_word_count_list) - + # for OcrTranscriptionEditing project type 1277 line + elif "OCRTranscriptionEditing" in project_type: + total_bounding_boxes_annotated = 0 + total_bounding_boxes_reviewed = 0 + total_bounding_boxes_superchecked = 0 + + # Fetch tasks related to the project + tasks = Task.objects.filter(project_id__in=proj_ids) + + # For Annotator + if reports_type == "annotator": + for each_task in tasks: + try: + annotate_annotation = Annotation.objects.filter( + task=each_task, annotation_type=ANNOTATOR_ANNOTATION + ).first() # Using .first() for safety + if annotate_annotation: + total_bounding_boxes_annotated += get_bounding_box_count(annotate_annotation.result) + except Exception as e: + logger.error(f"Error in Annotator Bounding Box Counting: {e}") + + # For Reviewer + if reports_type == "review": + for each_task in tasks: + try: + review_annotation = Annotation.objects.filter( + task=each_task, annotation_type=REVIEWER_ANNOTATION + ).first() + if review_annotation: + total_bounding_boxes_reviewed += get_bounding_box_count(review_annotation.result) + except Exception as e: + logger.error(f"Error in Reviewer Bounding Box Counting: {e}") + + # For Super Checker + if reports_type == "supercheck": + for each_task in tasks: + try: + supercheck_annotation = Annotation.objects.filter( + task=each_task, annotation_type=SUPER_CHECKER_ANNOTATION + ).first() + if supercheck_annotation: + total_bounding_boxes_superchecked += get_bounding_box_count(supercheck_annotation.result) + except Exception as e: + logger.error(f"Error in Superchecker Bounding Box Counting: {e}") + + # Add results to the final report + result["Bounding Boxes (Annotated)"] = total_bounding_boxes_annotated + result["Bounding Boxes (Reviewed)"] = total_bounding_boxes_reviewed + result["Bounding Boxes (SuperChecked)"] = total_bounding_boxes_superchecked + # end here OcrTranscriptionEditing project type: 1227 line + elif "OCRTranscription" in project_type: From d1de2c34eec76e1527cf7cf8e75fd95b6fa1d962 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Fri, 21 Mar 2025 12:56:18 +0530 Subject: [PATCH 04/22] logging_import --- backend/workspaces/views.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/workspaces/views.py b/backend/workspaces/views.py index a16fcd113..8d15e698d 100644 --- a/backend/workspaces/views.py +++ b/backend/workspaces/views.py @@ -63,6 +63,7 @@ get_supercheck_reports, ) from utils.filter_tasks_by_ann_type import filter_tasks_by_ann_type +import logging logger = logging.getLogger(__name__) # Create your views here. From ef9a0661280eaae130674e534996ef04078e98a3 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Fri, 21 Mar 2025 13:28:25 +0530 Subject: [PATCH 05/22] logging_import --- backend/shoonya_backend/settings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/shoonya_backend/settings.py b/backend/shoonya_backend/settings.py index 2915ce894..d37a1f5ea 100644 --- a/backend/shoonya_backend/settings.py +++ b/backend/shoonya_backend/settings.py @@ -38,6 +38,7 @@ ALLOWED_HOSTS = ["127.0.0.1", "localhost", "0.0.0.0", "*"] else: ALLOWED_HOSTS = [ + "127.0.0.1", "shoonya.ai4bharat.org", "0.0.0.0", "backend.shoonya.ai4bharat.org", From a4c0808ebd4cc0d824cca864c1657d95d4684b78 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Fri, 21 Mar 2025 13:53:23 +0530 Subject: [PATCH 06/22] label_count --- backend/workspaces/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/workspaces/views.py b/backend/workspaces/views.py index 8d15e698d..68b71b20f 100644 --- a/backend/workspaces/views.py +++ b/backend/workspaces/views.py @@ -63,7 +63,7 @@ get_supercheck_reports, ) from utils.filter_tasks_by_ann_type import filter_tasks_by_ann_type -import logging +import logging_ logger = logging.getLogger(__name__) # Create your views here. From 6a6382d9dd69eb0e0329a7110aabb178302a52d4 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Fri, 21 Mar 2025 14:03:43 +0530 Subject: [PATCH 07/22] label_count1 --- backend/workspaces/views.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/backend/workspaces/views.py b/backend/workspaces/views.py index 68b71b20f..7818be8ed 100644 --- a/backend/workspaces/views.py +++ b/backend/workspaces/views.py @@ -63,8 +63,10 @@ get_supercheck_reports, ) from utils.filter_tasks_by_ann_type import filter_tasks_by_ann_type -import logging_ -logger = logging.getLogger(__name__) + + +# import logging +# logger = logging.getLogger(__name__) # Create your views here. @@ -1244,7 +1246,8 @@ def user_analytics(self, request, pk=None): if annotate_annotation: total_bounding_boxes_annotated += get_bounding_box_count(annotate_annotation.result) except Exception as e: - logger.error(f"Error in Annotator Bounding Box Counting: {e}") + # .error(f"Error in Annotator Bounding Box Counting: {e}") + pass # For Reviewer if reports_type == "review": @@ -1256,7 +1259,8 @@ def user_analytics(self, request, pk=None): if review_annotation: total_bounding_boxes_reviewed += get_bounding_box_count(review_annotation.result) except Exception as e: - logger.error(f"Error in Reviewer Bounding Box Counting: {e}") + # logger.error(f"Error in Reviewer Bounding Box Counting: {e}") + pass # For Super Checker if reports_type == "supercheck": @@ -1268,7 +1272,8 @@ def user_analytics(self, request, pk=None): if supercheck_annotation: total_bounding_boxes_superchecked += get_bounding_box_count(supercheck_annotation.result) except Exception as e: - logger.error(f"Error in Superchecker Bounding Box Counting: {e}") + # logger.error(f"Error in Superchecker Bounding Box Counting: {e}") + pass # Add results to the final report result["Bounding Boxes (Annotated)"] = total_bounding_boxes_annotated From 6ba832468400fc1d605a96ba42109121aa9d84e1 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Mon, 24 Mar 2025 12:44:31 +0530 Subject: [PATCH 08/22] updated --- backend/workspaces/tasks.py | 19 ++++++----- backend/workspaces/views.py | 65 +++++-------------------------------- 2 files changed, 19 insertions(+), 65 deletions(-) diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 868fdde60..26ee8efbb 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -646,7 +646,7 @@ def send_user_reports_mail_ws( ws_anno_list = [] ws_reviewer_list = [] ws_superchecker_list = [] - total_bounding_boxes = 0 # Initialize bounding box count + # total_bounding_boxes = 0 # Initialize bounding box count for project in proj_objs: anno_list = project.annotators.all() @@ -671,10 +671,10 @@ def send_user_reports_mail_ws( ws_reviewer_list.extend(reviewer_ids) ws_superchecker_list.extend(superchecker_ids) - # If the project type is "OCRTranscriptionEditing", count bounding boxes - if project_type == "OCRTranscriptionEditing": - annotation_label_result = project.annotations.all().values("type") - total_bounding_boxes += get_bounding_box_count(annotation_label_result) + # # If the project type is "OCRTranscriptionEditing", count bounding boxes + # if project_type == "OCRTranscriptionEditing": + # annotation_label_result = project.annotations.all().values("type") + # total_bounding_boxes += get_bounding_box_count(annotation_label_result) ws_anno_list = list(set(ws_anno_list)) ws_reviewer_list = list(set(ws_reviewer_list)) @@ -763,9 +763,9 @@ def send_user_reports_mail_ws( else "" ) ) - # Include bounding box count if project is OCRTranscriptionEditing - if project_type == "OCRTranscriptionEditing": - message += f"\nTotal Bounding Boxes: {total_bounding_boxes}" + # # Include bounding box count if project is OCRTranscriptionEditing + # if project_type == "OCRTranscriptionEditing": + # message += f"\nTotal Bounding Boxes: {total_bounding_boxes}" email = EmailMessage( @@ -776,8 +776,9 @@ def send_user_reports_mail_ws( attachments=[(filename, content, content_type)], ) email.send() - # send_user_analysis_reports_mail_ws + + @shared_task(queue="reports") def send_project_analysis_reports_mail_ws( pk, diff --git a/backend/workspaces/views.py b/backend/workspaces/views.py index 7818be8ed..c9efceb7b 100644 --- a/backend/workspaces/views.py +++ b/backend/workspaces/views.py @@ -1179,6 +1179,7 @@ def user_analytics(self, request, pk=None): labeled, avg_lead_time, total_word_count, + total_bounding_boxes, total_duration, total_raw_duration, avg_segment_duration, @@ -1224,69 +1225,17 @@ def user_analytics(self, request, pk=None): except: pass - total_word_count = sum(total_word_count_list) - - - # for OcrTranscriptionEditing project type 1277 line - elif "OCRTranscriptionEditing" in project_type: - total_bounding_boxes_annotated = 0 - total_bounding_boxes_reviewed = 0 - total_bounding_boxes_superchecked = 0 - - # Fetch tasks related to the project - tasks = Task.objects.filter(project_id__in=proj_ids) - - # For Annotator - if reports_type == "annotator": - for each_task in tasks: - try: - annotate_annotation = Annotation.objects.filter( - task=each_task, annotation_type=ANNOTATOR_ANNOTATION - ).first() # Using .first() for safety - if annotate_annotation: - total_bounding_boxes_annotated += get_bounding_box_count(annotate_annotation.result) - except Exception as e: - # .error(f"Error in Annotator Bounding Box Counting: {e}") - pass - - # For Reviewer - if reports_type == "review": - for each_task in tasks: - try: - review_annotation = Annotation.objects.filter( - task=each_task, annotation_type=REVIEWER_ANNOTATION - ).first() - if review_annotation: - total_bounding_boxes_reviewed += get_bounding_box_count(review_annotation.result) - except Exception as e: - # logger.error(f"Error in Reviewer Bounding Box Counting: {e}") - pass - - # For Super Checker - if reports_type == "supercheck": - for each_task in tasks: - try: - supercheck_annotation = Annotation.objects.filter( - task=each_task, annotation_type=SUPER_CHECKER_ANNOTATION - ).first() - if supercheck_annotation: - total_bounding_boxes_superchecked += get_bounding_box_count(supercheck_annotation.result) - except Exception as e: - # logger.error(f"Error in Superchecker Bounding Box Counting: {e}") - pass + total_word_count = sum(total_word_count_list) - # Add results to the final report - result["Bounding Boxes (Annotated)"] = total_bounding_boxes_annotated - result["Bounding Boxes (Reviewed)"] = total_bounding_boxes_reviewed - result["Bounding Boxes (SuperChecked)"] = total_bounding_boxes_superchecked - # end here OcrTranscriptionEditing project type: 1227 line elif "OCRTranscription" in project_type: total_word_count = 0 + total_bounding_boxes=0 for each_anno in labeled_annotations: total_word_count += ocr_word_count(each_anno.result) + total_bounding_boxes+= get_bounding_box_count(each_anno.result) total_duration = "0:00:00" total_raw_duration = "0:00:00" @@ -1374,6 +1323,7 @@ def user_analytics(self, request, pk=None): "Average Annotation Time (In Seconds)": round(avg_lead_time, 2), "Avg Segment Duration": round(avg_segment_duration, 2), "Average Segments Per Task": round(avg_segments_per_task, 2), + "Total Bounding Boxes": total_bounding_boxes, } else: result = { @@ -1392,20 +1342,23 @@ def user_analytics(self, request, pk=None): "Average Annotation Time (In Seconds)": round(avg_lead_time, 2), "Avg Segment Duration": round(avg_segment_duration, 2), "Average Segments Per Task": round(avg_segments_per_task, 2), + "Total Bounding Boxes": total_bounding_boxes, } if project_type in get_audio_project_types(): del result["Word Count"] + del result["Total Bounding Boxes"] elif is_translation_project or project_type in [ "SemanticTextualSimilarity_Scale5", - "OCRTranscriptionEditing", "OCRTranscription", + "OCRTranscriptionEditing" ]: del result["Total Segments Duration"] del result["Total Raw Audio Duration"] del result["Avg Segment Duration"] del result["Average Segments Per Task"] else: + del result["Total Bounding Boxes"] del result["Word Count"] del result["Total Segments Duration"] del result["Total Raw Audio Duration"] From 3bd7a853b9d262fb44325425a401bcc363a9e2c4 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Mon, 24 Mar 2025 13:02:20 +0530 Subject: [PATCH 09/22] updated1 --- backend/organizations/views.py | 1 + backend/workspaces/tasks.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/backend/organizations/views.py b/backend/organizations/views.py index 09781b624..09bc012d7 100644 --- a/backend/organizations/views.py +++ b/backend/organizations/views.py @@ -812,6 +812,7 @@ def user_analytics(self, request, pk=None): no_of_projects, no_of_workspaces_objs, total_word_count, + total_duration, total_raw_duration, avg_segment_duration, diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 26ee8efbb..440777294 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -1872,8 +1872,10 @@ def un_pack_annotation_tasks( total_word_count = sum(total_word_count_list) elif "OCRTranscription" in project_type: total_word_count = 0 + total_bounding_boxes=0 for each_anno in labeled_annotations: total_word_count += ocr_word_count(each_anno.result) + total_bounding_boxes+= get_bounding_box_count(each_anno.result) total_duration = "0:00:00" total_raw_duration = 0.0 @@ -1914,6 +1916,7 @@ def un_pack_annotation_tasks( labeled, avg_lead_time, total_word_count, + total_bounding_boxes, total_duration, total_raw_duration, avg_segment_duration, From c23fb82a32d802e81b5c9dd39af3ca6002083a34 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Mon, 24 Mar 2025 16:29:40 +0530 Subject: [PATCH 10/22] organizatio_user_analytics_done --- backend/organizations/tasks.py | 13 +++++++++++++ backend/organizations/views.py | 5 ++++- backend/workspaces/tasks.py | 2 ++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index b9141e82b..235063b4c 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -28,6 +28,7 @@ get_audio_transcription_duration, get_audio_segments_count, ocr_word_count, + get_bounding_box_count, calculate_word_error_rate_between_two_audio_transcription_annotation, ) from workspaces.tasks import ( @@ -834,6 +835,7 @@ def get_counts( labeled, avg_lead_time, total_word_count, + total_bounding_boxes, total_duration, total_raw_duration, avg_segment_duration, @@ -875,10 +877,16 @@ def get_counts( pass total_word_count = sum(total_word_count_list) + elif "OCRTranscription" in project_type: total_word_count = 0 + total_bounding_boxes=0 for each_anno in labeled_annotations: total_word_count += ocr_word_count(each_anno.result) + total_bounding_boxes+= get_bounding_box_count(each_anno.result) + + + total_duration = "0:00:00" avg_segment_duration = 0 @@ -948,6 +956,7 @@ def get_counts( project_count, no_of_workspaces_objs, total_word_count, + total_bounding_boxes, total_duration, total_raw_duration, avg_segment_duration, @@ -1556,6 +1565,7 @@ def send_user_analytics_mail_org( no_of_projects, no_of_workspaces_objs, total_word_count, + total_bounding_boxes, total_duration, total_raw_duration, avg_segment_duration, @@ -1591,6 +1601,7 @@ def send_user_analytics_mail_org( "Skipped": total_skipped_tasks_count, "Draft": total_draft_tasks_count, "Word Count": total_word_count, + "Total Bounding Boxes": total_bounding_boxes, "Total Segments Duration": total_duration, "Total Raw Audio Duration": total_raw_duration, "Average Annotation Time (In Seconds)": round(avg_lead_time, 2), @@ -1627,6 +1638,7 @@ def send_user_analytics_mail_org( "Skipped": total_skipped_tasks_count, "Draft": total_draft_tasks_count, "Word Count": total_word_count, + "Total Bounding Boxes": total_bounding_boxes, "Total Segments Duration": total_duration, "Average Annotation Time (In Seconds)": round(avg_lead_time, 2), "Participation Type": participation_type, @@ -1636,6 +1648,7 @@ def send_user_analytics_mail_org( } if project_type in get_audio_project_types(): + del temp_result["Total Bounding Boxes"] del temp_result["Word Count"] elif is_translation_project or project_type in [ "SemanticTextualSimilarity_Scale5", diff --git a/backend/organizations/views.py b/backend/organizations/views.py index 09bc012d7..38d09cdce 100644 --- a/backend/organizations/views.py +++ b/backend/organizations/views.py @@ -812,7 +812,7 @@ def user_analytics(self, request, pk=None): no_of_projects, no_of_workspaces_objs, total_word_count, - + total_bounding_boxes, total_duration, total_raw_duration, avg_segment_duration, @@ -848,6 +848,7 @@ def user_analytics(self, request, pk=None): "Skipped": total_skipped_tasks_count, "Draft": total_draft_tasks_count, "Word Count": total_word_count, + "Total Bounding Boxes": total_bounding_boxes, "Total Segments Duration": total_duration, "Total Raw Audio Duration": total_raw_duration, "Average Annotation Time (In Seconds)": round(avg_lead_time, 2), @@ -884,6 +885,7 @@ def user_analytics(self, request, pk=None): "Skipped": total_skipped_tasks_count, "Draft": total_draft_tasks_count, "Word Count": total_word_count, + "Total Bounding Boxes": total_bounding_boxes, "Total Segments Duration": total_duration, "Average Annotation Time (In Seconds)": round(avg_lead_time, 2), "Participation Type": participation_type, @@ -894,6 +896,7 @@ def user_analytics(self, request, pk=None): if project_type in get_audio_project_types(): del temp_result["Word Count"] + del temp_result["Total Bounding Boxes"] elif is_translation_project or project_type in [ "SemanticTextualSimilarity_Scale5", "OCRTranscriptionEditing", diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 440777294..fdbe1ec4f 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -1876,6 +1876,8 @@ def un_pack_annotation_tasks( for each_anno in labeled_annotations: total_word_count += ocr_word_count(each_anno.result) total_bounding_boxes+= get_bounding_box_count(each_anno.result) + + total_duration = "0:00:00" total_raw_duration = 0.0 From 84bce0925dfae5e9b81fdb06fc05b17ed1e318ca Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Mon, 24 Mar 2025 16:38:11 +0530 Subject: [PATCH 11/22] organizatio_user_analytics_done1 --- backend/dataset/admin.py | 2 +- backend/shoonya_backend/settings.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/dataset/admin.py b/backend/dataset/admin.py index 0ecdb9a6d..140171494 100644 --- a/backend/dataset/admin.py +++ b/backend/dataset/admin.py @@ -1,4 +1,4 @@ -# import resource +import resource from django.contrib import admin from import_export.admin import ImportExportActionModelAdmin from .resources import * diff --git a/backend/shoonya_backend/settings.py b/backend/shoonya_backend/settings.py index d37a1f5ea..2915ce894 100644 --- a/backend/shoonya_backend/settings.py +++ b/backend/shoonya_backend/settings.py @@ -38,7 +38,6 @@ ALLOWED_HOSTS = ["127.0.0.1", "localhost", "0.0.0.0", "*"] else: ALLOWED_HOSTS = [ - "127.0.0.1", "shoonya.ai4bharat.org", "0.0.0.0", "backend.shoonya.ai4bharat.org", From a05120155c623dfa1793969e0df8dc37f69e36f6 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Mon, 24 Mar 2025 16:41:28 +0530 Subject: [PATCH 12/22] organizatio_user_analytics_done2 --- backend/workspaces/tasks.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index fdbe1ec4f..e9f2a458c 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -2183,6 +2183,13 @@ def send_user_analysis_reports_mail_ws( pass total_word_count = sum(total_word_count_list) + # for OCRTranscriptionEditing project type to 1121 line + if project_type == "OCRTranscriptionEditing": + total_label_count = 0 + for each_anno in labeled_annotations: + total_label_count += get_bounding_box_count(each_anno.result) + + elif "OCRTranscription" in project_type: total_word_count = 0 for each_anno in labeled_annotations: @@ -2193,11 +2200,7 @@ def send_user_analysis_reports_mail_ws( avg_segment_duration = 0 avg_segments_per_task = 0 - # for OCRTranscriptionEditing project type to 1121 line - if project_type == "OCRTranscriptionEditing": - total_label_count = 0 - for each_anno in labeled_annotations: - total_label_count += get_bounding_box_count(each_anno.result) + if project_type in get_audio_project_types(): total_duration_list = [] From d42b02a8e080a57c520e1caa96163e6d11d241a0 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Mon, 24 Mar 2025 17:00:35 +0530 Subject: [PATCH 13/22] organizatio_user_analytics_done3 --- backend/workspaces/tasks.py | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index e9f2a458c..0e1e05436 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -671,11 +671,6 @@ def send_user_reports_mail_ws( ws_reviewer_list.extend(reviewer_ids) ws_superchecker_list.extend(superchecker_ids) - # # If the project type is "OCRTranscriptionEditing", count bounding boxes - # if project_type == "OCRTranscriptionEditing": - # annotation_label_result = project.annotations.all().values("type") - # total_bounding_boxes += get_bounding_box_count(annotation_label_result) - ws_anno_list = list(set(ws_anno_list)) ws_reviewer_list = list(set(ws_reviewer_list)) ws_superchecker_list = list(set(ws_superchecker_list)) @@ -762,11 +757,7 @@ def send_user_reports_mail_ws( if start_date else "" ) - ) - # # Include bounding box count if project is OCRTranscriptionEditing - # if project_type == "OCRTranscriptionEditing": - # message += f"\nTotal Bounding Boxes: {total_bounding_boxes}" - + ) email = EmailMessage( f"{workspace.workspace_name}" + " Payment Reports", @@ -2182,18 +2173,14 @@ def send_user_analysis_reports_mail_ws( except: pass - total_word_count = sum(total_word_count_list) - # for OCRTranscriptionEditing project type to 1121 line - if project_type == "OCRTranscriptionEditing": - total_label_count = 0 - for each_anno in labeled_annotations: - total_label_count += get_bounding_box_count(each_anno.result) - + total_word_count = sum(total_word_count_list) elif "OCRTranscription" in project_type: total_word_count = 0 + total_bounding_boxes = 0 for each_anno in labeled_annotations: total_word_count += ocr_word_count(each_anno.result) + total_bounding_boxes += get_bounding_box_count(each_anno.result) total_duration = "0:00:00" total_raw_duration = "0:00:00" @@ -2282,11 +2269,7 @@ def send_user_analysis_reports_mail_ws( "Average Annotation Time (In Seconds)": round(avg_lead_time, 2), "Avg Segment Duration": round(avg_segment_duration, 2), "Average Segments Per Task": round(avg_segments_per_task, 2), - } - # Add Total Label Count only for OCRTranscriptionEditing projects - if project_type == "OCRTranscriptionEditing": - result["Total Label Count"] = total_label_count - + } else: result = { "Annotator": name, @@ -2305,6 +2288,10 @@ def send_user_analysis_reports_mail_ws( "Avg Segment Duration": round(avg_segment_duration, 2), "Average Segments Per Task": round(avg_segments_per_task, 2), } + if "OCRTranscription" in project_type: + result["Total Bounding Boxes"] = total_bounding_boxes + + if project_type in get_audio_project_types(): del result["Word Count"] From 29d05f2109ecbefedf756a7f157103be60265271 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Tue, 25 Mar 2025 13:24:05 +0530 Subject: [PATCH 14/22] updated --- backend/dataset/admin.py | 2 +- backend/organizations/tasks.py | 9 ++++--- backend/organizations/views.py | 4 --- backend/shoonya_backend/settings.py | 1 + backend/workspaces/tasks.py | 38 ++++++++++++++++++++++++++--- 5 files changed, 43 insertions(+), 11 deletions(-) diff --git a/backend/dataset/admin.py b/backend/dataset/admin.py index 140171494..0ecdb9a6d 100644 --- a/backend/dataset/admin.py +++ b/backend/dataset/admin.py @@ -1,4 +1,4 @@ -import resource +# import resource from django.contrib import admin from import_export.admin import ImportExportActionModelAdmin from .resources import * diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index 235063b4c..c79a7bd93 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -784,6 +784,9 @@ def get_counts( accepted_wt_minor_changes = 0 accepted_wt_major_changes = 0 labeled = 0 + # total_bounding_boxes = 0 + + if tgt_language == None: if project_progress_stage == None: projects_objs = Project.objects.filter( @@ -880,10 +883,10 @@ def get_counts( elif "OCRTranscription" in project_type: total_word_count = 0 - total_bounding_boxes=0 + total_bounding_boxes = 0 for each_anno in labeled_annotations: total_word_count += ocr_word_count(each_anno.result) - total_bounding_boxes+= get_bounding_box_count(each_anno.result) + total_bounding_boxes += get_bounding_box_count(each_anno.result) @@ -956,7 +959,7 @@ def get_counts( project_count, no_of_workspaces_objs, total_word_count, - total_bounding_boxes, + # total_bounding_boxes, total_duration, total_raw_duration, avg_segment_duration, diff --git a/backend/organizations/views.py b/backend/organizations/views.py index 38d09cdce..09781b624 100644 --- a/backend/organizations/views.py +++ b/backend/organizations/views.py @@ -812,7 +812,6 @@ def user_analytics(self, request, pk=None): no_of_projects, no_of_workspaces_objs, total_word_count, - total_bounding_boxes, total_duration, total_raw_duration, avg_segment_duration, @@ -848,7 +847,6 @@ def user_analytics(self, request, pk=None): "Skipped": total_skipped_tasks_count, "Draft": total_draft_tasks_count, "Word Count": total_word_count, - "Total Bounding Boxes": total_bounding_boxes, "Total Segments Duration": total_duration, "Total Raw Audio Duration": total_raw_duration, "Average Annotation Time (In Seconds)": round(avg_lead_time, 2), @@ -885,7 +883,6 @@ def user_analytics(self, request, pk=None): "Skipped": total_skipped_tasks_count, "Draft": total_draft_tasks_count, "Word Count": total_word_count, - "Total Bounding Boxes": total_bounding_boxes, "Total Segments Duration": total_duration, "Average Annotation Time (In Seconds)": round(avg_lead_time, 2), "Participation Type": participation_type, @@ -896,7 +893,6 @@ def user_analytics(self, request, pk=None): if project_type in get_audio_project_types(): del temp_result["Word Count"] - del temp_result["Total Bounding Boxes"] elif is_translation_project or project_type in [ "SemanticTextualSimilarity_Scale5", "OCRTranscriptionEditing", diff --git a/backend/shoonya_backend/settings.py b/backend/shoonya_backend/settings.py index 2915ce894..d37a1f5ea 100644 --- a/backend/shoonya_backend/settings.py +++ b/backend/shoonya_backend/settings.py @@ -38,6 +38,7 @@ ALLOWED_HOSTS = ["127.0.0.1", "localhost", "0.0.0.0", "*"] else: ALLOWED_HOSTS = [ + "127.0.0.1", "shoonya.ai4bharat.org", "0.0.0.0", "backend.shoonya.ai4bharat.org", diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 0e1e05436..c6f0026f8 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -646,7 +646,6 @@ def send_user_reports_mail_ws( ws_anno_list = [] ws_reviewer_list = [] ws_superchecker_list = [] - # total_bounding_boxes = 0 # Initialize bounding box count for project in proj_objs: anno_list = project.annotators.all() @@ -1261,8 +1260,12 @@ def get_supercheck_reports(proj_ids, userid, start_date, end_date, project_type= is_translation_project = True if "translation" in project_type_lower else False validated_word_count_list = [] + validated_bounding_boxes_list = [] validated_with_changes_word_count_list = [] + validated_with_changes_bounding_boxes_list=[] rejected_word_count_list = [] + rejected_bounding_boxes_list=[] + validated_audio_duration_list = [] validated_with_changes_audio_duration_list = [] rejected_audio_duration_list = [] @@ -1286,15 +1289,24 @@ def get_supercheck_reports(proj_ids, userid, start_date, end_date, project_type= rejected_word_count_list.append(anno.task.data["word_count"]) except: pass + + elif "OCRTranscription" in project_type: for anno in validated_objs: validated_word_count_list.append(ocr_word_count(anno.result)) + validated_bounding_boxes_list+= get_bounding_box_count(anno.result) for anno in validated_with_changes_objs: validated_with_changes_word_count_list.append( ocr_word_count(anno.result) ) + validated_with_changes_bounding_boxes_list+= get_bounding_box_count(anno.result) for anno in rejected_objs: rejected_word_count_list.append(ocr_word_count(anno.result)) + rejected_bounding_boxes_list+= get_bounding_box_count(anno.result) + + + + elif project_type in get_audio_project_types(): for anno in validated_objs: try: @@ -1342,6 +1354,13 @@ def get_supercheck_reports(proj_ids, userid, start_date, end_date, project_type= validated_word_count = sum(validated_word_count_list) validated_with_changes_word_count = sum(validated_with_changes_word_count_list) rejected_word_count = sum(rejected_word_count_list) + + validated_bounding_boxes_count = sum(validated_bounding_boxes_list) + validated_with_changes_bounding_boxes_count = sum(validated_with_changes_bounding_boxes_list) + rejected_bounding_boxes_count = sum(rejected_bounding_boxes_list) + + + validated_audio_duration = convert_seconds_to_hours( sum(validated_audio_duration_list) ) @@ -1581,14 +1600,20 @@ def get_review_reports( total_audio_duration_list = [] total_raw_audio_duration_list = [] total_word_count_list = [] + total_bounding_boxes_list = 0 total_word_error_rate_ar_list = [] total_word_error_rate_rs_list = [] if is_translation_project or project_type == "SemanticTextualSimilarity_Scale5": for anno in total_rev_annos_accepted: total_word_count_list.append(anno.task.data["word_count"]) + + elif "OCRTranscription" in project_type: for anno in total_rev_annos_accepted: total_word_count_list.append(ocr_word_count(anno.result)) + total_bounding_boxes_list+= get_bounding_box_count(anno.result) + + elif project_type in get_audio_project_types(): for anno in total_rev_annos_accepted: try: @@ -1616,6 +1641,8 @@ def get_review_reports( pass total_word_count = sum(total_word_count_list) + total_bounding_boxes=total_bounding_boxes_list + total_audio_duration = convert_seconds_to_hours(sum(total_audio_duration_list)) total_raw_audio_duration = convert_seconds_to_hours( sum(total_raw_audio_duration_list) @@ -1700,6 +1727,7 @@ def get_review_reports( "Rejected": accepted_rejected_tasks.count(), "Average Rejection Loop Value": round(avg_rejection_loop_value, 2), "Tasks Rejected Maximum Time": tasks_rejected_max_times, + "Total Bounding Boxes": total_bounding_boxes, } if project_type != None: @@ -1709,6 +1737,7 @@ def get_review_reports( "OCRTranscription", ]: result["Total Word Count"] = total_word_count + result["Total Bounding Boxes"]= total_bounding_boxes elif project_type in get_audio_project_types(): result["Total Segments Duration"] = total_audio_duration result["Total Raw Audio Duration"] = total_raw_audio_duration @@ -1733,6 +1762,7 @@ def get_review_reports( "Draft": draft_tasks_count, "Average Rejection Loop Value": round(avg_rejection_loop_value, 2), "Tasks Rejected Maximum Time": tasks_rejected_max_times, + "Total Bounding Boxes": total_bounding_boxes, } if project_type != None: @@ -1742,6 +1772,7 @@ def get_review_reports( "OCRTranscription", ]: result["Total Word Count"] = total_word_count + # result["Total Bounding Boxes"]= total_bounding_boxes, elif project_type in get_audio_project_types(): result["Total Segments Duration"] = total_audio_duration result["Total Raw Audio Duration"] = total_raw_audio_duration @@ -1861,12 +1892,13 @@ def un_pack_annotation_tasks( pass total_word_count = sum(total_word_count_list) + elif "OCRTranscription" in project_type: total_word_count = 0 total_bounding_boxes=0 for each_anno in labeled_annotations: total_word_count += ocr_word_count(each_anno.result) - total_bounding_boxes+= get_bounding_box_count(each_anno.result) + total_bounding_boxes += get_bounding_box_count(each_anno.result) @@ -2128,7 +2160,7 @@ def send_user_analysis_reports_mail_ws( labeled, avg_lead_time, total_word_count, - total_duration, + total_bounding_boxes, total_raw_duration, avg_segment_duration, avg_segments_per_task, From 6522ddf55a5817989e97cede71b02157752018aa Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Wed, 26 Mar 2025 10:54:18 +0530 Subject: [PATCH 15/22] updated2 --- backend/organizations/tasks.py | 8 ++------ backend/workspaces/tasks.py | 23 +++++++++++++++-------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index c79a7bd93..f1dcd4c12 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -883,10 +883,10 @@ def get_counts( elif "OCRTranscription" in project_type: total_word_count = 0 - total_bounding_boxes = 0 + # total_bounding_boxes = 0 for each_anno in labeled_annotations: total_word_count += ocr_word_count(each_anno.result) - total_bounding_boxes += get_bounding_box_count(each_anno.result) + # total_bounding_boxes += get_bounding_box_count(each_anno.result) @@ -1568,7 +1568,6 @@ def send_user_analytics_mail_org( no_of_projects, no_of_workspaces_objs, total_word_count, - total_bounding_boxes, total_duration, total_raw_duration, avg_segment_duration, @@ -1604,7 +1603,6 @@ def send_user_analytics_mail_org( "Skipped": total_skipped_tasks_count, "Draft": total_draft_tasks_count, "Word Count": total_word_count, - "Total Bounding Boxes": total_bounding_boxes, "Total Segments Duration": total_duration, "Total Raw Audio Duration": total_raw_duration, "Average Annotation Time (In Seconds)": round(avg_lead_time, 2), @@ -1641,7 +1639,6 @@ def send_user_analytics_mail_org( "Skipped": total_skipped_tasks_count, "Draft": total_draft_tasks_count, "Word Count": total_word_count, - "Total Bounding Boxes": total_bounding_boxes, "Total Segments Duration": total_duration, "Average Annotation Time (In Seconds)": round(avg_lead_time, 2), "Participation Type": participation_type, @@ -1651,7 +1648,6 @@ def send_user_analytics_mail_org( } if project_type in get_audio_project_types(): - del temp_result["Total Bounding Boxes"] del temp_result["Word Count"] elif is_translation_project or project_type in [ "SemanticTextualSimilarity_Scale5", diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index c6f0026f8..4a788537c 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -1260,10 +1260,12 @@ def get_supercheck_reports(proj_ids, userid, start_date, end_date, project_type= is_translation_project = True if "translation" in project_type_lower else False validated_word_count_list = [] - validated_bounding_boxes_list = [] validated_with_changes_word_count_list = [] - validated_with_changes_bounding_boxes_list=[] rejected_word_count_list = [] + + # Bounding_boxes + validated_bounding_boxes_list = [] + validated_with_changes_bounding_boxes_list=[] rejected_bounding_boxes_list=[] validated_audio_duration_list = [] @@ -1294,15 +1296,16 @@ def get_supercheck_reports(proj_ids, userid, start_date, end_date, project_type= elif "OCRTranscription" in project_type: for anno in validated_objs: validated_word_count_list.append(ocr_word_count(anno.result)) - validated_bounding_boxes_list+= get_bounding_box_count(anno.result) + validated_bounding_boxes_list += get_bounding_box_count(anno.result) + for anno in validated_with_changes_objs: validated_with_changes_word_count_list.append( ocr_word_count(anno.result) ) - validated_with_changes_bounding_boxes_list+= get_bounding_box_count(anno.result) + validated_with_changes_bounding_boxes_list += get_bounding_box_count(anno.result) for anno in rejected_objs: rejected_word_count_list.append(ocr_word_count(anno.result)) - rejected_bounding_boxes_list+= get_bounding_box_count(anno.result) + rejected_bounding_boxes_list += get_bounding_box_count(anno.result) @@ -1404,10 +1407,14 @@ def get_supercheck_reports(proj_ids, userid, start_date, end_date, project_type= "OCRTranscription", ]: result["Validated Word Count"] = validated_word_count - result[ - "Validated With Changes Word Count" - ] = validated_with_changes_word_count + result["Validated With Changes Word Count"] = validated_with_changes_word_count result["Rejected Word Count"] = rejected_word_count + + result["Validated bounding boxes Count"] = validated_bounding_boxes_count + result["Validated With Changes bounding boxes Count"] = validated_with_changes_bounding_boxes_count + result["Rejected bounding boxes Count"] = rejected_bounding_boxes_count + + elif project_type in get_audio_project_types(): result["Validated Audio Duration"] = validated_audio_duration result[ From 5668d0f47b93ac03e6d5c340134b322a3e7926b6 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Wed, 26 Mar 2025 12:27:34 +0530 Subject: [PATCH 16/22] payment_report --- backend/workspaces/tasks.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 4a788537c..4ce093119 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -159,6 +159,8 @@ def get_all_annotation_reports( total_audio_duration_list = [] total_raw_audio_duration_list = [] total_word_count_list = [] + total_bounding_boxes_count_list = [] + only_tasks = False if is_translation_project: for anno in submitted_tasks: @@ -169,6 +171,7 @@ def get_all_annotation_reports( elif "OCRTranscription" in project_type: for anno in submitted_tasks: total_word_count_list.append(ocr_word_count(anno.result)) + total_bounding_boxes_count_list.append(get_bounding_box_count(anno.result)) elif ( project_type in get_audio_project_types() or project_type == "AllAudioProjects" ): @@ -184,6 +187,8 @@ def get_all_annotation_reports( only_tasks = True total_word_count = sum(total_word_count_list) + total_bounding_boxes_count = sum(total_bounding_boxes_count_list) + total_audio_duration = convert_seconds_to_hours(sum(total_audio_duration_list)) total_raw_audio_duration = convert_seconds_to_hours( sum(total_raw_audio_duration_list) @@ -202,6 +207,7 @@ def get_all_annotation_reports( "Total Segments Duration": total_audio_duration, "Total Raw Audio Duration": total_raw_audio_duration, "Word Count": total_word_count, + "Total Bounding Boxes Count": total_bounding_boxes_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, "Average Word Error Rate Annotator Vs Reviewer": ar_wer_score @@ -230,6 +236,7 @@ def get_all_annotation_reports( if project_type in get_audio_project_types() or project_type == "AllAudioProjects": del result["Word Count"] + del result["Total Bounding Boxes Count"] elif only_tasks: del result["Total Segments Duration"] del result["Total Raw Audio Duration"] @@ -366,6 +373,8 @@ def get_all_review_reports( total_audio_duration_list = [] total_raw_audio_duration_list = [] total_word_count_list = [] + total_bounding_boxes_count_list = [] + only_tasks = False if is_translation_project: for anno in submitted_tasks: @@ -376,6 +385,7 @@ def get_all_review_reports( elif "OCRTranscription" in project_type: for anno in submitted_tasks: total_word_count_list.append(ocr_word_count(anno.result)) + total_bounding_boxes_count_list.append(get_bounding_box_count(anno.result)) elif ( project_type in get_audio_project_types() or project_type == "AllAudioProjects" ): @@ -389,7 +399,10 @@ def get_all_review_reports( pass else: only_tasks = True + total_word_count = sum(total_word_count_list) + total_bounding_boxes_count = sum(total_bounding_boxes_count_list) + total_audio_duration = convert_seconds_to_hours(sum(total_audio_duration_list)) total_raw_audio_duration = convert_seconds_to_hours( sum(total_raw_audio_duration_list) @@ -413,6 +426,7 @@ def get_all_review_reports( "Total Segments Duration": total_audio_duration, "Total Raw Audio Duration": total_raw_audio_duration, "Word Count": total_word_count, + "total bounding boxes count" : total_bounding_boxes_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, "Average Word Error Rate Reviewer Vs Superchecker": rs_wer_score @@ -438,6 +452,7 @@ def get_all_review_reports( if project_type in get_audio_project_types() or project_type == "AllAudioProjects": del result["Word Count"] + del result["total bounding boxes count"] elif only_tasks: del result["Total Segments Duration"] del result["Total Raw Audio Duration"] @@ -508,6 +523,8 @@ def get_all_supercheck_reports( else False ) validated_word_count_list = [] + validated_bounding_boxes_count_list = [] + validated_audio_duration_list = [] validated_raw_audio_duration_list = [] only_tasks = False @@ -520,6 +537,7 @@ def get_all_supercheck_reports( elif "OCRTranscription" in project_type: for anno in submitted_tasks: validated_word_count_list.append(ocr_word_count(anno.result)) + validated_bounding_boxes_count_list.append(get_bounding_box_count(anno.result)) elif ( project_type in get_audio_project_types() or project_type == "AllAudioProjects" ): @@ -537,6 +555,8 @@ def get_all_supercheck_reports( only_tasks = True validated_word_count = sum(validated_word_count_list) + validated_bounding_boxes_count = sum(validated_bounding_boxes_count_list) + validated_audio_duration = convert_seconds_to_hours( sum(validated_audio_duration_list) ) @@ -557,6 +577,7 @@ def get_all_supercheck_reports( "Total Segments Duration": validated_audio_duration, "Total Raw Audio Duration": validated_raw_audio_duration, "Word Count": validated_word_count, + "validated bounding boxes count" : validated_bounding_boxes_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, "Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs @@ -567,6 +588,8 @@ def get_all_supercheck_reports( if project_type in get_audio_project_types() or project_type == "AllAudioProjects": del result["Word Count"] + del result["validated bounding boxes count"] + elif only_tasks: del result["Total Segments Duration"] del result["Total Raw Audio Duration"] From 6359e862e4fa45970b5d0fd1c475a2139647cf87 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Wed, 26 Mar 2025 17:40:55 +0530 Subject: [PATCH 17/22] payment_report1 --- backend/dataset/admin.py | 2 +- backend/shoonya_backend/settings.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/dataset/admin.py b/backend/dataset/admin.py index 0ecdb9a6d..140171494 100644 --- a/backend/dataset/admin.py +++ b/backend/dataset/admin.py @@ -1,4 +1,4 @@ -# import resource +import resource from django.contrib import admin from import_export.admin import ImportExportActionModelAdmin from .resources import * diff --git a/backend/shoonya_backend/settings.py b/backend/shoonya_backend/settings.py index d37a1f5ea..2915ce894 100644 --- a/backend/shoonya_backend/settings.py +++ b/backend/shoonya_backend/settings.py @@ -38,7 +38,6 @@ ALLOWED_HOSTS = ["127.0.0.1", "localhost", "0.0.0.0", "*"] else: ALLOWED_HOSTS = [ - "127.0.0.1", "shoonya.ai4bharat.org", "0.0.0.0", "backend.shoonya.ai4bharat.org", From 2b72fb52a5fd0b756f23bea0f44c4a28b63de328 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Thu, 27 Mar 2025 11:40:20 +0530 Subject: [PATCH 18/22] payment_report2 --- .gitignore | 4 ++++ backend/organizations/tasks.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 25b9ed05b..d972cd233 100644 --- a/.gitignore +++ b/.gitignore @@ -59,3 +59,7 @@ $RECYCLE.BIN/ # Logs **/logs/* + +# resources from admin.py in dataset +**/import resources/* +**/"127.0.0.1"/* diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index f1dcd4c12..c5610f8b4 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -28,7 +28,7 @@ get_audio_transcription_duration, get_audio_segments_count, ocr_word_count, - get_bounding_box_count, + # get_bounding_box_count, calculate_word_error_rate_between_two_audio_transcription_annotation, ) from workspaces.tasks import ( @@ -838,7 +838,7 @@ def get_counts( labeled, avg_lead_time, total_word_count, - total_bounding_boxes, + # total_bounding_boxes, total_duration, total_raw_duration, avg_segment_duration, From a56065bec2365637ad596841a5618eb58e81e57d Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Thu, 27 Mar 2025 16:35:42 +0530 Subject: [PATCH 19/22] payment_report_org --- .gitignore | 4 ++-- backend/dataset/admin.py | 2 +- backend/organizations/tasks.py | 25 ++++++++++++++++++++++++- backend/shoonya_backend/settings.py | 1 + 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index d972cd233..809e3b6d9 100644 --- a/.gitignore +++ b/.gitignore @@ -61,5 +61,5 @@ $RECYCLE.BIN/ **/logs/* # resources from admin.py in dataset -**/import resources/* -**/"127.0.0.1"/* +/backend/shoonya_backend/settings.py/ +/backend/dataset/admin.py diff --git a/backend/dataset/admin.py b/backend/dataset/admin.py index 140171494..0ecdb9a6d 100644 --- a/backend/dataset/admin.py +++ b/backend/dataset/admin.py @@ -1,4 +1,4 @@ -import resource +# import resource from django.contrib import admin from import_export.admin import ImportExportActionModelAdmin from .resources import * diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index c5610f8b4..b001c975b 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -28,7 +28,7 @@ get_audio_transcription_duration, get_audio_segments_count, ocr_word_count, - # get_bounding_box_count, + get_bounding_box_count, calculate_word_error_rate_between_two_audio_transcription_annotation, ) from workspaces.tasks import ( @@ -161,16 +161,20 @@ def get_all_annotation_reports( total_audio_duration_list = [] total_raw_audio_duration_list = [] total_word_count_list = [] + total_bounding_boxes_count_list = [] + only_tasks = False if is_translation_project: for anno in submitted_tasks: try: total_word_count_list.append(anno.task.data["word_count"]) + total_bounding_boxes_count_list.append(get_bounding_box_count(anno.result)) except: pass elif "OCRTranscription" in project_type: for anno in submitted_tasks: total_word_count_list.append(ocr_word_count(anno.result)) + total_bounding_boxes_count_list.append(get_bounding_box_count(anno.result)) elif ( project_type in get_audio_project_types() or project_type == "AllAudioProjects" ): @@ -186,6 +190,8 @@ def get_all_annotation_reports( only_tasks = True total_word_count = sum(total_word_count_list) + total_bounding_boxes_count = sum(total_bounding_boxes_count_list) + total_audio_duration = convert_seconds_to_hours(sum(total_audio_duration_list)) total_raw_audio_duration = convert_seconds_to_hours( sum(total_raw_audio_duration_list) @@ -203,6 +209,7 @@ def get_all_annotation_reports( "Total Segments Duration": total_audio_duration, "Total Raw Audio Duration": total_raw_audio_duration, "Word Count": total_word_count, + "Total Bounding Boxes Count": total_bounding_boxes_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, "Average Word Error Rate Annotator Vs Reviewer": ar_wer_score @@ -231,6 +238,7 @@ def get_all_annotation_reports( if project_type in get_audio_project_types() or project_type == "AllAudioProjects": del result["Word Count"] + del result["Total Bounding Boxes Count"] elif only_tasks: del result["Total Segments Duration"] del result["Total Raw Audio Duration"] @@ -367,6 +375,8 @@ def get_all_review_reports( total_audio_duration_list = [] total_raw_audio_duration_list = [] total_word_count_list = [] + total_bounding_boxes_count_list = [] + only_tasks = False if is_translation_project: for anno in submitted_tasks: @@ -377,6 +387,8 @@ def get_all_review_reports( elif "OCRTranscription" in project_type: for anno in submitted_tasks: total_word_count_list.append(ocr_word_count(anno.result)) + total_bounding_boxes_count_list.append(get_bounding_box_count(anno.result)) + elif ( project_type in get_audio_project_types() or project_type == "AllAudioProjects" ): @@ -392,6 +404,8 @@ def get_all_review_reports( only_tasks = True total_word_count = sum(total_word_count_list) + total_bounding_boxes_count = sum(total_bounding_boxes_count_list) + total_audio_duration = convert_seconds_to_hours(sum(total_audio_duration_list)) total_raw_audio_duration = convert_seconds_to_hours( sum(total_raw_audio_duration_list) @@ -415,6 +429,7 @@ def get_all_review_reports( "Total Segments Duration": total_audio_duration, "Total Raw Audio Duration": total_raw_audio_duration, "Word Count": total_word_count, + "total bounding boxes count" : total_bounding_boxes_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, "Average Word Error Rate Reviewer Vs Superchecker": rs_wer_score @@ -440,6 +455,7 @@ def get_all_review_reports( if project_type in get_audio_project_types() or project_type == "AllAudioProjects": del result["Word Count"] + del result["total bounding boxes count"] elif only_tasks: del result["Total Segments Duration"] del result["Total Raw Audio Duration"] @@ -509,6 +525,8 @@ def get_all_supercheck_reports( else False ) validated_word_count_list = [] + validated_bounding_boxes_count_list = [] + validated_audio_duration_list = [] validated_raw_audio_duration_list = [] only_tasks = False @@ -516,6 +534,7 @@ def get_all_supercheck_reports( for anno in submitted_tasks: try: validated_word_count_list.append(anno.task.data["word_count"]) + validated_bounding_boxes_count_list.append(get_bounding_box_count(anno.result)) except: pass elif "OCRTranscription" in project_type: @@ -538,6 +557,8 @@ def get_all_supercheck_reports( only_tasks = True validated_word_count = sum(validated_word_count_list) + validated_bounding_boxes_count = sum(validated_bounding_boxes_count_list) + validated_audio_duration = convert_seconds_to_hours( sum(validated_audio_duration_list) ) @@ -558,6 +579,7 @@ def get_all_supercheck_reports( "Total Segments Duration": validated_audio_duration, "Total Raw Audio Duration": validated_raw_audio_duration, "Word Count": validated_word_count, + "validated bounding boxes count" : validated_bounding_boxes_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, "Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs @@ -568,6 +590,7 @@ def get_all_supercheck_reports( if project_type in get_audio_project_types() or project_type == "AllAudioProjects": del result["Word Count"] + del result["validated bounding boxes count"] elif only_tasks: del result["Total Segments Duration"] del result["Total Raw Audio Duration"] diff --git a/backend/shoonya_backend/settings.py b/backend/shoonya_backend/settings.py index 2915ce894..d37a1f5ea 100644 --- a/backend/shoonya_backend/settings.py +++ b/backend/shoonya_backend/settings.py @@ -38,6 +38,7 @@ ALLOWED_HOSTS = ["127.0.0.1", "localhost", "0.0.0.0", "*"] else: ALLOWED_HOSTS = [ + "127.0.0.1", "shoonya.ai4bharat.org", "0.0.0.0", "backend.shoonya.ai4bharat.org", From 1571732233f681317cb7ea8c232b643ccf8759e2 Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Thu, 27 Mar 2025 16:37:09 +0530 Subject: [PATCH 20/22] payment_report_org --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 809e3b6d9..ec2535a86 100644 --- a/.gitignore +++ b/.gitignore @@ -61,5 +61,5 @@ $RECYCLE.BIN/ **/logs/* # resources from admin.py in dataset -/backend/shoonya_backend/settings.py/ -/backend/dataset/admin.py +backend/dataset/admin.py +backend/shoonya_backend/settings.py From 9250d3d95e539509aac050e7abe0e6dac929643f Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Thu, 27 Mar 2025 16:50:30 +0530 Subject: [PATCH 21/22] payment_report_org --- backend/dataset/admin.py | 2 +- backend/shoonya_backend/settings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/dataset/admin.py b/backend/dataset/admin.py index 0ecdb9a6d..140171494 100644 --- a/backend/dataset/admin.py +++ b/backend/dataset/admin.py @@ -1,4 +1,4 @@ -# import resource +import resource from django.contrib import admin from import_export.admin import ImportExportActionModelAdmin from .resources import * diff --git a/backend/shoonya_backend/settings.py b/backend/shoonya_backend/settings.py index d37a1f5ea..43bc8a1cb 100644 --- a/backend/shoonya_backend/settings.py +++ b/backend/shoonya_backend/settings.py @@ -38,7 +38,7 @@ ALLOWED_HOSTS = ["127.0.0.1", "localhost", "0.0.0.0", "*"] else: ALLOWED_HOSTS = [ - "127.0.0.1", + "shoonya.ai4bharat.org", "0.0.0.0", "backend.shoonya.ai4bharat.org", From 14418d5b50793381d0fbb2ad71213851d485669d Mon Sep 17 00:00:00 2001 From: munishmangla98 Date: Thu, 27 Mar 2025 16:51:39 +0530 Subject: [PATCH 22/22] payment_report_org --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index ec2535a86..cf20eb5b2 100644 --- a/.gitignore +++ b/.gitignore @@ -61,5 +61,5 @@ $RECYCLE.BIN/ **/logs/* # resources from admin.py in dataset -backend/dataset/admin.py -backend/shoonya_backend/settings.py +# backend/dataset/admin.py +# backend/shoonya_backend/settings.py