From ea482d5bfd6359dc49d406067d17794451272fa2 Mon Sep 17 00:00:00 2001 From: Ishan Gujarathi Date: Fri, 19 Apr 2024 11:27:32 +0530 Subject: [PATCH 01/44] added a feature to remove user both as worskpace member and workspace manager when user is marked as inactive at organization level --- backend/users/views.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/backend/users/views.py b/backend/users/views.py index b02aeb403..ccd44f389 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -59,6 +59,7 @@ from rest_framework_simplejwt.tokens import RefreshToken from dotenv import load_dotenv import logging +from workspaces.views import WorkspaceusersViewSet logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -675,6 +676,39 @@ def user_details_update(self, request, pk=None): user = User.objects.get(id=pk) serializer = UserUpdateSerializer(user, request.data, partial=True) + existing_is_active = user.is_active + is_active_payload = request.data.get("is_active", None) + + if existing_is_active == is_active_payload: + pass + else: + if is_active_payload is False: + workspaces = Workspace.objects.filter( + Q(members=user) | Q(managers=user) + ).distinct() + + workspacecustomviewset_obj = WorkspaceCustomViewSet() + request.data["ids"] = [user.id] + + workspaceusersviewset_obj = WorkspaceusersViewSet() + request.data["user_id"] = user.id + + for workspace in workspaces: + workspacecustomviewset_obj.unassign_manager( + request=request, pk=workspace.pk + ) + + workspaceusersviewset_obj.remove_members( + request=request, pk=workspace.pk + ) + + return Response( + { + "message": "User removed from all workspaces both as workspace member and workspace manager" + }, + status=status.HTTP_200_OK, + ) + if request.data["role"] != user.role: new_role = int(request.data["role"]) old_role = int(user.role) From 8219ef735ad63cc2565ef5d480cba084da62bcfc Mon Sep 17 00:00:00 2001 From: Ishan Gujarathi Date: Tue, 23 Apr 2024 16:33:37 +0530 Subject: [PATCH 02/44] disabling user from daily emails when user is marked inactive at organization level --- backend/users/views.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/users/views.py b/backend/users/views.py index ccd44f389..2f6506ed0 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -683,6 +683,9 @@ def user_details_update(self, request, pk=None): pass else: if is_active_payload is False: + if user.enable_mail: + user.enable_mail = False + user.save() workspaces = Workspace.objects.filter( Q(members=user) | Q(managers=user) ).distinct() From 7b05c4d3a8b691ed1acb315d680b1f8f38823c18 Mon Sep 17 00:00:00 2001 From: ch20b063 Date: Thu, 25 Apr 2024 09:41:01 +0530 Subject: [PATCH 03/44] stopping_task_pr_1 --- backend/tasks/urls.py | 2 ++ backend/tasks/views.py | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/backend/tasks/urls.py b/backend/tasks/urls.py index d4f229e6f..f74f31963 100644 --- a/backend/tasks/urls.py +++ b/backend/tasks/urls.py @@ -6,6 +6,7 @@ AnnotationViewSet, PredictionViewSet, get_celery_tasks, + stopping_celery_tasks, ) router = routers.DefaultRouter() @@ -15,4 +16,5 @@ urlpatterns = [ path("get_celery_tasks/", get_celery_tasks), + path("stopping_celery_tasks/", stopping_celery_tasks), ] + router.urls diff --git a/backend/tasks/views.py b/backend/tasks/views.py index 43517ac06..20774687c 100644 --- a/backend/tasks/views.py +++ b/backend/tasks/views.py @@ -11,6 +11,11 @@ from django.utils import timezone from 
django.core.paginator import Paginator, EmptyPage, PageNotAnInteger import json +from celery import Celery + +# from flower.api import Flower +# flower_app = Flower() +celery_app = Celery() from django.core.exceptions import ObjectDoesNotExist from django.http import StreamingHttpResponse, FileResponse @@ -2598,3 +2603,24 @@ def get_celery_tasks(request): page_size = int(request.GET.get("page_size", 10)) data = paginate_queryset(filtered_tasks, page_number, page_size) return JsonResponse(data["results"], safe=False) + + +def stopping_celery_tasks(req): + task_id = req.GET.get("task_id") + + if task_id is None: + return JsonResponse({"message": "Task ID is required"}, status=400) + + task = celery_app.AsyncResult(task_id) + + if task is None or task.state == "PENDING": + return JsonResponse({"message": "Task not found or not running"}, status=404) + + if task.state in ["SUCCESS", "FAILURE", "REVOKED"]: + return JsonResponse( + {"message": "Task already completed or revoked"}, status=400 + ) + + task.revoke(terminate=True) + + return JsonResponse({"message": "Task stopped successfully"}, status=200) From e8c8edcd27d45ed11236be27fa01ee309a097157 Mon Sep 17 00:00:00 2001 From: Ishan Gujarathi Date: Mon, 29 Apr 2024 17:31:54 +0530 Subject: [PATCH 04/44] fixed the bug regarding status not getting changed in the Active status column --- backend/users/views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/users/views.py b/backend/users/views.py index 2f6506ed0..ccdc3bd5c 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -704,7 +704,8 @@ def user_details_update(self, request, pk=None): workspaceusersviewset_obj.remove_members( request=request, pk=workspace.pk ) - + user.is_active = False + user.save() return Response( { "message": "User removed from all workspaces both as workspace member and workspace manager" From 3faec989d8366d0e7b6df63c8b62c4cb7f0cc008 Mon Sep 17 00:00:00 2001 From: Pursottam6003 Date: Thu, 2 May 2024 17:19:52 +0530 Subject: [PATCH 05/44] updated the email for user analytics report --- backend/loging/tasks.py | 28 ++++++++-- backend/organizations/tasks.py | 68 +++++++++++++++-------- backend/utils/email_template.py | 98 +++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 26 deletions(-) create mode 100644 backend/utils/email_template.py diff --git a/backend/loging/tasks.py b/backend/loging/tasks.py index 440c8047a..0cc76b84e 100644 --- a/backend/loging/tasks.py +++ b/backend/loging/tasks.py @@ -1,8 +1,9 @@ from celery import shared_task from datetime import datetime from azure.storage.blob import BlobServiceClient, generate_blob_sas, BlobSasPermissions -from django.core.mail import EmailMessage +from django.core.mail import EmailMessage, EmailMultiAlternatives from django.conf import settings +from utils.email_template import send_email_template_with_attachment from utils.blob_functions import ( extract_account_key, extract_account_name, @@ -29,14 +30,33 @@ def get_azure_credentials(connection_string): def send_email_with_url(user_email, attachment_url): try: message = "Here is the link to the generated document:" - email = EmailMessage( + compiled_msg_code = send_email_template_with_attachment( "Transliteration Logs", + user_email, message, + ) + msg = EmailMultiAlternatives( + "Transliteration Logs", + compiled_msg_code, settings.DEFAULT_FROM_EMAIL, [user_email], ) - email.attach("Generated Document", attachment_url, "text/plain") - email.send() + msg.attach_alternative(compiled_msg_code, "text/html") + # also 
attach the generated document + msg.attach("Generated Document", attachment_url, "text/plain") + msg.send() + # compiled_msg.attach("Generated Document", attachment_url, "text/plain") + # compiled_msg.send() + + + # email = EmailMessage( + # "Transliteration Logs", + # message, + # settings.DEFAULT_FROM_EMAIL, + # [user_email], + # ) + # email.attach("Generated Document", attachment_url, "text/plain") + # email.send() except Exception as e: print(f"Failed to send email: {str(e)}") raise e diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index a364f876d..8bb28ab45 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -3,8 +3,9 @@ from celery import shared_task import pandas as pd from django.conf import settings -from django.core.mail import EmailMessage +from django.core.mail import EmailMessage, EmailMultiAlternatives from tasks.views import SentenceOperationViewSet +from utils.email_template import send_email_template_with_attachment from tasks.models import ( Task, @@ -529,31 +530,54 @@ def send_user_reports_mail_org( ] participation_types_string = ", ".join(participation_types) - message = ( - "Dear " - + str(user.username) - + ",\nYour user payment reports for " - + f"{organization.title}" - + " are ready.\n Thanks for contributing on Shoonya!" - + "\nProject Type: " - + f"{project_type}" - + "\nParticipation Types: " - + f"{participation_types_string}" - + ( - "\nStart Date: " + f"{start_date}" + "\nEnd Date: " + f"{end_date}" - if start_date - else "" - ) + message = f""" +

Your user analysis reports for AI4Bharat are now ready for review. Kindly check the attachment below

+
    +
  • Project Type: {project_type}
  • +
  • Participation Types: {participation_types_string}
  • +
  • Start Date: {start_date}
  • +
  • End Date: {end_date}
  • +
+""" + compiled_code = send_email_template_with_attachment( + "User Analytics Report", + user.email, + message ) - - email = EmailMessage( - f"{organization.title}" + " Payment Reports", - message, + msg = EmailMultiAlternatives( + "User Analytics Report", + compiled_code, settings.DEFAULT_FROM_EMAIL, [user.email], - attachments=[(filename, content, content_type)], ) - email.send() + msg.attach_alternative(compiled_code, "text/html") + msg.attach(filename, content, content_type) + msg.send() + # message = ( + # "Dear " + # + str(user.username) + # + ",\nYour user payment reports for " + # + f"{organization.title}" + # + " are ready.\n Thanks for contributing on Shoonya!" + # + "\nProject Type: " + # + f"{project_type}" + # + "\nParticipation Types: " + # + f"{participation_types_string}" + # + ( + # "\nStart Date: " + f"{start_date}" + "\nEnd Date: " + f"{end_date}" + # if start_date + # else "" + # ) + # ) + + # email = EmailMessage( + # f"{organization.title}" + " Payment Reports", + # message, + # settings.DEFAULT_FROM_EMAIL, + # [user.email], + # attachments=[(filename, content, content_type)], + # ) + # email.send() def get_counts( diff --git a/backend/utils/email_template.py b/backend/utils/email_template.py new file mode 100644 index 000000000..d1cbf33b0 --- /dev/null +++ b/backend/utils/email_template.py @@ -0,0 +1,98 @@ +from users.models import User +def send_email_template_with_attachment(subject,user_email,message): + + user = User.objects.get(email=user_email) + + style_string = """ + *{ margin: 0; + padding: 0; + } + body { + font-family: "Arial", sans-serif; + background-color: #f2f8f8; + margin: 0; + padding: 0; + padding-top: 2rem; + } + .container { + background-color: #fff; + border: solid 1px #e1e1e1; + border-radius: 2px; + padding: 1.4rem; + max-width: 380px; + margin: auto; + } + .header { + width: fit-content; + margin: auto; + } + h1 { + font-size: 1.2rem; + font-weight: 300; + margin: 1rem 0; + font-family: "Segoe UI", Tahoma, Geneva, Verdana, sans-serif; + } + p { + font-size: 0.8rem; + color: #222; + margin: 0.8rem 0; + } + .primary { + color: #18621f; + } + .footer { + margin-top: 1rem; + font-size: 0.9rem; + } + .footer > * { + font-size: inherit; + } + """ + + + html_code = f""" + + + + + + User Analytics (Topic) + + + +
+
+

{subject}

+
+
+
+
+
+

+ Dear {user.first_name} {user.last_name}, +

+

+ {message} +

+ Thanks for contributing on Shoonya! +

+

+ This email was intended for {user_email}. If you received it by mistake, please delete it and notify the sender immediately. +

+
+
+
+

+ Best Regards,
+ Shoonya Admin +

+
+
+ + + """ + return html_code + + \ No newline at end of file From 5f44c3fe24eb2d949fc3e1368850f1a4e1fcbd6c Mon Sep 17 00:00:00 2001 From: Pursottam6003 Date: Thu, 2 May 2024 17:21:02 +0530 Subject: [PATCH 06/44] updated the black linting --- backend/loging/tasks.py | 1 - backend/organizations/tasks.py | 6 ++---- backend/utils/email_template.py | 10 ++++------ 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/backend/loging/tasks.py b/backend/loging/tasks.py index 0cc76b84e..c02c99218 100644 --- a/backend/loging/tasks.py +++ b/backend/loging/tasks.py @@ -48,7 +48,6 @@ def send_email_with_url(user_email, attachment_url): # compiled_msg.attach("Generated Document", attachment_url, "text/plain") # compiled_msg.send() - # email = EmailMessage( # "Transliteration Logs", # message, diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index 8bb28ab45..1db8df175 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -540,9 +540,7 @@ def send_user_reports_mail_org( """ compiled_code = send_email_template_with_attachment( - "User Analytics Report", - user.email, - message + "User Analytics Report", user.email, message ) msg = EmailMultiAlternatives( "User Analytics Report", @@ -554,7 +552,7 @@ def send_user_reports_mail_org( msg.attach(filename, content, content_type) msg.send() # message = ( - # "Dear " + # "Dear " # + str(user.username) # + ",\nYour user payment reports for " # + f"{organization.title}" diff --git a/backend/utils/email_template.py b/backend/utils/email_template.py index d1cbf33b0..81f989ca0 100644 --- a/backend/utils/email_template.py +++ b/backend/utils/email_template.py @@ -1,8 +1,9 @@ from users.models import User -def send_email_template_with_attachment(subject,user_email,message): - + + +def send_email_template_with_attachment(subject, user_email, message): user = User.objects.get(email=user_email) - + style_string = """ *{ margin: 0; padding: 0; @@ -49,7 +50,6 @@ def send_email_template_with_attachment(subject,user_email,message): } """ - html_code = f""" @@ -94,5 +94,3 @@ def send_email_template_with_attachment(subject,user_email,message): """ return html_code - - \ No newline at end of file From 2b6d8197b9e13e42c21952d382d0a39d771bfe8a Mon Sep 17 00:00:00 2001 From: ch20b063 Date: Fri, 3 May 2024 08:28:51 +0530 Subject: [PATCH 07/44] deleting and resuming task --- backend/tasks/urls.py | 4 ++++ backend/tasks/views.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/backend/tasks/urls.py b/backend/tasks/urls.py index f74f31963..ceabbf4c7 100644 --- a/backend/tasks/urls.py +++ b/backend/tasks/urls.py @@ -7,6 +7,8 @@ PredictionViewSet, get_celery_tasks, stopping_celery_tasks, + resume_celery_task, + delete_celery_task, ) router = routers.DefaultRouter() @@ -17,4 +19,6 @@ urlpatterns = [ path("get_celery_tasks/", get_celery_tasks), path("stopping_celery_tasks/", stopping_celery_tasks), + path("resume_celery_task/", resume_celery_task), + path("delete_celery_task/", delete_celery_task), ] + router.urls diff --git a/backend/tasks/views.py b/backend/tasks/views.py index 20774687c..e5430aadc 100644 --- a/backend/tasks/views.py +++ b/backend/tasks/views.py @@ -2605,6 +2605,7 @@ def get_celery_tasks(request): return JsonResponse(data["results"], safe=False) +@api_view(["GET"]) def stopping_celery_tasks(req): task_id = req.GET.get("task_id") @@ -2624,3 +2625,39 @@ def stopping_celery_tasks(req): task.revoke(terminate=True) return JsonResponse({"message": "Task stopped 
successfully"}, status=200) + + +@api_view(["GET"]) +def resume_celery_task(req): + task_id = req.GET.get("task_id") + + if task_id is None: + return JsonResponse({"message": "Task ID is required"}, status=400) + + task = celery_app.AsyncResult(task_id) + + if task is None or task.state not in ["REVOKED", "FAILURE"]: + return JsonResponse( + {"message": "Task not found or cannot be resumed"}, status=400 + ) + + task.revive() + + return JsonResponse({"message": "Task resumed successfully"}, status=200) + + +@api_view(["GET"]) +def delete_celery_task(req): + task_id = req.GET.get("task_id") + + if task_id is None: + return JsonResponse({"message": "Task ID is required"}, status=400) + + task = celery_app.AsyncResult(task_id) + + if task is None: + return JsonResponse({"message": "Task not found"}, status=404) + + task.forget() + + return JsonResponse({"message": "Task deleted successfully"}, status=200) From 37dab455cc811a1c8c1932d65cc67157355856b8 Mon Sep 17 00:00:00 2001 From: Pursottam6003 Date: Mon, 6 May 2024 22:45:44 +0530 Subject: [PATCH 08/44] updated the email template for the backend code --- backend/organizations/tasks.py | 101 +++++++++++++------------------ backend/users/models.py | 21 +++++-- backend/users/views.py | 41 ++++++++++--- backend/utils/email_template.py | 104 +++++++++++++++++++++++++++++--- 4 files changed, 188 insertions(+), 79 deletions(-) diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index 1db8df175..86296c834 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -3,7 +3,7 @@ from celery import shared_task import pandas as pd from django.conf import settings -from django.core.mail import EmailMessage, EmailMultiAlternatives +from django.core.mail import EmailMultiAlternatives from tasks.views import SentenceOperationViewSet from utils.email_template import send_email_template_with_attachment @@ -516,7 +516,7 @@ def send_user_reports_mail_org( content = df.to_csv(index=False) content_type = "text/csv" - filename = f"{organization.title}_user_analytics.csv" + filename = f"{organization.title}_payments_analytics.csv" participation_types = [ "Full Time" @@ -531,7 +531,7 @@ def send_user_reports_mail_org( participation_types_string = ", ".join(participation_types) message = f""" -

Your user analysis reports for AI4Bharat are now ready for review. Kindly check the attachment below

+

Your {organization.title} Payments Report under the AI4Bharat Organisation is now ready for review. Kindly check the attachment below.

  • Project Type: {project_type}
  • Participation Types: {participation_types_string}
  • @@ -540,10 +540,10 @@ def send_user_reports_mail_org(
""" compiled_code = send_email_template_with_attachment( - "User Analytics Report", user.email, message + "Payment Reports", user.username, message ) msg = EmailMultiAlternatives( - "User Analytics Report", + f"{organization.title} Payment Reports", compiled_code, settings.DEFAULT_FROM_EMAIL, [user.email], @@ -551,31 +551,6 @@ def send_user_reports_mail_org( msg.attach_alternative(compiled_code, "text/html") msg.attach(filename, content, content_type) msg.send() - # message = ( - # "Dear " - # + str(user.username) - # + ",\nYour user payment reports for " - # + f"{organization.title}" - # + " are ready.\n Thanks for contributing on Shoonya!" - # + "\nProject Type: " - # + f"{project_type}" - # + "\nParticipation Types: " - # + f"{participation_types_string}" - # + ( - # "\nStart Date: " + f"{start_date}" + "\nEnd Date: " + f"{end_date}" - # if start_date - # else "" - # ) - # ) - - # email = EmailMessage( - # f"{organization.title}" + " Payment Reports", - # message, - # settings.DEFAULT_FROM_EMAIL, - # [user.email], - # attachments=[(filename, content, content_type)], - # ) - # email.send() def get_counts( @@ -1267,25 +1242,26 @@ def send_project_analytics_mail_org( content = df.to_csv(index=False) content_type = "text/csv" filename = f"{organization.title}_project_analytics.csv" + message = f""" +

Your {organization.title} Project Analytics Report under the AI4Bharat Organisation is now ready for review. Kindly check the attachment below.

- message = ( - "Dear " - + str(user.username) - + ",\nYour project analysis reports for " - + f"{organization.title}" - + " are ready.\n Thanks for contributing on Shoonya!" - + "\nProject Type: " - + f"{project_type}" +
    +
  • Project Type: {project_type}
  • +
  • Language: {selected_language}
  • +
+""" + compiled_code = send_email_template_with_attachment( + "Project Analytics", user.username, message ) - - email = EmailMessage( - f"{organization.title}" + " Project Analytics", - message, + msg = EmailMultiAlternatives( + f"{organization.title} Project Analytics", + compiled_code, settings.DEFAULT_FROM_EMAIL, [user.email], - attachments=[(filename, content, content_type)], ) - email.send() + msg.attach_alternative(compiled_code, "text/html") + msg.attach(filename, content, content_type) + msg.send() @shared_task(queue="reports") @@ -1474,21 +1450,30 @@ def send_user_analytics_mail_org( content_type = "text/csv" filename = f"{organization.title}_user_analytics.csv" - message = ( - "Dear " - + str(user.username) - + ",\nYour user analysis reports for " - + f"{organization.title}" - + " are ready.\n Thanks for contributing on Shoonya!" - + "\nProject Type: " - + f"{project_type}" + project_progress_stage_name = "All Stage" + if project_progress_stage == ANNOTATION_STAGE: + project_progress_stage_name = "Annotation" + elif project_progress_stage == REVIEW_STAGE: + project_progress_stage_name = "Review" + else: + project_progress_stage_name = "Super Check" + message = f""" +

Your {organization.title} User Analytics Report under the AI4Bharat Organisation is now ready for review. Kindly check the attachment below.

+
    +
  • Project Type: {project_type}
  • +
  • Progress Stage: {project_progress_stage_name}
  • +
  • Target Language: {tgt_language}
  • +
+""" + compiled_code = send_email_template_with_attachment( + "User Analytics", user.username, message ) - - email = EmailMessage( - f"{organization.title}" + " User Analytics", - message, + msg = EmailMultiAlternatives( + f"{organization.title} User Analytics", + compiled_code, settings.DEFAULT_FROM_EMAIL, [user.email], - attachments=[(filename, content, content_type)], ) - email.send() + msg.attach_alternative(compiled_code, "text/html") + msg.attach(filename, content, content_type) + msg.send() diff --git a/backend/users/models.py b/backend/users/models.py index c391d5f23..b93172f33 100644 --- a/backend/users/models.py +++ b/backend/users/models.py @@ -9,7 +9,7 @@ import jwt from datetime import datetime, timedelta -from django.core.mail import send_mail +from django.core.mail import send_mail, EmailMultiAlternatives from django.db import models from django.db.models.signals import post_delete from django.dispatch import receiver @@ -26,6 +26,7 @@ from .utils import hash_upload from .managers import UserManager +from utils.email_template import send_email_template # List of Indic languages LANG_CHOICES = ( @@ -282,12 +283,24 @@ def send_mail_to_change_password(self, email, key): prefix = os.getenv("FRONTEND_URL_FOR_RESET_PASSWORD") link = f"{prefix}/#/forget-password/confirm/{key}/{sent_token}" try: - send_mail( - "Reset password link for shoonya", - f"Hello! Please click on the following link to reset your password - {link}", + subject = "Reset Password Link For Shoonya" + message = f"

Hello! Please click on the following link to reset your password - {link}

" + + compiled_code = send_email_template(subject, message) + msg = EmailMultiAlternatives( + subject, + compiled_code, settings.DEFAULT_FROM_EMAIL, [email], ) + msg.attach_alternative(compiled_code, "text/html") + msg.send() + # send_mail( + # "Reset password link for shoonya", + # f"Hello! Please click on the following link to reset your password - {link}", + # settings.DEFAULT_FROM_EMAIL, + # [email], + # ) except SMTPAuthenticationError: raise Exception( "Failed to authenticate with the SMTP server. Check your email settings." diff --git a/backend/users/views.py b/backend/users/views.py index 320a06901..a9e0c868d 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -31,6 +31,7 @@ from organizations.decorators import is_organization_owner from users.models import LANG_CHOICES, User, CustomPeriodicTask from rest_framework.decorators import action +from utils.email_template import send_email_template from tasks.models import ( Task, ANNOTATOR_ANNOTATION, @@ -54,7 +55,7 @@ from datetime import datetime import calendar from django.conf import settings -from django.core.mail import send_mail +from django.core.mail import send_mail, EmailMultiAlternatives from workspaces.views import WorkspaceCustomViewSet from .utils import generate_random_string, get_role_name from rest_framework_simplejwt.tokens import RefreshToken @@ -676,19 +677,43 @@ def update_email(self, request): old_email_update_code = generate_random_string(10) new_email_verification_code = generate_random_string(10) - send_mail( - "Email Verification", - f"Your email verification code is:{old_email_update_code}", + subject = "Email Verification" + message = f"

Your email verification code is: {old_email_update_code}

" + + compiled_code = send_email_template(subject, message) + + msg = EmailMultiAlternatives( + subject, + message, settings.DEFAULT_FROM_EMAIL, [user.email], ) - - send_mail( - "Email Verification", - f"Your email verification code is:{new_email_verification_code}", + msg.attach_alternative(compiled_code, "text/html") + msg.send() + + # send_mail( + # "Email Verification", + # f"Your email verification code is:{old_email_update_code}", + # settings.DEFAULT_FROM_EMAIL, + # [user.email], + # ) + + # send_mail( + # "Email Verification", + # f"Your email verification code is:{new_email_verification_code}", + # settings.DEFAULT_FROM_EMAIL, + # [unverified_email], + # ) + + message = f"Your email verification code is: {new_email_verification_code} " + msg1 = EmailMultiAlternatives( + subject, + message, settings.DEFAULT_FROM_EMAIL, [unverified_email], ) + msg1.attach_alternative(compiled_code, "text/html") + msg1.send() user.unverified_email = unverified_email user.old_email_update_code = old_email_update_code diff --git a/backend/utils/email_template.py b/backend/utils/email_template.py index 81f989ca0..a1f15efb9 100644 --- a/backend/utils/email_template.py +++ b/backend/utils/email_template.py @@ -1,9 +1,4 @@ -from users.models import User - - -def send_email_template_with_attachment(subject, user_email, message): - user = User.objects.get(email=user_email) - +def send_email_template_with_attachment(subject, username, message): style_string = """ *{ margin: 0; padding: 0; @@ -56,7 +51,7 @@ def send_email_template_with_attachment(subject, user_email, message): - User Analytics (Topic) + {subject} @@ -71,7 +66,7 @@ def send_email_template_with_attachment(subject, user_email, message):

- Dear {user.first_name} {user.last_name}, + Dear {username},

{message} @@ -79,7 +74,98 @@ def send_email_template_with_attachment(subject, user_email, message): Thanks for contributing on Shoonya!

- This email was intended for {user_email} If you received it by mistake, please delete it and notify the sender immediately. + This email was intended for {username} If you received it by mistake, please delete it and notify the sender immediately. +

+
+ +
+

+ Best Regards,
+ Shoonya Admin +

+
+ + + + """ + return html_code + + +def send_email_template(subject, message): + style_string = """ + *{ margin: 0; + padding: 0; + } + body { + font-family: "Arial", sans-serif; + background-color: #f2f8f8; + margin: 0; + padding: 0; + padding-top: 2rem; + } + .container { + background-color: #fff; + border: solid 1px #e1e1e1; + border-radius: 2px; + padding: 1.4rem; + max-width: 380px; + margin: auto; + } + .header { + width: fit-content; + margin: auto; + } + h1 { + font-size: 1.2rem; + font-weight: 300; + margin: 1rem 0; + font-family: "Segoe UI", Tahoma, Geneva, Verdana, sans-serif; + } + p { + font-size: 0.8rem; + color: #222; + margin: 0.8rem 0; + } + .primary { + color: #18621f; + } + .footer { + margin-top: 1rem; + font-size: 0.9rem; + } + .footer > * { + font-size: inherit; + } + """ + + html_code = f""" + + + + + + {subject} + + + +
+
+

{subject}

+
+
+
+
+
+

+ Dear User, +

+ + {message} + +

+ This is an automated email. Please do not reply to this email.

From 42966df6c94a12f4e7aab187a8775a6624b3d2bc Mon Sep 17 00:00:00 2001 From: Pursottam6003 Date: Mon, 6 May 2024 23:06:59 +0530 Subject: [PATCH 09/44] changed the time formatting --- backend/organizations/tasks.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index 86296c834..225888893 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -529,14 +529,16 @@ def send_user_reports_mail_org( for participation_type in participation_types ] participation_types_string = ", ".join(participation_types) - + # Format the start_date and end_date + start_date = start_date.strftime("%Y-%m-%d %H:%M:%S %Z") + end_date = end_date.strftime("%Y-%m-%d %H:%M:%S %Z") message = f"""

Your {organization.title} Payments Report under the AI4Bharat Organisation is now ready for review. Kindly check the attachment below.

  • Project Type: {project_type}
  • Participation Types:{participation_types_string}
  • -
  • Start Date: {start_date}
  • -
  • End Date: {end_date}
  • +
  • Start Date: {start_date} UTC
  • +
  • End Date: {end_date} UTC
""" compiled_code = send_email_template_with_attachment( From 40a96bebda9660b3e2a394e182a35160dca1d959 Mon Sep 17 00:00:00 2001 From: Ishan Gujarathi Date: Wed, 8 May 2024 16:14:06 +0530 Subject: [PATCH 10/44] added a feature to remove user from frozen users list when user is marked active again being marked inactive --- backend/users/views.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/backend/users/views.py b/backend/users/views.py index 8b864a487..cbaa32455 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -902,6 +902,19 @@ def user_details_update(self, request, pk=None): }, status=status.HTTP_200_OK, ) + else: + if is_active_payload is True: + workspaces = Workspace.objects.filter( + Q(members=user) | Q(managers=user) + ).distinct() + + workspaceusersviewset_obj = WorkspaceusersViewSet() + request.data["user_id"] = user.id + + for workspace in workspaces: + workspaceusersviewset_obj.remove_frozen_user( + request=request, pk=workspace.pk + ) if request.data["role"] != user.role: new_role = int(request.data["role"]) From f0d05bcf3d7cb72a86e860fcca3d7284dce88fe2 Mon Sep 17 00:00:00 2001 From: Pursottam6003 Date: Mon, 13 May 2024 19:20:44 +0530 Subject: [PATCH 11/44] updated the changes for new project creation --- backend/projects/annotation_registry.py | 12 +++++++ backend/projects/views.py | 44 +++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/backend/projects/annotation_registry.py b/backend/projects/annotation_registry.py index 2dddab5d3..071f33678 100644 --- a/backend/projects/annotation_registry.py +++ b/backend/projects/annotation_registry.py @@ -164,6 +164,18 @@ "type": ["labels", "textarea", "textarea", "textarea"], }, }, + "StandardizedTranscriptionEditing": { + "transcribed_json": { + "to_name": "audio_url", + "from_name": [ + "labels", + "verbatim_transcribed_json", + "acoustic_normalised_transcribed_json", + "standardised_transcription", + ], + "type": ["labels", "textarea", "textarea", "textarea"], + }, + }, } diff --git a/backend/projects/views.py b/backend/projects/views.py index 579e26ee1..a1221c772 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -914,6 +914,50 @@ def convert_prediction_json_to_annotation_result(pk, proj_type): # mainly label_dict and text_dict are sent as result result.append(label_dict) result.append(text_dict) + elif proj_type == "StandardisedTranscriptionEditing": + # convert the prediction_json to a concatinated transcribed_json + data_item = SpeechConversation.objects.get(pk=pk) + prediction_json = ( + json.loads(data_item.prediction_json) + if isinstance(data_item.prediction_json, str) + else data_item.prediction_json + ) + speakers_json = data_item.speakers_json + audio_duration = data_item.audio_duration + # converting prediction_json to result (wherever it exists) for every task. 
+ if prediction_json == None: + return result + # Initialize variables + concatenated_text = "" + min_start_time = float("inf") + max_end_time = float("-inf") + + for idx, val in enumerate(prediction_json): + # Concatenate the text + concatenated_text += val["text"] + " " + + # Update the minimum start time and maximum end time + min_start_time = min(min_start_time, val["start"]) + max_end_time = max(max_end_time, val["end"]) + + # Create a single dictionary to store the result + result_dict = { + "origin": "manual", + "to_name": "audio_url", + "from_name": "transcribed_json", + "original_length": audio_duration, + "id": f"shoonya_{generate_random_string(13)}", + "type": "textarea", + "value": { + "start": min_start_time, + "end": max_end_time, + "text": [concatenated_text], # Remove trailing space + }, + } + + # Clear the result array and append the single result dictionary + result.clear() + result.append(result_dict) elif ( proj_type == "OCRTranscriptionEditing" or proj_type == "OCRSegmentCategorizationEditing" From ef047a74843c3ad5b0874859c4d86035b399b634 Mon Sep 17 00:00:00 2001 From: Pursottam6003 Date: Sun, 19 May 2024 00:25:15 +0530 Subject: [PATCH 12/44] made changes to create project --- backend/dataset/models.py | 7 ++- backend/organizations/views.py | 1 + backend/projects/models.py | 1 - backend/projects/project_registry.yaml | 23 ++++++++++ backend/projects/tasks.py | 2 +- backend/projects/views.py | 62 +++++++++++++++++++++++++- 6 files changed, 91 insertions(+), 5 deletions(-) diff --git a/backend/dataset/models.py b/backend/dataset/models.py index ec16c9903..ac5d4a4db 100644 --- a/backend/dataset/models.py +++ b/backend/dataset/models.py @@ -484,7 +484,12 @@ class SpeechConversation(DatasetBase): blank=True, help_text=("Prepopulated prediction for the implemented models"), ) - + final_transcribed_json = models.JSONField( + verbose_name="final_transcribed_json", + null=True, + blank=True, + help_text=("Field where data from this standardised_transcription_editing type will be exported."), + ) def __str__(self): return str(self.id) diff --git a/backend/organizations/views.py b/backend/organizations/views.py index 7d5e7b726..46848571e 100644 --- a/backend/organizations/views.py +++ b/backend/organizations/views.py @@ -2713,6 +2713,7 @@ def cumulative_tasks_count(self, request, pk=None): "AudioSegmentation", "AudioTranscription", "AudioTranscriptionEditing", + "StandardisedTranscriptionEditing" "ContextualSentenceVerification", "ContextualSentenceVerificationAndDomainClassification", "ContextualTranslationEditing", diff --git a/backend/projects/models.py b/backend/projects/models.py index da5034cd4..f7eb1d487 100644 --- a/backend/projects/models.py +++ b/backend/projects/models.py @@ -249,7 +249,6 @@ class Project(models.Model): "Maximum no. 
of tasks assigned to a user which are at unlabeled stage, as a threshold for pulling new tasks" ), ) - # enable_task_reviews = models.BooleanField( # verbose_name="enable_task_reviews", # default=False, diff --git a/backend/projects/project_registry.yaml b/backend/projects/project_registry.yaml index b4c0c8d85..853f8d4ad 100644 --- a/backend/projects/project_registry.yaml +++ b/backend/projects/project_registry.yaml @@ -395,3 +395,26 @@ Audio: fields: annotations: - transcribed_json + StandardizedTranscriptionEditing: + project_mode: "Annotation" + label_studio_jsx_file: "audio/acoustic_transcription.jsx" + input_dataset: + class: SpeechConversation + fields: + - audio_url + - reference_raw_transcript + - audio_duration + - scenario + - domain + - speakers_json + display_fields: + - scenario + - audio_url + prediction: machine_transcribed_json + output_dataset: + class: SpeechConversation + save_type: in_place + fields: + annotations: + - transcribed_json + diff --git a/backend/projects/tasks.py b/backend/projects/tasks.py index 8f0bd51c1..dd5eddccd 100644 --- a/backend/projects/tasks.py +++ b/backend/projects/tasks.py @@ -314,7 +314,7 @@ def filter_data_items( #### CELERY SHARED TASKS -@shared_task +# @shared_task def create_parameters_for_task_creation( project_type, dataset_instance_ids, diff --git a/backend/projects/views.py b/backend/projects/views.py index a1221c772..56f3f006b 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -1039,6 +1039,7 @@ def convert_annotation_result_to_formatted_json( is_SpeechConversation, is_OCRSegmentCategorizationOROCRSegmentCategorizationEditing, is_acoustic=False, + is_StandardisedTranscriptionEditing=False, ): transcribed_json = [] acoustic_transcribed_json = [] @@ -1132,6 +1133,37 @@ def convert_annotation_result_to_formatted_json( acoustic_transcribed_json_modified = json.dumps( acoustic_transcribed_json, ensure_ascii=False ) + + elif is_StandardisedTranscriptionEditing: + ''' + in need to convert in this format + text": "dummy format", + "end_time": "00:00:10.448", + "speaker_id": "Speaker 0", + "start_time": "00:00:00.000", + "id": 1, + "acoustic_normalised_text : text + ''' + + for idx1 in range(0, len(annotation_result), 2): + formatted_result_dict = {} + text_dict = {} + acoustic_text_dict = {} + if isinstance(annotation_result[idx1], str): + annotation_result[idx1] = json.loads(annotation_result[idx1]) + if isinstance(annotation_result[idx1 + 1], str): + annotation_result[idx1 + 1] = json.loads(annotation_result[idx1 + 1]) + text_dict = annotation_result[idx1] + acoustic_text_dict = annotation_result[idx1 + 1] + formatted_result_dict["text"] = text_dict["value"]["text"][0] + formatted_result_dict["start_time"] = text_dict["value"]["start"] + formatted_result_dict["end_time"] = text_dict["value"]["end"] + formatted_result_dict["speaker_id"] = text_dict["value"]["speaker_id"] + formatted_result_dict["id"] = text_dict["id"] + formatted_result_dict["acoustic_normalised_text"] = acoustic_text_dict["value"]["text"][0] + transcribed_json.append(formatted_result_dict) + + else: dicts = 2 if is_OCRSegmentCategorizationOROCRSegmentCategorizationEditing else 3 for idx1 in range(0, len(annotation_result), dicts): @@ -1200,6 +1232,13 @@ def convert_annotation_result_to_formatted_json( "acoustic_normalised_transcribed_json": acoustic_transcribed_json_modified, "standardised_transcription": standardised_transcription, } + + if is_StandardisedTranscriptionEditing: + return { + "verbatim_transcribed_json": transcribed_json_modified, + 
"acoustic_normalised_transcribed_json": acoustic_transcribed_json_modified, + "standardised_transcription": standardised_transcription, + } return transcribed_json_modified @@ -2188,7 +2227,7 @@ def create(self, request, *args, **kwargs): proj.save() # Function call to create the paramters for the sampling and filtering of sentences - create_parameters_for_task_creation.delay( + create_parameters_for_task_creation( project_type=project_type, dataset_instance_ids=dataset_instance_ids, filter_string=filter_string, @@ -2409,7 +2448,21 @@ def assign_new_tasks(self, request, pk, *args, **kwargs): "AudioTranscriptionEditing", "OCRTranscriptionEditing", "OCRSegmentCategorizationEditing", - ]: + "StandardisedTranscriptionEditing", + ]: + + if project.project_type == "StandardisedTranscriptionEditing": + try: + #gather trascribed_json + result = convert_annotation_result_to_formatted_json( + task.input_data.id, project.project_type, is_StandardisedTranscriptionEditing=True + ) + except Exception as e: + print( + f"The prediction json of the data item-{task.input_data.id} is corrupt." + ) + task.delete() + continue try: result = convert_prediction_json_to_annotation_result( task.input_data.id, project.project_type @@ -2423,6 +2476,7 @@ def assign_new_tasks(self, request, pk, *args, **kwargs): annotator_anno_count = Annotation_model.objects.filter( task_id=task, annotation_type=ANNOTATOR_ANNOTATION ).count() + if annotator_anno_count < project.required_annotators_per_task: cur_user_anno_count = Annotation_model.objects.filter( task_id=task, @@ -4135,6 +4189,7 @@ def download(self, request, pk=None, *args, **kwargs): project_type == "OCRSegmentCategorizationEditing" ) is_OCRSegmentCategorization = project_type == "OCRSegmentCategorization" + is_StandardizedTranscriptionEditing = project_type = "StandardizedTranscriptionEditing" for task in tasks: curr_task = process_task( task, @@ -4153,6 +4208,9 @@ def download(self, request, pk=None, *args, **kwargs): is_ConversationTranslation, is_ConversationVerification, ) + elif is_StandardizedTranscriptionEditing: + pass + elif dataset_type in ["SpeechConversation", "OCRDocument"]: is_SpeechConversation = dataset_type == "SpeechConversation" if is_SpeechConversation: From cf8c38649f8e9049f0a0414a5258eaf1d0f80b93 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 27 May 2024 10:23:23 +0530 Subject: [PATCH 13/44] added changes for StandardizedTranscriptionEditing project type --- ...eechconversation_final_transcribed_json.py | 22 ++ backend/dataset/models.py | 5 +- .../0053_alter_project_project_type.py | 63 ++++++ backend/projects/project_registry.yaml | 2 +- backend/projects/tasks.py | 25 ++- backend/projects/utils.py | 86 +++++++- backend/projects/views.py | 204 +++++++++--------- backend/tasks/views.py | 118 ++++++---- .../migrations/0034_alter_user_is_approved.py | 21 ++ .../convert_result_to_chitralekha_format.py | 50 +++-- 10 files changed, 430 insertions(+), 166 deletions(-) create mode 100644 backend/dataset/migrations/0047_speechconversation_final_transcribed_json.py create mode 100644 backend/projects/migrations/0053_alter_project_project_type.py create mode 100644 backend/users/migrations/0034_alter_user_is_approved.py diff --git a/backend/dataset/migrations/0047_speechconversation_final_transcribed_json.py b/backend/dataset/migrations/0047_speechconversation_final_transcribed_json.py new file mode 100644 index 000000000..1c9837814 --- /dev/null +++ b/backend/dataset/migrations/0047_speechconversation_final_transcribed_json.py @@ -0,0 +1,22 @@ +# 
Generated by Django 3.2.14 on 2024-05-21 06:02 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("dataset", "0046_merge_20240416_2233"), + ] + + operations = [ + migrations.AddField( + model_name="speechconversation", + name="final_transcribed_json", + field=models.JSONField( + blank=True, + help_text="Field where data from this standardised_transcription_editing type will be exported.", + null=True, + verbose_name="final_transcribed_json", + ), + ), + ] diff --git a/backend/dataset/models.py b/backend/dataset/models.py index ac5d4a4db..c1432cbd3 100644 --- a/backend/dataset/models.py +++ b/backend/dataset/models.py @@ -488,8 +488,11 @@ class SpeechConversation(DatasetBase): verbose_name="final_transcribed_json", null=True, blank=True, - help_text=("Field where data from this standardised_transcription_editing type will be exported."), + help_text=( + "Field where data from this standardised_transcription_editing type will be exported." + ), ) + def __str__(self): return str(self.id) diff --git a/backend/projects/migrations/0053_alter_project_project_type.py b/backend/projects/migrations/0053_alter_project_project_type.py new file mode 100644 index 000000000..1e0aee436 --- /dev/null +++ b/backend/projects/migrations/0053_alter_project_project_type.py @@ -0,0 +1,63 @@ +# Generated by Django 3.2.14 on 2024-05-21 06:02 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("projects", "0052_alter_project_project_type"), + ] + + operations = [ + migrations.AlterField( + model_name="project", + name="project_type", + field=models.CharField( + choices=[ + ("MonolingualTranslation", "MonolingualTranslation"), + ("TranslationEditing", "TranslationEditing"), + ( + "SemanticTextualSimilarity_Scale5", + "SemanticTextualSimilarity_Scale5", + ), + ("ContextualTranslationEditing", "ContextualTranslationEditing"), + ("OCRTranscription", "OCRTranscription"), + ("OCRTranscriptionEditing", "OCRTranscriptionEditing"), + ("OCRSegmentCategorization", "OCRSegmentCategorization"), + ( + "OCRSegmentCategorizationEditing", + "OCRSegmentCategorizationEditing", + ), + ("MonolingualCollection", "MonolingualCollection"), + ("SentenceSplitting", "SentenceSplitting"), + ( + "ContextualSentenceVerification", + "ContextualSentenceVerification", + ), + ( + "ContextualSentenceVerificationAndDomainClassification", + "ContextualSentenceVerificationAndDomainClassification", + ), + ("ConversationTranslation", "ConversationTranslation"), + ( + "ConversationTranslationEditing", + "ConversationTranslationEditing", + ), + ("ConversationVerification", "ConversationVerification"), + ("AudioTranscription", "AudioTranscription"), + ("AudioSegmentation", "AudioSegmentation"), + ("AudioTranscriptionEditing", "AudioTranscriptionEditing"), + ( + "AcousticNormalisedTranscriptionEditing", + "AcousticNormalisedTranscriptionEditing", + ), + ( + "StandardizedTranscriptionEditing", + "StandardizedTranscriptionEditing", + ), + ], + help_text="Project Type indicating the annotation task", + max_length=100, + ), + ), + ] diff --git a/backend/projects/project_registry.yaml b/backend/projects/project_registry.yaml index 853f8d4ad..e2de18292 100644 --- a/backend/projects/project_registry.yaml +++ b/backend/projects/project_registry.yaml @@ -416,5 +416,5 @@ Audio: save_type: in_place fields: annotations: - - transcribed_json + - final_transcribed_json diff --git a/backend/projects/tasks.py b/backend/projects/tasks.py index 
dd5eddccd..90e28df17 100644 --- a/backend/projects/tasks.py +++ b/backend/projects/tasks.py @@ -314,7 +314,7 @@ def filter_data_items( #### CELERY SHARED TASKS -# @shared_task +@shared_task def create_parameters_for_task_creation( project_type, dataset_instance_ids, @@ -452,6 +452,9 @@ def export_project_in_place( is_AcousticNormalisedTranscriptionEditing = ( project_type == "AcousticNormalisedTranscriptionEditing" ) + is_StandardizedTranscriptionEditing = ( + project_type == "StandardizedTranscriptionEditing" + ) is_ConversationVerification = project.project_type == "ConversationVerification" bboxes_relation_json = [] annotated_document_details_json = {} @@ -464,7 +467,10 @@ def export_project_in_place( print(error) export_excluded_task_ids.append(task.id) continue - if is_AcousticNormalisedTranscriptionEditing: + if ( + is_AcousticNormalisedTranscriptionEditing + or is_StandardizedTranscriptionEditing + ): try: ta_transcribed_json = json.loads(ta["verbatim_transcribed_json"]) except json.JSONDecodeError: @@ -507,7 +513,11 @@ def export_project_in_place( # We need to store the rating in integer format if field == "rating": setattr(data_item, field, int(ta[field])) - elif field == "transcribed_json" or field == "prediction_json": + elif ( + field == "transcribed_json" + or field == "prediction_json" + or field == "final_transcribed_json" + ): speakers_details = data_item.speakers_json for idx in range(len(ta_transcribed_json)): ta_labels[idx]["text"] = ta_transcribed_json[idx] @@ -522,7 +532,10 @@ def export_project_in_place( temp = deepcopy(ta_labels[idx]) temp["text"] = ta_acoustic_transcribed_json[idx] ta_acoustic_transcribed_json[idx] = temp - if is_AcousticNormalisedTranscriptionEditing: + if ( + is_AcousticNormalisedTranscriptionEditing + or is_StandardizedTranscriptionEditing + ): try: standardised_transcription = json.loads( ta["standardised_transcription"] @@ -538,6 +551,10 @@ def export_project_in_place( "acoustic_normalised_transcribed_json": ta_acoustic_transcribed_json, "standardised_transcription": standardised_transcription, } + if is_StandardizedTranscriptionEditing: + setattr( + data_item, "final_transcribed_json", ta_transcribed_json + ) setattr(data_item, field, ta_transcribed_json) else: setattr(data_item, field, ta_labels) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index 9408d44ce..c9416e398 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -13,7 +13,7 @@ from users.models import User from django.forms import model_to_dict -from dataset.models import Conversation +from dataset.models import Conversation, SpeechConversation from tasks.models import ( Annotation, ANNOTATED, @@ -419,10 +419,26 @@ def process_speech_results( ): from projects.views import convert_annotation_result_to_formatted_json + is_StandardizedTranscriptionEditing = ( + project_type == "StandardizedTranscriptionEditing" + ) + if is_audio_segmentation: task["data"]["prediction_json"] = convert_annotation_result_to_formatted_json( annotation_result, speakers_json, True, False, False ) + elif is_StandardizedTranscriptionEditing: + task["data"][ + "final_transcribed_json" + ] = convert_annotation_result_to_formatted_json( + annotation_result, + speakers_json, + True, + False, + False, + True, + ) + task["data"]["transcribed_json"] = task["data"]["final_transcribed_json"] else: task["data"]["transcribed_json"] = convert_annotation_result_to_formatted_json( annotation_result, @@ -516,3 +532,71 @@ def process_task( task_dict["data"] = data return 
OrderedDict(task_dict) + + +def convert_time_to_seconds(time_str): + # Split the time string into hours, minutes, seconds, and milliseconds + hours, minutes, seconds_milliseconds = time_str.split(":") + seconds, milliseconds = seconds_milliseconds.split(".") + + # Convert each component to integers + hours = int(hours) + minutes = int(minutes) + seconds = int(seconds) + milliseconds = int(milliseconds) + + # Calculate the total time in seconds + total_seconds = (hours * 3600) + (minutes * 60) + seconds + (milliseconds / 1000.0) + + return total_seconds + + +def parse_json_for_ste(input_data_id): + data_item = SpeechConversation.objects.get(pk=input_data_id) + input_data = ( + json.loads(data_item.transcribed_json) + if isinstance(data_item.transcribed_json, str) + else data_item.transcribed_json + ) + if not input_data: + return [] + acoustic_normalised = json.loads(input_data["acoustic_normalised_transcribed_json"]) + standardised_transcription = json.loads(input_data["standardised_transcription"]) + result = [] + id_counter = 1 + + # Function to convert float seconds to hh:mm:ss.ms format + def format_time(seconds): + td = datetime.timedelta(seconds=seconds) + return str(td) + + # Combine all transcriptions into one list + for item in acoustic_normalised: + result.append( + { + "text": item["text"], + "end_time": format_time(item["end"]), + "speaker_id": f"{item['speaker_id']}", + "start_time": format_time(item["start"]), + "id": id_counter, + "acoustic_normalised_text": item["text"], + } + ) + id_counter += 1 + + for item in standardised_transcription: + result.append( + { + "acoustic_standardized_text": item["text"], + "end_time": format_time(item["end"]), + "speaker_id": f"Speaker {item['speaker_id']}", + "start_time": format_time(item["start"]), + "id": id_counter, + } + ) + id_counter += 1 + + # Sort the result by start_time and then by the presence of 'acoustic_normalised_text' + result.sort(key=lambda x: (x["id"], "acoustic_normalised_text" not in x)) + + return result diff --git a/backend/projects/views.py b/backend/projects/views.py index 56f3f006b..ca7f0b33c 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -25,6 +25,8 @@ process_speech_tasks, process_ocr_tasks, process_task, + convert_time_to_seconds, + parse_json_for_ste, ) from django.http import HttpResponse, JsonResponse from rest_framework import status, viewsets @@ -914,46 +916,48 @@ def convert_prediction_json_to_annotation_result(pk, proj_type): # mainly label_dict and text_dict are sent as result result.append(label_dict) result.append(text_dict) - elif proj_type == "StandardisedTranscriptionEditing": - # convert the prediction_json to a concatinated transcribed_json - data_item = SpeechConversation.objects.get(pk=pk) - prediction_json = ( - json.loads(data_item.prediction_json) - if isinstance(data_item.prediction_json, str) - else data_item.prediction_json - ) - speakers_json = data_item.speakers_json - audio_duration = data_item.audio_duration - # converting prediction_json to result (wherever it exists) for every task. 
- if prediction_json == None: - return result - # Initialize variables - concatenated_text = "" - min_start_time = float("inf") - max_end_time = float("-inf") - - for idx, val in enumerate(prediction_json): - # Concatenate the text - concatenated_text += val["text"] + " " - - # Update the minimum start time and maximum end time - min_start_time = min(min_start_time, val["start"]) - max_end_time = max(max_end_time, val["end"]) - - # Create a single dictionary to store the result - result_dict = { - "origin": "manual", - "to_name": "audio_url", - "from_name": "transcribed_json", - "original_length": audio_duration, - "id": f"shoonya_{generate_random_string(13)}", - "type": "textarea", - "value": { - "start": min_start_time, - "end": max_end_time, - "text": [concatenated_text], # Remove trailing space - }, - } + # elif proj_type == "StandardisedTranscriptionEditing": + # # convert the prediction_json to a concatinated transcribed_json + # data_item = SpeechConversation.objects.get(pk=pk) + # prediction_json = ( + # json.loads(data_item.prediction_json) + # if isinstance(data_item.prediction_json, str) + # else data_item.prediction_json + # ) + # speakers_json = data_item.speakers_json + # audio_duration = data_item.audio_duration + # # converting prediction_json to result (wherever it exists) for every task. + # if prediction_json == None: + # return result + # # Initialize variables + # concatenated_text = "" + # min_start_time = float("inf") + # max_end_time = float("-inf") + # + # for idx, val in enumerate(prediction_json): + # # Concatenate the text + # concatenated_text += val["text"] + " " + # + # # Update the minimum start time and maximum end time + # min_start_time = min(min_start_time, val["start"]) + # max_end_time = max(max_end_time, val["end"]) + # if concatenated_text: + # concatenated_text.strip() + # + # # Create a single dictionary to store the result + # result_dict = { + # "origin": "manual", + # "to_name": "audio_url", + # "from_name": "transcribed_json", + # "original_length": audio_duration, + # "id": f"shoonya_{generate_random_string(13)}", + # "type": "textarea", + # "value": { + # "start": min_start_time, + # "end": max_end_time, + # "text": [concatenated_text], # Remove trailing space + # }, + # } # Clear the result array and append the single result dictionary result.clear() @@ -1045,7 +1049,49 @@ def convert_annotation_result_to_formatted_json( acoustic_transcribed_json = [] standardised_transcription = "" transcribed_json_modified, acoustic_transcribed_json_modified = [], [] - if is_SpeechConversation: + if is_StandardisedTranscriptionEditing: + verbatim_transcribed_json = [] + acoustic_normalised_transcribed_json = [] + standardised_transcription = [] + + for item in annotation_result: + if isinstance(item, str): + item = json.loads(item) + if "text" in item: + verbatim_transcribed_json.append( + { + "speaker_id": item["speaker_id"], + "start": convert_time_to_seconds(item["start_time"]), + "end": convert_time_to_seconds(item["end_time"]), + "text": item["text"], + } + ) + if "acoustic_normalised_text" in item: + acoustic_normalised_transcribed_json.append( + { + "speaker_id": item["speaker_id"], + "start": convert_time_to_seconds(item["start_time"]), + "end": convert_time_to_seconds(item["end_time"]), + "text": item["acoustic_normalised_text"], + } + ) + if "acoustic_standardized_text" in item: + standardised_transcription.append( + { + "speaker_id": item["speaker_id"], + "start": convert_time_to_seconds(item["start_time"]), + "end": 
convert_time_to_seconds(item["end_time"]), + "text": item["acoustic_standardized_text"], + } + ) + + complete_json = { + "verbatim_transcribed_json": verbatim_transcribed_json, + "acoustic_normalised_transcribed_json": acoustic_normalised_transcribed_json, + "standardised_transcription": standardised_transcription, + } + transcribed_json.append(complete_json) + elif is_SpeechConversation: ids_formatted = {} for idx1 in range(len(annotation_result)): formatted_result_dict = {} @@ -1133,37 +1179,6 @@ def convert_annotation_result_to_formatted_json( acoustic_transcribed_json_modified = json.dumps( acoustic_transcribed_json, ensure_ascii=False ) - - elif is_StandardisedTranscriptionEditing: - ''' - in need to convert in this format - text": "dummy format", - "end_time": "00:00:10.448", - "speaker_id": "Speaker 0", - "start_time": "00:00:00.000", - "id": 1, - "acoustic_normalised_text : text - ''' - - for idx1 in range(0, len(annotation_result), 2): - formatted_result_dict = {} - text_dict = {} - acoustic_text_dict = {} - if isinstance(annotation_result[idx1], str): - annotation_result[idx1] = json.loads(annotation_result[idx1]) - if isinstance(annotation_result[idx1 + 1], str): - annotation_result[idx1 + 1] = json.loads(annotation_result[idx1 + 1]) - text_dict = annotation_result[idx1] - acoustic_text_dict = annotation_result[idx1 + 1] - formatted_result_dict["text"] = text_dict["value"]["text"][0] - formatted_result_dict["start_time"] = text_dict["value"]["start"] - formatted_result_dict["end_time"] = text_dict["value"]["end"] - formatted_result_dict["speaker_id"] = text_dict["value"]["speaker_id"] - formatted_result_dict["id"] = text_dict["id"] - formatted_result_dict["acoustic_normalised_text"] = acoustic_text_dict["value"]["text"][0] - transcribed_json.append(formatted_result_dict) - - else: dicts = 2 if is_OCRSegmentCategorizationOROCRSegmentCategorizationEditing else 3 for idx1 in range(0, len(annotation_result), dicts): @@ -1232,13 +1247,6 @@ def convert_annotation_result_to_formatted_json( "acoustic_normalised_transcribed_json": acoustic_transcribed_json_modified, "standardised_transcription": standardised_transcription, } - - if is_StandardisedTranscriptionEditing: - return { - "verbatim_transcribed_json": transcribed_json_modified, - "acoustic_normalised_transcribed_json": acoustic_transcribed_json_modified, - "standardised_transcription": standardised_transcription, - } return transcribed_json_modified @@ -2227,7 +2235,7 @@ def create(self, request, *args, **kwargs): proj.save() # Function call to create the paramters for the sampling and filtering of sentences - create_parameters_for_task_creation( + create_parameters_for_task_creation.delay( project_type=project_type, dataset_instance_ids=dataset_instance_ids, filter_string=filter_string, @@ -2448,14 +2456,22 @@ def assign_new_tasks(self, request, pk, *args, **kwargs): "AudioTranscriptionEditing", "OCRTranscriptionEditing", "OCRSegmentCategorizationEditing", - "StandardisedTranscriptionEditing", - ]: - - if project.project_type == "StandardisedTranscriptionEditing": + "StandardizedTranscriptionEditing", + ]: + if project.project_type == "StandardizedTranscriptionEditing": try: - #gather trascribed_json - result = convert_annotation_result_to_formatted_json( - task.input_data.id, project.project_type, is_StandardisedTranscriptionEditing=True + # gather trascribed_json + result = parse_json_for_ste(task.input_data.id) + except Exception as e: + print( + f"The prediction json of the data item-{task.input_data.id} is corrupt." 
+ ) + task.delete() + continue + else: + try: + result = convert_prediction_json_to_annotation_result( + task.input_data.id, project.project_type ) except Exception as e: print( @@ -2463,16 +2479,6 @@ def assign_new_tasks(self, request, pk, *args, **kwargs): ) task.delete() continue - try: - result = convert_prediction_json_to_annotation_result( - task.input_data.id, project.project_type - ) - except Exception as e: - print( - f"The prediction json of the data item-{task.input_data.id} is corrupt." - ) - task.delete() - continue annotator_anno_count = Annotation_model.objects.filter( task_id=task, annotation_type=ANNOTATOR_ANNOTATION ).count() @@ -4189,7 +4195,6 @@ def download(self, request, pk=None, *args, **kwargs): project_type == "OCRSegmentCategorizationEditing" ) is_OCRSegmentCategorization = project_type == "OCRSegmentCategorization" - is_StandardizedTranscriptionEditing = project_type = "StandardizedTranscriptionEditing" for task in tasks: curr_task = process_task( task, @@ -4208,9 +4213,6 @@ def download(self, request, pk=None, *args, **kwargs): is_ConversationTranslation, is_ConversationVerification, ) - elif is_StandardizedTranscriptionEditing: - pass - elif dataset_type in ["SpeechConversation", "OCRDocument"]: is_SpeechConversation = dataset_type == "SpeechConversation" if is_SpeechConversation: diff --git a/backend/tasks/views.py b/backend/tasks/views.py index fe621f5e4..af0d1071f 100644 --- a/backend/tasks/views.py +++ b/backend/tasks/views.py @@ -1758,6 +1758,14 @@ def partial_update(self, request, pk=None): == "AcousticNormalisedTranscriptionEditing" else False ) + + is_StandardizedTranscriptionEditing = ( + True + if annotation_obj.task.project_id.project_type + == "StandardizedTranscriptionEditing" + else False + ) + is_ocr_sc_or_sce = ( True if annotation_obj.task.project_id.project_type @@ -1789,12 +1797,15 @@ def partial_update(self, request, pk=None): ) = self.convert_chitralekha_format_to_LSF( request.data["result"], annotation_obj.task, - is_acoustic_project_type, - is_acoustic_project_type - and annotation_obj.task.project_id.metadata_json[ - "acoustic_enabled_stage" - ] - == 1, + is_acoustic_project_type or is_StandardizedTranscriptionEditing, + ( + is_acoustic_project_type + and annotation_obj.task.project_id.metadata_json[ + "acoustic_enabled_stage" + ] + == 1 + ) + or is_StandardizedTranscriptionEditing, ) else: annotation_obj.result = request.data["result"] @@ -1845,12 +1856,15 @@ def partial_update(self, request, pk=None): ) = self.convert_chitralekha_format_to_LSF( request.data["result"], annotation_obj.task, - is_acoustic_project_type, - is_acoustic_project_type - and annotation_obj.task.project_id.metadata_json[ - "acoustic_enabled_stage" - ] - == 1, + is_acoustic_project_type or is_StandardizedTranscriptionEditing, + ( + is_acoustic_project_type + and annotation_obj.task.project_id.metadata_json[ + "acoustic_enabled_stage" + ] + == 1 + ) + or is_StandardizedTranscriptionEditing, ) annotation_status = request.data["annotation_status"] if empty_flag == True and annotation_status in [ @@ -1918,12 +1932,15 @@ def partial_update(self, request, pk=None): ) = self.convert_chitralekha_format_to_LSF( request.data["result"], annotation_obj.task, - is_acoustic_project_type, - is_acoustic_project_type - and annotation_obj.task.project_id.metadata_json[ - "acoustic_enabled_stage" - ] - <= 2, + is_acoustic_project_type or is_StandardizedTranscriptionEditing, + ( + is_acoustic_project_type + and annotation_obj.task.project_id.metadata_json[ + "acoustic_enabled_stage" 
+ ] + == 2 + ) + or is_StandardizedTranscriptionEditing, ) else: annotation_obj.result = request.data["result"] @@ -2013,12 +2030,15 @@ def partial_update(self, request, pk=None): ) = self.convert_chitralekha_format_to_LSF( request.data["result"], annotation_obj.task, - is_acoustic_project_type, - is_acoustic_project_type - and annotation_obj.task.project_id.metadata_json[ - "acoustic_enabled_stage" - ] - <= 2, + is_acoustic_project_type or is_StandardizedTranscriptionEditing, + ( + is_acoustic_project_type + and annotation_obj.task.project_id.metadata_json[ + "acoustic_enabled_stage" + ] + == 2 + ) + or is_StandardizedTranscriptionEditing, ) annotation_status = request.data["annotation_status"] if empty_flag == True and annotation_status in [ @@ -2113,12 +2133,15 @@ def partial_update(self, request, pk=None): ) = self.convert_chitralekha_format_to_LSF( request.data["result"], annotation_obj.task, - is_acoustic_project_type, - is_acoustic_project_type - and annotation_obj.task.project_id.metadata_json[ - "acoustic_enabled_stage" - ] - <= 3, + is_acoustic_project_type or is_StandardizedTranscriptionEditing, + ( + is_acoustic_project_type + and annotation_obj.task.project_id.metadata_json[ + "acoustic_enabled_stage" + ] + == 3 + ) + or is_StandardizedTranscriptionEditing, ) else: annotation_obj.result = request.data["result"] @@ -2199,12 +2222,15 @@ def partial_update(self, request, pk=None): ) = self.convert_chitralekha_format_to_LSF( request.data["result"], annotation_obj.task, - is_acoustic_project_type, - is_acoustic_project_type - and annotation_obj.task.project_id.metadata_json[ - "acoustic_enabled_stage" - ] - <= 3, + is_acoustic_project_type or is_StandardizedTranscriptionEditing, + ( + is_acoustic_project_type + and annotation_obj.task.project_id.metadata_json[ + "acoustic_enabled_stage" + ] + == 3 + ) + or is_StandardizedTranscriptionEditing, ) if empty_flag == True and annotation_status in [ LABELED, @@ -2324,19 +2350,33 @@ def convert_chitralekha_format_to_LSF( if result == None or len(result) == 0: return modified_result, empty_text_flag for idx, val in enumerate(result): - if "standardised_transcription" in val: + if "acoustic_standardized_text" in val: if acoustic_enabled: standardised_dict = { "id": f"chitralekha_{idx}s{generate_random_string(13 - len(str(idx)))}", "origin": "manual", "to_name": "audio_url", - "from_name": "standardised_transcription", + "from_name": "acoustic_standardised_transcribed_json", "original_length": audio_duration, "type": "textarea", "value": { - "text": [val["standardised_transcription"]], + "start": self.convert_formatted_time_to_fractional( + val["start_time"] + ), + "end": self.convert_formatted_time_to_fractional( + val["end_time"] + ), + "text": [val["acoustic_standardized_text"]], }, } + label_dict_st = deepcopy(standardised_dict) + label_dict_st["type"] = "labels" + del label_dict_st["value"]["text"] + label_dict_st["value"]["labels"] = ( + [val["speaker_id"]] if "speaker_id" in val else [] + ) + label_dict_st["from_name"] = "labels" + modified_result.append(label_dict_st) modified_result.append(standardised_dict) continue if "type" in val or "value" in val: diff --git a/backend/users/migrations/0034_alter_user_is_approved.py b/backend/users/migrations/0034_alter_user_is_approved.py new file mode 100644 index 000000000..7cfe9702a --- /dev/null +++ b/backend/users/migrations/0034_alter_user_is_approved.py @@ -0,0 +1,21 @@ +# Generated by Django 3.2.14 on 2024-05-22 02:45 + +from django.db import migrations, models + + +class 
Migration(migrations.Migration): + dependencies = [ + ("users", "0033_rename_approved_by_user_invited_by"), + ] + + operations = [ + migrations.AlterField( + model_name="user", + name="is_approved", + field=models.BooleanField( + default=False, + help_text="Indicates whether user is approved by the admin or not.", + verbose_name="is_approved", + ), + ), + ] diff --git a/backend/utils/convert_result_to_chitralekha_format.py b/backend/utils/convert_result_to_chitralekha_format.py index 4e1c2072c..33e48cb6a 100644 --- a/backend/utils/convert_result_to_chitralekha_format.py +++ b/backend/utils/convert_result_to_chitralekha_format.py @@ -20,8 +20,8 @@ def create_memory(result): memory[key]["labels_dict_idx"] = i elif dict_type == "acoustic_normalised_transcribed_json": memory[key]["acoustic_text_dict_idx"] = i - elif dict_type == "standardised_transcription": - memory["standardised_transcription"] = i + elif dict_type == "acoustic_standardised_transcribed_json": + memory[key]["acoustic_standardised_transcribed_json"] = i else: memory[key]["text_dict_idx"] = i return memory @@ -49,10 +49,11 @@ def convert_result_to_chitralekha_format(result, ann_id, project_type): speaker_id = "Speaker 0" seen.add(text_dict_idx) elif text_dict_idx == -1: - print( - f"The data is corrupt for annotation id-{ann_id}, data id- {result[i]['id']}. " - f"It does not contain a corresponding text dictionary." - ) + if project_type != "StandardizedTranscriptionEditing": + print( + f"The data is corrupt for annotation id-{ann_id}, data id- {result[i]['id']}. " + f"It does not contain a corresponding text dictionary." + ) continue else: label_dict = result[labels_dict_idx] @@ -96,19 +97,30 @@ def convert_result_to_chitralekha_format(result, ann_id, project_type): modified_result = ( sort_result_by_start_time(modified_result) if len(modified_result) > 0 else [] ) - if ( - project_type == "AcousticNormalisedTranscriptionEditing" - and "standardised_transcription" in memory.keys() - and result[memory["standardised_transcription"]]["value"]["text"] - ): - modified_result.append( - { - "standardised_transcription": result[ - memory["standardised_transcription"] - ]["value"]["text"][0] - } - ) - + if project_type == "StandardizedTranscriptionEditing": + standard_chitra_dict = {} + for i in range(len(result)): + if result[i]["id"] in memory: + if "acoustic_standardised_transcribed_json" in memory[result[i]["id"]]: + st_dict = result[ + memory[result[i]["id"]][ + "acoustic_standardised_transcribed_json" + ] + ] + if not standard_chitra_dict: + lb_dict = result[memory[result[i]["id"]]["labels_dict_idx"]] + standard_chitra_dict = { + "acoustic_standardized_text": st_dict["value"]["text"][0], + "end_time": convert_fractional_time_to_formatted( + st_dict["value"]["end"], ann_id, st_dict["id"] + ), + "speaker_id": lb_dict["value"]["labels"][0], + "start_time": convert_fractional_time_to_formatted( + st_dict["value"]["start"], ann_id, st_dict["id"] + ), + "id": count, + } + modified_result.append(standard_chitra_dict) return modified_result From 9dfdb52c104281a3968918f3c840d9916aaee1e0 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 27 May 2024 10:25:25 +0530 Subject: [PATCH 14/44] black linting --- backend/organizations/views.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/organizations/views.py b/backend/organizations/views.py index 46848571e..220834dd9 100644 --- a/backend/organizations/views.py +++ b/backend/organizations/views.py @@ -2713,8 +2713,7 @@ def cumulative_tasks_count(self, request, 
pk=None): "AudioSegmentation", "AudioTranscription", "AudioTranscriptionEditing", - "StandardisedTranscriptionEditing" - "ContextualSentenceVerification", + "StandardisedTranscriptionEditing" "ContextualSentenceVerification", "ContextualSentenceVerificationAndDomainClassification", "ContextualTranslationEditing", "ConversationTranslation", From 3b93f2d2bb44a65430706781280dd8bd921877a8 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 27 May 2024 11:34:29 +0530 Subject: [PATCH 15/44] black changes --- backend/organizations/views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/organizations/views.py b/backend/organizations/views.py index 220834dd9..3109cf083 100644 --- a/backend/organizations/views.py +++ b/backend/organizations/views.py @@ -2713,7 +2713,8 @@ def cumulative_tasks_count(self, request, pk=None): "AudioSegmentation", "AudioTranscription", "AudioTranscriptionEditing", - "StandardisedTranscriptionEditing" "ContextualSentenceVerification", + "StandardisedTranscriptionEditing", + "ContextualSentenceVerification", "ContextualSentenceVerificationAndDomainClassification", "ContextualTranslationEditing", "ConversationTranslation", From 86967b6f18ac568043719c38c732c9f772a715ae Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 27 May 2024 16:30:48 +0530 Subject: [PATCH 16/44] modified the download endpoint --- backend/projects/utils.py | 2 +- backend/projects/views.py | 208 ++++++++++++++++++++++++-------------- 2 files changed, 133 insertions(+), 77 deletions(-) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index c9416e398..a43d55bed 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -435,7 +435,7 @@ def process_speech_results( speakers_json, True, False, - False, + True, True, ) task["data"]["transcribed_json"] = task["data"]["final_transcribed_json"] diff --git a/backend/projects/views.py b/backend/projects/views.py index ca7f0b33c..4eedb6c32 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -1047,57 +1047,65 @@ def convert_annotation_result_to_formatted_json( ): transcribed_json = [] acoustic_transcribed_json = [] - standardised_transcription = "" + standardised_json_modified = [] transcribed_json_modified, acoustic_transcribed_json_modified = [], [] if is_StandardisedTranscriptionEditing: verbatim_transcribed_json = [] acoustic_normalised_transcribed_json = [] standardised_transcription = [] - for item in annotation_result: - if isinstance(item, str): - item = json.loads(item) - if "text" in item: - verbatim_transcribed_json.append( - { - "speaker_id": item["speaker_id"], - "start": convert_time_to_seconds(item["start_time"]), - "end": convert_time_to_seconds(item["end_time"]), - "text": item["text"], - } - ) - if "acoustic_normalised_text" in item: - acoustic_normalised_transcribed_json.append( - { - "speaker_id": item["speaker_id"], - "start": convert_time_to_seconds(item["start_time"]), - "end": convert_time_to_seconds(item["end_time"]), - "text": item["acoustic_normalised_text"], - } - ) - if "acoustic_standardized_text" in item: - standardised_transcription.append( - { - "speaker_id": item["speaker_id"], - "start": convert_time_to_seconds(item["start_time"]), - "end": convert_time_to_seconds(item["end_time"]), - "text": item["acoustic_standardized_text"], - } - ) - - complete_json = { - "verbatim_transcribed_json": verbatim_transcribed_json, - "acoustic_normalised_transcribed_json": acoustic_normalised_transcribed_json, - "standardised_transcription": 
standardised_transcription, - } - transcribed_json.append(complete_json) - elif is_SpeechConversation: + # for item in annotation_result: + # if isinstance(item, str): + # item = json.loads(item) + # if "text" in item: + # verbatim_transcribed_json.append( + # { + # "speaker_id": item["speaker_id"], + # "start": convert_time_to_seconds(item["start_time"]), + # "end": convert_time_to_seconds(item["end_time"]), + # "text": item["text"], + # } + # ) + # if "acoustic_normalised_text" in item: + # acoustic_normalised_transcribed_json.append( + # { + # "speaker_id": item["speaker_id"], + # "start": convert_time_to_seconds(item["start_time"]), + # "end": convert_time_to_seconds(item["end_time"]), + # "text": item["acoustic_normalised_text"], + # } + # ) + # if "acoustic_standardized_text" in item: + # standardised_transcription.append( + # { + # "speaker_id": item["speaker_id"], + # "start": convert_time_to_seconds(item["start_time"]), + # "end": convert_time_to_seconds(item["end_time"]), + # "text": item["acoustic_standardized_text"], + # } + # ) + # + # complete_json = { + # "verbatim_transcribed_json": verbatim_transcribed_json, + # "acoustic_normalised_transcribed_json": acoustic_normalised_transcribed_json, + # "standardised_transcription": standardised_transcription, + # } + # transcribed_json.append(complete_json) + if is_SpeechConversation: ids_formatted = {} for idx1 in range(len(annotation_result)): + if ( + "id" in annotation_result[idx1] + and annotation_result[idx1]["id"] in ids_formatted + ): + continue formatted_result_dict = {} labels_dict = {} text_dict = {} acoustic_text_dict = {} + st_labels_dict = {} + st_text_dict = {} + st_formatted_result_dict = {} if isinstance(annotation_result[idx1], str): annotation_result[idx1] = json.loads(annotation_result[idx1]) if annotation_result[idx1]["from_name"] == "labels": @@ -1110,11 +1118,7 @@ def convert_annotation_result_to_formatted_json( annotation_result[idx1], ensure_ascii=False ) acoustic_text_dict = annotation_result[idx1] - elif annotation_result[idx1]["from_name"] == "standardised_transcription": - standardised_transcription = annotation_result[idx1]["value"]["text"][0] - continue else: - text_dict = json.dumps(annotation_result[idx1], ensure_ascii=False) text_dict = annotation_result[idx1] for idx2 in range(idx1 + 1, len(annotation_result)): if annotation_result[idx1]["id"] == annotation_result[idx2]["id"]: @@ -1131,50 +1135,93 @@ def convert_annotation_result_to_formatted_json( text_dict = json.dumps( annotation_result[idx2], ensure_ascii=False ) - if not is_acoustic or ( - labels_dict and acoustic_text_dict and text_dict + if not is_StandardisedTranscriptionEditing: + if not is_acoustic or ( + labels_dict and acoustic_text_dict and text_dict + ): + break + elif is_StandardisedTranscriptionEditing: + if ( + annotation_result[idx2]["from_name"] == "labels" + and (idx2 + 1) < len(annotation_result) + and annotation_result[idx2 + 1]["from_name"] + == "acoustic_standardised_transcribed_json" ): - break + st_labels_dict = annotation_result[idx2] + elif ( + annotation_result[idx2]["from_name"] + == "acoustic_standardised_transcribed_json" + ): + st_text_dict = annotation_result[idx2] - if annotation_result[idx1]["id"] not in ids_formatted: - ids_formatted[annotation_result[idx1]["id"]] = "formatted" - if not labels_dict: + ids_formatted[annotation_result[idx1]["id"]] = "formatted" + if not labels_dict: + formatted_result_dict["speaker_id"] = None + else: + try: + formatted_result_dict["speaker_id"] = next( + speaker + for speaker in 
speakers_json + if speaker["name"] == labels_dict["value"]["labels"][0] + )["speaker_id"] + except (KeyError, StopIteration): formatted_result_dict["speaker_id"] = None + formatted_result_dict["start"] = labels_dict["value"]["start"] + formatted_result_dict["end"] = labels_dict["value"]["end"] + + if not text_dict: + formatted_result_dict["text"] = "" + else: + text_dict_json = ( + json.loads(text_dict) if isinstance(text_dict, str) else text_dict + ) + formatted_result_dict["text"] = text_dict_json["value"]["text"][0] + formatted_result_dict["start"] = text_dict_json["value"]["start"] + formatted_result_dict["end"] = text_dict_json["value"]["end"] + + transcribed_json.append(formatted_result_dict) + + if is_StandardisedTranscriptionEditing: + if not st_labels_dict: + st_formatted_result_dict["speaker_id"] = None else: try: - formatted_result_dict["speaker_id"] = next( + st_formatted_result_dict["speaker_id"] = next( speaker for speaker in speakers_json - if speaker["name"] == labels_dict["value"]["labels"][0] + if speaker["name"] == st_labels_dict["value"]["labels"][0] )["speaker_id"] except (KeyError, StopIteration): - formatted_result_dict["speaker_id"] = None - formatted_result_dict["start"] = labels_dict["value"]["start"] - formatted_result_dict["end"] = labels_dict["value"]["end"] + st_formatted_result_dict["speaker_id"] = None + st_formatted_result_dict["start"] = st_labels_dict["value"]["start"] + st_formatted_result_dict["end"] = st_labels_dict["value"]["end"] - if not text_dict: - formatted_result_dict["text"] = "" + if not st_text_dict: + st_formatted_result_dict["text"] = "" else: - text_dict_json = json.loads(text_dict) - formatted_result_dict["text"] = text_dict_json["value"]["text"][0] - formatted_result_dict["start"] = text_dict_json["value"]["start"] - formatted_result_dict["end"] = text_dict_json["value"]["end"] - - transcribed_json.append(formatted_result_dict) - - if is_acoustic: - acoustic_formatted_result_dict = deepcopy(formatted_result_dict) - acoustic_dict_json = ( - json.loads(acoustic_text_dict) - if isinstance(acoustic_text_dict, str) - else acoustic_text_dict + text_dict_json = ( + json.loads(st_text_dict) + if isinstance(st_text_dict, str) + else st_text_dict ) - acoustic_formatted_result_dict["text"] = ( - acoustic_dict_json["value"]["text"][0] - if acoustic_dict_json - else "" - ) - acoustic_transcribed_json.append(acoustic_formatted_result_dict) + st_formatted_result_dict["text"] = text_dict_json["value"]["text"][ + 0 + ] + st_formatted_result_dict["start"] = text_dict_json["value"]["start"] + st_formatted_result_dict["end"] = text_dict_json["value"]["end"] + standardised_json_modified.append(st_formatted_result_dict) + + if is_acoustic: + acoustic_formatted_result_dict = deepcopy(formatted_result_dict) + acoustic_dict_json = ( + json.loads(acoustic_text_dict) + if isinstance(acoustic_text_dict, str) + else acoustic_text_dict + ) + acoustic_formatted_result_dict["text"] = ( + acoustic_dict_json["value"]["text"][0] if acoustic_dict_json else "" + ) + acoustic_transcribed_json.append(acoustic_formatted_result_dict) if acoustic_transcribed_json: acoustic_transcribed_json_modified = json.dumps( acoustic_transcribed_json, ensure_ascii=False @@ -1240,12 +1287,21 @@ def convert_annotation_result_to_formatted_json( ) transcribed_json.append(formatted_result_dict) transcribed_json_modified = json.dumps(transcribed_json, ensure_ascii=False) + standardised_json_modified = json.dumps( + standardised_json_modified, ensure_ascii=False + ) if is_acoustic: + if 
is_StandardisedTranscriptionEditing: + return { + "verbatim_transcribed_json": transcribed_json_modified, + "acoustic_normalised_transcribed_json": acoustic_transcribed_json_modified, + "standardised_transcription": standardised_json_modified, + } return { "verbatim_transcribed_json": transcribed_json_modified, "acoustic_normalised_transcribed_json": acoustic_transcribed_json_modified, - "standardised_transcription": standardised_transcription, + "standardised_transcription": [], } return transcribed_json_modified From 3216c3e90a1bfd6aabb255a3ad6e81e3320a88b0 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 27 May 2024 16:33:49 +0530 Subject: [PATCH 17/44] removed commented lines --- backend/projects/views.py | 42 --------------------------------------- 1 file changed, 42 deletions(-) diff --git a/backend/projects/views.py b/backend/projects/views.py index 4eedb6c32..b5b904218 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -1049,48 +1049,6 @@ def convert_annotation_result_to_formatted_json( acoustic_transcribed_json = [] standardised_json_modified = [] transcribed_json_modified, acoustic_transcribed_json_modified = [], [] - if is_StandardisedTranscriptionEditing: - verbatim_transcribed_json = [] - acoustic_normalised_transcribed_json = [] - standardised_transcription = [] - - # for item in annotation_result: - # if isinstance(item, str): - # item = json.loads(item) - # if "text" in item: - # verbatim_transcribed_json.append( - # { - # "speaker_id": item["speaker_id"], - # "start": convert_time_to_seconds(item["start_time"]), - # "end": convert_time_to_seconds(item["end_time"]), - # "text": item["text"], - # } - # ) - # if "acoustic_normalised_text" in item: - # acoustic_normalised_transcribed_json.append( - # { - # "speaker_id": item["speaker_id"], - # "start": convert_time_to_seconds(item["start_time"]), - # "end": convert_time_to_seconds(item["end_time"]), - # "text": item["acoustic_normalised_text"], - # } - # ) - # if "acoustic_standardized_text" in item: - # standardised_transcription.append( - # { - # "speaker_id": item["speaker_id"], - # "start": convert_time_to_seconds(item["start_time"]), - # "end": convert_time_to_seconds(item["end_time"]), - # "text": item["acoustic_standardized_text"], - # } - # ) - # - # complete_json = { - # "verbatim_transcribed_json": verbatim_transcribed_json, - # "acoustic_normalised_transcribed_json": acoustic_normalised_transcribed_json, - # "standardised_transcription": standardised_transcription, - # } - # transcribed_json.append(complete_json) if is_SpeechConversation: ids_formatted = {} for idx1 in range(len(annotation_result)): From 8926ff3cc976177fb89072903695d27e34fdb430 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Thu, 30 May 2024 07:11:27 +0000 Subject: [PATCH 18/44] export changes --- backend/projects/annotation_registry.py | 2 +- .../audio/acoustic_transcription.jsx | 2 +- backend/projects/tasks.py | 74 +++++++++++++------ backend/projects/views.py | 2 +- 4 files changed, 55 insertions(+), 25 deletions(-) diff --git a/backend/projects/annotation_registry.py b/backend/projects/annotation_registry.py index 071f33678..1932ea32b 100644 --- a/backend/projects/annotation_registry.py +++ b/backend/projects/annotation_registry.py @@ -171,7 +171,7 @@ "labels", "verbatim_transcribed_json", "acoustic_normalised_transcribed_json", - "standardised_transcription", + "acoustic_standardised_transcribed_json", ], "type": ["labels", "textarea", "textarea", "textarea"], }, diff --git 
a/backend/projects/label_studio_jsx_files/audio/acoustic_transcription.jsx b/backend/projects/label_studio_jsx_files/audio/acoustic_transcription.jsx index a45d56507..69ab8092c 100644 --- a/backend/projects/label_studio_jsx_files/audio/acoustic_transcription.jsx +++ b/backend/projects/label_studio_jsx_files/audio/acoustic_transcription.jsx @@ -24,7 +24,7 @@