From ea482d5bfd6359dc49d406067d17794451272fa2 Mon Sep 17 00:00:00 2001 From: Ishan Gujarathi Date: Fri, 19 Apr 2024 11:27:32 +0530 Subject: [PATCH 01/44] added a feature to remove user both as worskpace member and workspace manager when user is marked as inactive at organization level --- backend/users/views.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/backend/users/views.py b/backend/users/views.py index b02aeb403..ccd44f389 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -59,6 +59,7 @@ from rest_framework_simplejwt.tokens import RefreshToken from dotenv import load_dotenv import logging +from workspaces.views import WorkspaceusersViewSet logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -675,6 +676,39 @@ def user_details_update(self, request, pk=None): user = User.objects.get(id=pk) serializer = UserUpdateSerializer(user, request.data, partial=True) + existing_is_active = user.is_active + is_active_payload = request.data.get("is_active", None) + + if existing_is_active == is_active_payload: + pass + else: + if is_active_payload is False: + workspaces = Workspace.objects.filter( + Q(members=user) | Q(managers=user) + ).distinct() + + workspacecustomviewset_obj = WorkspaceCustomViewSet() + request.data["ids"] = [user.id] + + workspaceusersviewset_obj = WorkspaceusersViewSet() + request.data["user_id"] = user.id + + for workspace in workspaces: + workspacecustomviewset_obj.unassign_manager( + request=request, pk=workspace.pk + ) + + workspaceusersviewset_obj.remove_members( + request=request, pk=workspace.pk + ) + + return Response( + { + "message": "User removed from all workspaces both as workspace member and workspace manager" + }, + status=status.HTTP_200_OK, + ) + if request.data["role"] != user.role: new_role = int(request.data["role"]) old_role = int(user.role) From 8219ef735ad63cc2565ef5d480cba084da62bcfc Mon Sep 17 00:00:00 2001 From: Ishan Gujarathi Date: Tue, 23 Apr 2024 16:33:37 +0530 Subject: [PATCH 02/44] disabling user from daily emails when user is marked inactive at organization level --- backend/users/views.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/users/views.py b/backend/users/views.py index ccd44f389..2f6506ed0 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -683,6 +683,9 @@ def user_details_update(self, request, pk=None): pass else: if is_active_payload is False: + if user.enable_mail: + user.enable_mail = False + user.save() workspaces = Workspace.objects.filter( Q(members=user) | Q(managers=user) ).distinct() From 7b05c4d3a8b691ed1acb315d680b1f8f38823c18 Mon Sep 17 00:00:00 2001 From: ch20b063 Date: Thu, 25 Apr 2024 09:41:01 +0530 Subject: [PATCH 03/44] stopping_task_pr_1 --- backend/tasks/urls.py | 2 ++ backend/tasks/views.py | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/backend/tasks/urls.py b/backend/tasks/urls.py index d4f229e6f..f74f31963 100644 --- a/backend/tasks/urls.py +++ b/backend/tasks/urls.py @@ -6,6 +6,7 @@ AnnotationViewSet, PredictionViewSet, get_celery_tasks, + stopping_celery_tasks, ) router = routers.DefaultRouter() @@ -15,4 +16,5 @@ urlpatterns = [ path("get_celery_tasks/", get_celery_tasks), + path("stopping_celery_tasks/", stopping_celery_tasks), ] + router.urls diff --git a/backend/tasks/views.py b/backend/tasks/views.py index 43517ac06..20774687c 100644 --- a/backend/tasks/views.py +++ b/backend/tasks/views.py @@ -11,6 +11,11 @@ from django.utils import timezone from 
django.core.paginator import Paginator, EmptyPage, PageNotAnInteger import json +from celery import Celery + +# from flower.api import Flower +# flower_app = Flower() +celery_app = Celery() from django.core.exceptions import ObjectDoesNotExist from django.http import StreamingHttpResponse, FileResponse @@ -2598,3 +2603,24 @@ def get_celery_tasks(request): page_size = int(request.GET.get("page_size", 10)) data = paginate_queryset(filtered_tasks, page_number, page_size) return JsonResponse(data["results"], safe=False) + + +def stopping_celery_tasks(req): + task_id = req.GET.get("task_id") + + if task_id is None: + return JsonResponse({"message": "Task ID is required"}, status=400) + + task = celery_app.AsyncResult(task_id) + + if task is None or task.state == "PENDING": + return JsonResponse({"message": "Task not found or not running"}, status=404) + + if task.state in ["SUCCESS", "FAILURE", "REVOKED"]: + return JsonResponse( + {"message": "Task already completed or revoked"}, status=400 + ) + + task.revoke(terminate=True) + + return JsonResponse({"message": "Task stopped successfully"}, status=200) From e8c8edcd27d45ed11236be27fa01ee309a097157 Mon Sep 17 00:00:00 2001 From: Ishan Gujarathi Date: Mon, 29 Apr 2024 17:31:54 +0530 Subject: [PATCH 04/44] fixed the bug regarding status not getting changed in the Active status column --- backend/users/views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/users/views.py b/backend/users/views.py index 2f6506ed0..ccdc3bd5c 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -704,7 +704,8 @@ def user_details_update(self, request, pk=None): workspaceusersviewset_obj.remove_members( request=request, pk=workspace.pk ) - + user.is_active = False + user.save() return Response( { "message": "User removed from all workspaces both as workspace member and workspace manager" From 3faec989d8366d0e7b6df63c8b62c4cb7f0cc008 Mon Sep 17 00:00:00 2001 From: Pursottam6003 Date: Thu, 2 May 2024 17:19:52 +0530 Subject: [PATCH 05/44] updated the email for user analytics report --- backend/loging/tasks.py | 28 ++++++++-- backend/organizations/tasks.py | 68 +++++++++++++++-------- backend/utils/email_template.py | 98 +++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 26 deletions(-) create mode 100644 backend/utils/email_template.py diff --git a/backend/loging/tasks.py b/backend/loging/tasks.py index 440c8047a..0cc76b84e 100644 --- a/backend/loging/tasks.py +++ b/backend/loging/tasks.py @@ -1,8 +1,9 @@ from celery import shared_task from datetime import datetime from azure.storage.blob import BlobServiceClient, generate_blob_sas, BlobSasPermissions -from django.core.mail import EmailMessage +from django.core.mail import EmailMessage, EmailMultiAlternatives from django.conf import settings +from utils.email_template import send_email_template_with_attachment from utils.blob_functions import ( extract_account_key, extract_account_name, @@ -29,14 +30,33 @@ def get_azure_credentials(connection_string): def send_email_with_url(user_email, attachment_url): try: message = "Here is the link to the generated document:" - email = EmailMessage( + compiled_msg_code = send_email_template_with_attachment( "Transliteration Logs", + user_email, message, + ) + msg = EmailMultiAlternatives( + "Transliteration Logs", + compiled_msg_code, settings.DEFAULT_FROM_EMAIL, [user_email], ) - email.attach("Generated Document", attachment_url, "text/plain") - email.send() + msg.attach_alternative(compiled_msg_code, "text/html") + # also 
attach the generated document + msg.attach("Generated Document", attachment_url, "text/plain") + msg.send() + # compiled_msg.attach("Generated Document", attachment_url, "text/plain") + # compiled_msg.send() + + + # email = EmailMessage( + # "Transliteration Logs", + # message, + # settings.DEFAULT_FROM_EMAIL, + # [user_email], + # ) + # email.attach("Generated Document", attachment_url, "text/plain") + # email.send() except Exception as e: print(f"Failed to send email: {str(e)}") raise e diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index a364f876d..8bb28ab45 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -3,8 +3,9 @@ from celery import shared_task import pandas as pd from django.conf import settings -from django.core.mail import EmailMessage +from django.core.mail import EmailMessage, EmailMultiAlternatives from tasks.views import SentenceOperationViewSet +from utils.email_template import send_email_template_with_attachment from tasks.models import ( Task, @@ -529,31 +530,54 @@ def send_user_reports_mail_org( ] participation_types_string = ", ".join(participation_types) - message = ( - "Dear " - + str(user.username) - + ",\nYour user payment reports for " - + f"{organization.title}" - + " are ready.\n Thanks for contributing on Shoonya!" - + "\nProject Type: " - + f"{project_type}" - + "\nParticipation Types: " - + f"{participation_types_string}" - + ( - "\nStart Date: " + f"{start_date}" + "\nEnd Date: " + f"{end_date}" - if start_date - else "" - ) + message = f""" +

Your user analysis reports for AI4Bharat are now ready for review. Kindly check the attachment below

+
    +
  • Project Type: {project_type}
  • +
  • Participation Types: {participation_types_string}
  • +
  • Start Date: {start_date}
  • +
  • End Date: {end_date}
  • +
+""" + compiled_code = send_email_template_with_attachment( + "User Analytics Report", + user.email, + message ) - - email = EmailMessage( - f"{organization.title}" + " Payment Reports", - message, + msg = EmailMultiAlternatives( + "User Analytics Report", + compiled_code, settings.DEFAULT_FROM_EMAIL, [user.email], - attachments=[(filename, content, content_type)], ) - email.send() + msg.attach_alternative(compiled_code, "text/html") + msg.attach(filename, content, content_type) + msg.send() + # message = ( + # "Dear " + # + str(user.username) + # + ",\nYour user payment reports for " + # + f"{organization.title}" + # + " are ready.\n Thanks for contributing on Shoonya!" + # + "\nProject Type: " + # + f"{project_type}" + # + "\nParticipation Types: " + # + f"{participation_types_string}" + # + ( + # "\nStart Date: " + f"{start_date}" + "\nEnd Date: " + f"{end_date}" + # if start_date + # else "" + # ) + # ) + + # email = EmailMessage( + # f"{organization.title}" + " Payment Reports", + # message, + # settings.DEFAULT_FROM_EMAIL, + # [user.email], + # attachments=[(filename, content, content_type)], + # ) + # email.send() def get_counts( diff --git a/backend/utils/email_template.py b/backend/utils/email_template.py new file mode 100644 index 000000000..d1cbf33b0 --- /dev/null +++ b/backend/utils/email_template.py @@ -0,0 +1,98 @@ +from users.models import User +def send_email_template_with_attachment(subject,user_email,message): + + user = User.objects.get(email=user_email) + + style_string = """ + *{ margin: 0; + padding: 0; + } + body { + font-family: "Arial", sans-serif; + background-color: #f2f8f8; + margin: 0; + padding: 0; + padding-top: 2rem; + } + .container { + background-color: #fff; + border: solid 1px #e1e1e1; + border-radius: 2px; + padding: 1.4rem; + max-width: 380px; + margin: auto; + } + .header { + width: fit-content; + margin: auto; + } + h1 { + font-size: 1.2rem; + font-weight: 300; + margin: 1rem 0; + font-family: "Segoe UI", Tahoma, Geneva, Verdana, sans-serif; + } + p { + font-size: 0.8rem; + color: #222; + margin: 0.8rem 0; + } + .primary { + color: #18621f; + } + .footer { + margin-top: 1rem; + font-size: 0.9rem; + } + .footer > * { + font-size: inherit; + } + """ + + + html_code = f""" + + + + + + User Analytics (Topic) + + + +
+
+

{subject}

+
+
+
+
+
+

+ Dear {user.first_name} {user.last_name}, +

+

+ {message} +

+ Thanks for contributing on Shoonya! +

+

+ This email was intended for {user_email}. If you received it by mistake, please delete it and notify the sender immediately. +

+
+
+
+

+ Best Regards,
+ Shoonya Admin +

+
+
+ + + """ + return html_code + + \ No newline at end of file From 5f44c3fe24eb2d949fc3e1368850f1a4e1fcbd6c Mon Sep 17 00:00:00 2001 From: Pursottam6003 Date: Thu, 2 May 2024 17:21:02 +0530 Subject: [PATCH 06/44] updated the black linting --- backend/loging/tasks.py | 1 - backend/organizations/tasks.py | 6 ++---- backend/utils/email_template.py | 10 ++++------ 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/backend/loging/tasks.py b/backend/loging/tasks.py index 0cc76b84e..c02c99218 100644 --- a/backend/loging/tasks.py +++ b/backend/loging/tasks.py @@ -48,7 +48,6 @@ def send_email_with_url(user_email, attachment_url): # compiled_msg.attach("Generated Document", attachment_url, "text/plain") # compiled_msg.send() - # email = EmailMessage( # "Transliteration Logs", # message, diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index 8bb28ab45..1db8df175 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -540,9 +540,7 @@ def send_user_reports_mail_org( """ compiled_code = send_email_template_with_attachment( - "User Analytics Report", - user.email, - message + "User Analytics Report", user.email, message ) msg = EmailMultiAlternatives( "User Analytics Report", @@ -554,7 +552,7 @@ def send_user_reports_mail_org( msg.attach(filename, content, content_type) msg.send() # message = ( - # "Dear " + # "Dear " # + str(user.username) # + ",\nYour user payment reports for " # + f"{organization.title}" diff --git a/backend/utils/email_template.py b/backend/utils/email_template.py index d1cbf33b0..81f989ca0 100644 --- a/backend/utils/email_template.py +++ b/backend/utils/email_template.py @@ -1,8 +1,9 @@ from users.models import User -def send_email_template_with_attachment(subject,user_email,message): - + + +def send_email_template_with_attachment(subject, user_email, message): user = User.objects.get(email=user_email) - + style_string = """ *{ margin: 0; padding: 0; @@ -49,7 +50,6 @@ def send_email_template_with_attachment(subject,user_email,message): } """ - html_code = f""" @@ -94,5 +94,3 @@ def send_email_template_with_attachment(subject,user_email,message): """ return html_code - - \ No newline at end of file From 2b6d8197b9e13e42c21952d382d0a39d771bfe8a Mon Sep 17 00:00:00 2001 From: ch20b063 Date: Fri, 3 May 2024 08:28:51 +0530 Subject: [PATCH 07/44] deleting and resuming task --- backend/tasks/urls.py | 4 ++++ backend/tasks/views.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/backend/tasks/urls.py b/backend/tasks/urls.py index f74f31963..ceabbf4c7 100644 --- a/backend/tasks/urls.py +++ b/backend/tasks/urls.py @@ -7,6 +7,8 @@ PredictionViewSet, get_celery_tasks, stopping_celery_tasks, + resume_celery_task, + delete_celery_task, ) router = routers.DefaultRouter() @@ -17,4 +19,6 @@ urlpatterns = [ path("get_celery_tasks/", get_celery_tasks), path("stopping_celery_tasks/", stopping_celery_tasks), + path("resume_celery_task/", resume_celery_task), + path("delete_celery_task/", delete_celery_task), ] + router.urls diff --git a/backend/tasks/views.py b/backend/tasks/views.py index 20774687c..e5430aadc 100644 --- a/backend/tasks/views.py +++ b/backend/tasks/views.py @@ -2605,6 +2605,7 @@ def get_celery_tasks(request): return JsonResponse(data["results"], safe=False) +@api_view(["GET"]) def stopping_celery_tasks(req): task_id = req.GET.get("task_id") @@ -2624,3 +2625,39 @@ def stopping_celery_tasks(req): task.revoke(terminate=True) return JsonResponse({"message": "Task stopped 
successfully"}, status=200) + + +@api_view(["GET"]) +def resume_celery_task(req): + task_id = req.GET.get("task_id") + + if task_id is None: + return JsonResponse({"message": "Task ID is required"}, status=400) + + task = celery_app.AsyncResult(task_id) + + if task is None or task.state not in ["REVOKED", "FAILURE"]: + return JsonResponse( + {"message": "Task not found or cannot be resumed"}, status=400 + ) + + task.revive() + + return JsonResponse({"message": "Task resumed successfully"}, status=200) + + +@api_view(["GET"]) +def delete_celery_task(req): + task_id = req.GET.get("task_id") + + if task_id is None: + return JsonResponse({"message": "Task ID is required"}, status=400) + + task = celery_app.AsyncResult(task_id) + + if task is None: + return JsonResponse({"message": "Task not found"}, status=404) + + task.forget() + + return JsonResponse({"message": "Task deleted successfully"}, status=200) From 37dab455cc811a1c8c1932d65cc67157355856b8 Mon Sep 17 00:00:00 2001 From: Pursottam6003 Date: Mon, 6 May 2024 22:45:44 +0530 Subject: [PATCH 08/44] updated the email template for the backend code --- backend/organizations/tasks.py | 101 +++++++++++++------------------ backend/users/models.py | 21 +++++-- backend/users/views.py | 41 ++++++++++--- backend/utils/email_template.py | 104 +++++++++++++++++++++++++++++--- 4 files changed, 188 insertions(+), 79 deletions(-) diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index 1db8df175..86296c834 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -3,7 +3,7 @@ from celery import shared_task import pandas as pd from django.conf import settings -from django.core.mail import EmailMessage, EmailMultiAlternatives +from django.core.mail import EmailMultiAlternatives from tasks.views import SentenceOperationViewSet from utils.email_template import send_email_template_with_attachment @@ -516,7 +516,7 @@ def send_user_reports_mail_org( content = df.to_csv(index=False) content_type = "text/csv" - filename = f"{organization.title}_user_analytics.csv" + filename = f"{organization.title}_payments_analytics.csv" participation_types = [ "Full Time" @@ -531,7 +531,7 @@ def send_user_reports_mail_org( participation_types_string = ", ".join(participation_types) message = f""" -

Your user analysis reports for AI4Bharat are now ready for review. Kindly check the attachment below

+

Your {organization.title} Payments Report under the AI4Bharat Organisation is now ready for review. Kindly check the attachment below.

  • Project Type: {project_type}
  • Participation Types: {participation_types_string}
  • @@ -540,10 +540,10 @@ def send_user_reports_mail_org(
""" compiled_code = send_email_template_with_attachment( - "User Analytics Report", user.email, message + "Payment Reports", user.username, message ) msg = EmailMultiAlternatives( - "User Analytics Report", + f"{organization.title} Payment Reports", compiled_code, settings.DEFAULT_FROM_EMAIL, [user.email], @@ -551,31 +551,6 @@ def send_user_reports_mail_org( msg.attach_alternative(compiled_code, "text/html") msg.attach(filename, content, content_type) msg.send() - # message = ( - # "Dear " - # + str(user.username) - # + ",\nYour user payment reports for " - # + f"{organization.title}" - # + " are ready.\n Thanks for contributing on Shoonya!" - # + "\nProject Type: " - # + f"{project_type}" - # + "\nParticipation Types: " - # + f"{participation_types_string}" - # + ( - # "\nStart Date: " + f"{start_date}" + "\nEnd Date: " + f"{end_date}" - # if start_date - # else "" - # ) - # ) - - # email = EmailMessage( - # f"{organization.title}" + " Payment Reports", - # message, - # settings.DEFAULT_FROM_EMAIL, - # [user.email], - # attachments=[(filename, content, content_type)], - # ) - # email.send() def get_counts( @@ -1267,25 +1242,26 @@ def send_project_analytics_mail_org( content = df.to_csv(index=False) content_type = "text/csv" filename = f"{organization.title}_project_analytics.csv" + message = f""" +

Your {organization.title} Project Analytics Report under the AI4Bharat Organisation is now ready for review. Kindly check the attachment below.

- message = ( - "Dear " - + str(user.username) - + ",\nYour project analysis reports for " - + f"{organization.title}" - + " are ready.\n Thanks for contributing on Shoonya!" - + "\nProject Type: " - + f"{project_type}" +
    +
  • Project Type: {project_type}
  • +
  • Language: {selected_language}
  • +
+""" + compiled_code = send_email_template_with_attachment( + "Project Analytics", user.username, message ) - - email = EmailMessage( - f"{organization.title}" + " Project Analytics", - message, + msg = EmailMultiAlternatives( + f"{organization.title} Project Analytics", + compiled_code, settings.DEFAULT_FROM_EMAIL, [user.email], - attachments=[(filename, content, content_type)], ) - email.send() + msg.attach_alternative(compiled_code, "text/html") + msg.attach(filename, content, content_type) + msg.send() @shared_task(queue="reports") @@ -1474,21 +1450,30 @@ def send_user_analytics_mail_org( content_type = "text/csv" filename = f"{organization.title}_user_analytics.csv" - message = ( - "Dear " - + str(user.username) - + ",\nYour user analysis reports for " - + f"{organization.title}" - + " are ready.\n Thanks for contributing on Shoonya!" - + "\nProject Type: " - + f"{project_type}" + project_progress_stage_name = "All Stage" + if project_progress_stage == ANNOTATION_STAGE: + project_progress_stage_name = "Annotation" + elif project_progress_stage == REVIEW_STAGE: + project_progress_stage_name = "Review" + else: + project_progress_stage_name = "Super Check" + message = f""" +

Your {organization.title} User Analytics Report under the AI4Bharat Organisation is now ready for review. Kindly check the attachment below.

+
    +
  • Project Type: {project_type}
  • +
  • Progress Stage: {project_progress_stage_name}
  • +
  • Target Language: {tgt_language}
  • +
+""" + compiled_code = send_email_template_with_attachment( + "User Analytics", user.username, message ) - - email = EmailMessage( - f"{organization.title}" + " User Analytics", - message, + msg = EmailMultiAlternatives( + f"{organization.title} User Analytics", + compiled_code, settings.DEFAULT_FROM_EMAIL, [user.email], - attachments=[(filename, content, content_type)], ) - email.send() + msg.attach_alternative(compiled_code, "text/html") + msg.attach(filename, content, content_type) + msg.send() diff --git a/backend/users/models.py b/backend/users/models.py index c391d5f23..b93172f33 100644 --- a/backend/users/models.py +++ b/backend/users/models.py @@ -9,7 +9,7 @@ import jwt from datetime import datetime, timedelta -from django.core.mail import send_mail +from django.core.mail import send_mail, EmailMultiAlternatives from django.db import models from django.db.models.signals import post_delete from django.dispatch import receiver @@ -26,6 +26,7 @@ from .utils import hash_upload from .managers import UserManager +from utils.email_template import send_email_template # List of Indic languages LANG_CHOICES = ( @@ -282,12 +283,24 @@ def send_mail_to_change_password(self, email, key): prefix = os.getenv("FRONTEND_URL_FOR_RESET_PASSWORD") link = f"{prefix}/#/forget-password/confirm/{key}/{sent_token}" try: - send_mail( - "Reset password link for shoonya", - f"Hello! Please click on the following link to reset your password - {link}", + subject = "Reset Password Link For Shoonya" + message = f"

Hello! Please click on the following link to reset your password - {link}

" + + compiled_code = send_email_template(subject, message) + msg = EmailMultiAlternatives( + subject, + compiled_code, settings.DEFAULT_FROM_EMAIL, [email], ) + msg.attach_alternative(compiled_code, "text/html") + msg.send() + # send_mail( + # "Reset password link for shoonya", + # f"Hello! Please click on the following link to reset your password - {link}", + # settings.DEFAULT_FROM_EMAIL, + # [email], + # ) except SMTPAuthenticationError: raise Exception( "Failed to authenticate with the SMTP server. Check your email settings." diff --git a/backend/users/views.py b/backend/users/views.py index 320a06901..a9e0c868d 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -31,6 +31,7 @@ from organizations.decorators import is_organization_owner from users.models import LANG_CHOICES, User, CustomPeriodicTask from rest_framework.decorators import action +from utils.email_template import send_email_template from tasks.models import ( Task, ANNOTATOR_ANNOTATION, @@ -54,7 +55,7 @@ from datetime import datetime import calendar from django.conf import settings -from django.core.mail import send_mail +from django.core.mail import send_mail, EmailMultiAlternatives from workspaces.views import WorkspaceCustomViewSet from .utils import generate_random_string, get_role_name from rest_framework_simplejwt.tokens import RefreshToken @@ -676,19 +677,43 @@ def update_email(self, request): old_email_update_code = generate_random_string(10) new_email_verification_code = generate_random_string(10) - send_mail( - "Email Verification", - f"Your email verification code is:{old_email_update_code}", + subject = "Email Verification" + message = f"

Your email verification code is: {old_email_update_code}

" + + compiled_code = send_email_template(subject, message) + + msg = EmailMultiAlternatives( + subject, + message, settings.DEFAULT_FROM_EMAIL, [user.email], ) - - send_mail( - "Email Verification", - f"Your email verification code is:{new_email_verification_code}", + msg.attach_alternative(compiled_code, "text/html") + msg.send() + + # send_mail( + # "Email Verification", + # f"Your email verification code is:{old_email_update_code}", + # settings.DEFAULT_FROM_EMAIL, + # [user.email], + # ) + + # send_mail( + # "Email Verification", + # f"Your email verification code is:{new_email_verification_code}", + # settings.DEFAULT_FROM_EMAIL, + # [unverified_email], + # ) + + message = f"Your email verification code is: {new_email_verification_code} " + msg1 = EmailMultiAlternatives( + subject, + message, settings.DEFAULT_FROM_EMAIL, [unverified_email], ) + msg1.attach_alternative(compiled_code, "text/html") + msg1.send() user.unverified_email = unverified_email user.old_email_update_code = old_email_update_code diff --git a/backend/utils/email_template.py b/backend/utils/email_template.py index 81f989ca0..a1f15efb9 100644 --- a/backend/utils/email_template.py +++ b/backend/utils/email_template.py @@ -1,9 +1,4 @@ -from users.models import User - - -def send_email_template_with_attachment(subject, user_email, message): - user = User.objects.get(email=user_email) - +def send_email_template_with_attachment(subject, username, message): style_string = """ *{ margin: 0; padding: 0; @@ -56,7 +51,7 @@ def send_email_template_with_attachment(subject, user_email, message): - User Analytics (Topic) + {subject} @@ -71,7 +66,7 @@ def send_email_template_with_attachment(subject, user_email, message):

- Dear {user.first_name} {user.last_name}, + Dear {username},

{message} @@ -79,7 +74,98 @@ def send_email_template_with_attachment(subject, user_email, message): Thanks for contributing on Shoonya!

- This email was intended for {user_email} If you received it by mistake, please delete it and notify the sender immediately. + This email was intended for {username} If you received it by mistake, please delete it and notify the sender immediately. +

+
+ +
+

+ Best Regards,
+ Shoonya Admin +

+
+ + + + """ + return html_code + + +def send_email_template(subject, message): + style_string = """ + *{ margin: 0; + padding: 0; + } + body { + font-family: "Arial", sans-serif; + background-color: #f2f8f8; + margin: 0; + padding: 0; + padding-top: 2rem; + } + .container { + background-color: #fff; + border: solid 1px #e1e1e1; + border-radius: 2px; + padding: 1.4rem; + max-width: 380px; + margin: auto; + } + .header { + width: fit-content; + margin: auto; + } + h1 { + font-size: 1.2rem; + font-weight: 300; + margin: 1rem 0; + font-family: "Segoe UI", Tahoma, Geneva, Verdana, sans-serif; + } + p { + font-size: 0.8rem; + color: #222; + margin: 0.8rem 0; + } + .primary { + color: #18621f; + } + .footer { + margin-top: 1rem; + font-size: 0.9rem; + } + .footer > * { + font-size: inherit; + } + """ + + html_code = f""" + + + + + + {subject} + + + +
+
+

{subject}

+
+
+
+
+
+

+ Dear User, +

+ + {message} + +

+ This is an automated email. Please do not reply to this email.

From 42966df6c94a12f4e7aab187a8775a6624b3d2bc Mon Sep 17 00:00:00 2001 From: Pursottam6003 Date: Mon, 6 May 2024 23:06:59 +0530 Subject: [PATCH 09/44] changed the time formatting --- backend/organizations/tasks.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index 86296c834..225888893 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -529,14 +529,16 @@ def send_user_reports_mail_org( for participation_type in participation_types ] participation_types_string = ", ".join(participation_types) - + # Format the start_date and end_date + start_date = start_date.strftime("%Y-%m-%d %H:%M:%S %Z") + end_date = end_date.strftime("%Y-%m-%d %H:%M:%S %Z") message = f"""

Your {organization.title} Payments Report under the AI4Bharat Organisation is now ready for review. Kindly check the attachment below.

  • Project Type: {project_type}
  • Participation Types:{participation_types_string}
  • -
  • Start Date: {start_date}
  • -
  • End Date: {end_date}
  • +
  • Start Date: {start_date} UTC
  • +
  • End Date: {end_date} UTC
""" compiled_code = send_email_template_with_attachment( From 40a96bebda9660b3e2a394e182a35160dca1d959 Mon Sep 17 00:00:00 2001 From: Ishan Gujarathi Date: Wed, 8 May 2024 16:14:06 +0530 Subject: [PATCH 10/44] added a feature to remove user from frozen users list when user is marked active again being marked inactive --- backend/users/views.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/backend/users/views.py b/backend/users/views.py index 8b864a487..cbaa32455 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -902,6 +902,19 @@ def user_details_update(self, request, pk=None): }, status=status.HTTP_200_OK, ) + else: + if is_active_payload is True: + workspaces = Workspace.objects.filter( + Q(members=user) | Q(managers=user) + ).distinct() + + workspaceusersviewset_obj = WorkspaceusersViewSet() + request.data["user_id"] = user.id + + for workspace in workspaces: + workspaceusersviewset_obj.remove_frozen_user( + request=request, pk=workspace.pk + ) if request.data["role"] != user.role: new_role = int(request.data["role"]) From f0d05bcf3d7cb72a86e860fcca3d7284dce88fe2 Mon Sep 17 00:00:00 2001 From: Pursottam6003 Date: Mon, 13 May 2024 19:20:44 +0530 Subject: [PATCH 11/44] updated the changes for new project creation --- backend/projects/annotation_registry.py | 12 +++++++ backend/projects/views.py | 44 +++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/backend/projects/annotation_registry.py b/backend/projects/annotation_registry.py index 2dddab5d3..071f33678 100644 --- a/backend/projects/annotation_registry.py +++ b/backend/projects/annotation_registry.py @@ -164,6 +164,18 @@ "type": ["labels", "textarea", "textarea", "textarea"], }, }, + "StandardizedTranscriptionEditing": { + "transcribed_json": { + "to_name": "audio_url", + "from_name": [ + "labels", + "verbatim_transcribed_json", + "acoustic_normalised_transcribed_json", + "standardised_transcription", + ], + "type": ["labels", "textarea", "textarea", "textarea"], + }, + }, } diff --git a/backend/projects/views.py b/backend/projects/views.py index 579e26ee1..a1221c772 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -914,6 +914,50 @@ def convert_prediction_json_to_annotation_result(pk, proj_type): # mainly label_dict and text_dict are sent as result result.append(label_dict) result.append(text_dict) + elif proj_type == "StandardisedTranscriptionEditing": + # convert the prediction_json to a concatinated transcribed_json + data_item = SpeechConversation.objects.get(pk=pk) + prediction_json = ( + json.loads(data_item.prediction_json) + if isinstance(data_item.prediction_json, str) + else data_item.prediction_json + ) + speakers_json = data_item.speakers_json + audio_duration = data_item.audio_duration + # converting prediction_json to result (wherever it exists) for every task. 
+ if prediction_json == None: + return result + # Initialize variables + concatenated_text = "" + min_start_time = float("inf") + max_end_time = float("-inf") + + for idx, val in enumerate(prediction_json): + # Concatenate the text + concatenated_text += val["text"] + " " + + # Update the minimum start time and maximum end time + min_start_time = min(min_start_time, val["start"]) + max_end_time = max(max_end_time, val["end"]) + + # Create a single dictionary to store the result + result_dict = { + "origin": "manual", + "to_name": "audio_url", + "from_name": "transcribed_json", + "original_length": audio_duration, + "id": f"shoonya_{generate_random_string(13)}", + "type": "textarea", + "value": { + "start": min_start_time, + "end": max_end_time, + "text": [concatenated_text], # Remove trailing space + }, + } + + # Clear the result array and append the single result dictionary + result.clear() + result.append(result_dict) elif ( proj_type == "OCRTranscriptionEditing" or proj_type == "OCRSegmentCategorizationEditing" From ef047a74843c3ad5b0874859c4d86035b399b634 Mon Sep 17 00:00:00 2001 From: Pursottam6003 Date: Sun, 19 May 2024 00:25:15 +0530 Subject: [PATCH 12/44] made changes to create project --- backend/dataset/models.py | 7 ++- backend/organizations/views.py | 1 + backend/projects/models.py | 1 - backend/projects/project_registry.yaml | 23 ++++++++++ backend/projects/tasks.py | 2 +- backend/projects/views.py | 62 +++++++++++++++++++++++++- 6 files changed, 91 insertions(+), 5 deletions(-) diff --git a/backend/dataset/models.py b/backend/dataset/models.py index ec16c9903..ac5d4a4db 100644 --- a/backend/dataset/models.py +++ b/backend/dataset/models.py @@ -484,7 +484,12 @@ class SpeechConversation(DatasetBase): blank=True, help_text=("Prepopulated prediction for the implemented models"), ) - + final_transcribed_json = models.JSONField( + verbose_name="final_transcribed_json", + null=True, + blank=True, + help_text=("Field where data from this standardised_transcription_editing type will be exported."), + ) def __str__(self): return str(self.id) diff --git a/backend/organizations/views.py b/backend/organizations/views.py index 7d5e7b726..46848571e 100644 --- a/backend/organizations/views.py +++ b/backend/organizations/views.py @@ -2713,6 +2713,7 @@ def cumulative_tasks_count(self, request, pk=None): "AudioSegmentation", "AudioTranscription", "AudioTranscriptionEditing", + "StandardisedTranscriptionEditing" "ContextualSentenceVerification", "ContextualSentenceVerificationAndDomainClassification", "ContextualTranslationEditing", diff --git a/backend/projects/models.py b/backend/projects/models.py index da5034cd4..f7eb1d487 100644 --- a/backend/projects/models.py +++ b/backend/projects/models.py @@ -249,7 +249,6 @@ class Project(models.Model): "Maximum no. 
of tasks assigned to a user which are at unlabeled stage, as a threshold for pulling new tasks" ), ) - # enable_task_reviews = models.BooleanField( # verbose_name="enable_task_reviews", # default=False, diff --git a/backend/projects/project_registry.yaml b/backend/projects/project_registry.yaml index b4c0c8d85..853f8d4ad 100644 --- a/backend/projects/project_registry.yaml +++ b/backend/projects/project_registry.yaml @@ -395,3 +395,26 @@ Audio: fields: annotations: - transcribed_json + StandardizedTranscriptionEditing: + project_mode: "Annotation" + label_studio_jsx_file: "audio/acoustic_transcription.jsx" + input_dataset: + class: SpeechConversation + fields: + - audio_url + - reference_raw_transcript + - audio_duration + - scenario + - domain + - speakers_json + display_fields: + - scenario + - audio_url + prediction: machine_transcribed_json + output_dataset: + class: SpeechConversation + save_type: in_place + fields: + annotations: + - transcribed_json + diff --git a/backend/projects/tasks.py b/backend/projects/tasks.py index 8f0bd51c1..dd5eddccd 100644 --- a/backend/projects/tasks.py +++ b/backend/projects/tasks.py @@ -314,7 +314,7 @@ def filter_data_items( #### CELERY SHARED TASKS -@shared_task +# @shared_task def create_parameters_for_task_creation( project_type, dataset_instance_ids, diff --git a/backend/projects/views.py b/backend/projects/views.py index a1221c772..56f3f006b 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -1039,6 +1039,7 @@ def convert_annotation_result_to_formatted_json( is_SpeechConversation, is_OCRSegmentCategorizationOROCRSegmentCategorizationEditing, is_acoustic=False, + is_StandardisedTranscriptionEditing=False, ): transcribed_json = [] acoustic_transcribed_json = [] @@ -1132,6 +1133,37 @@ def convert_annotation_result_to_formatted_json( acoustic_transcribed_json_modified = json.dumps( acoustic_transcribed_json, ensure_ascii=False ) + + elif is_StandardisedTranscriptionEditing: + ''' + in need to convert in this format + text": "dummy format", + "end_time": "00:00:10.448", + "speaker_id": "Speaker 0", + "start_time": "00:00:00.000", + "id": 1, + "acoustic_normalised_text : text + ''' + + for idx1 in range(0, len(annotation_result), 2): + formatted_result_dict = {} + text_dict = {} + acoustic_text_dict = {} + if isinstance(annotation_result[idx1], str): + annotation_result[idx1] = json.loads(annotation_result[idx1]) + if isinstance(annotation_result[idx1 + 1], str): + annotation_result[idx1 + 1] = json.loads(annotation_result[idx1 + 1]) + text_dict = annotation_result[idx1] + acoustic_text_dict = annotation_result[idx1 + 1] + formatted_result_dict["text"] = text_dict["value"]["text"][0] + formatted_result_dict["start_time"] = text_dict["value"]["start"] + formatted_result_dict["end_time"] = text_dict["value"]["end"] + formatted_result_dict["speaker_id"] = text_dict["value"]["speaker_id"] + formatted_result_dict["id"] = text_dict["id"] + formatted_result_dict["acoustic_normalised_text"] = acoustic_text_dict["value"]["text"][0] + transcribed_json.append(formatted_result_dict) + + else: dicts = 2 if is_OCRSegmentCategorizationOROCRSegmentCategorizationEditing else 3 for idx1 in range(0, len(annotation_result), dicts): @@ -1200,6 +1232,13 @@ def convert_annotation_result_to_formatted_json( "acoustic_normalised_transcribed_json": acoustic_transcribed_json_modified, "standardised_transcription": standardised_transcription, } + + if is_StandardisedTranscriptionEditing: + return { + "verbatim_transcribed_json": transcribed_json_modified, + 
"acoustic_normalised_transcribed_json": acoustic_transcribed_json_modified, + "standardised_transcription": standardised_transcription, + } return transcribed_json_modified @@ -2188,7 +2227,7 @@ def create(self, request, *args, **kwargs): proj.save() # Function call to create the paramters for the sampling and filtering of sentences - create_parameters_for_task_creation.delay( + create_parameters_for_task_creation( project_type=project_type, dataset_instance_ids=dataset_instance_ids, filter_string=filter_string, @@ -2409,7 +2448,21 @@ def assign_new_tasks(self, request, pk, *args, **kwargs): "AudioTranscriptionEditing", "OCRTranscriptionEditing", "OCRSegmentCategorizationEditing", - ]: + "StandardisedTranscriptionEditing", + ]: + + if project.project_type == "StandardisedTranscriptionEditing": + try: + #gather trascribed_json + result = convert_annotation_result_to_formatted_json( + task.input_data.id, project.project_type, is_StandardisedTranscriptionEditing=True + ) + except Exception as e: + print( + f"The prediction json of the data item-{task.input_data.id} is corrupt." + ) + task.delete() + continue try: result = convert_prediction_json_to_annotation_result( task.input_data.id, project.project_type @@ -2423,6 +2476,7 @@ def assign_new_tasks(self, request, pk, *args, **kwargs): annotator_anno_count = Annotation_model.objects.filter( task_id=task, annotation_type=ANNOTATOR_ANNOTATION ).count() + if annotator_anno_count < project.required_annotators_per_task: cur_user_anno_count = Annotation_model.objects.filter( task_id=task, @@ -4135,6 +4189,7 @@ def download(self, request, pk=None, *args, **kwargs): project_type == "OCRSegmentCategorizationEditing" ) is_OCRSegmentCategorization = project_type == "OCRSegmentCategorization" + is_StandardizedTranscriptionEditing = project_type = "StandardizedTranscriptionEditing" for task in tasks: curr_task = process_task( task, @@ -4153,6 +4208,9 @@ def download(self, request, pk=None, *args, **kwargs): is_ConversationTranslation, is_ConversationVerification, ) + elif is_StandardizedTranscriptionEditing: + pass + elif dataset_type in ["SpeechConversation", "OCRDocument"]: is_SpeechConversation = dataset_type == "SpeechConversation" if is_SpeechConversation: From cf8c38649f8e9049f0a0414a5258eaf1d0f80b93 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 27 May 2024 10:23:23 +0530 Subject: [PATCH 13/44] added changes for StandardizedTranscriptionEditing project type --- ...eechconversation_final_transcribed_json.py | 22 ++ backend/dataset/models.py | 5 +- .../0053_alter_project_project_type.py | 63 ++++++ backend/projects/project_registry.yaml | 2 +- backend/projects/tasks.py | 25 ++- backend/projects/utils.py | 86 +++++++- backend/projects/views.py | 204 +++++++++--------- backend/tasks/views.py | 118 ++++++---- .../migrations/0034_alter_user_is_approved.py | 21 ++ .../convert_result_to_chitralekha_format.py | 50 +++-- 10 files changed, 430 insertions(+), 166 deletions(-) create mode 100644 backend/dataset/migrations/0047_speechconversation_final_transcribed_json.py create mode 100644 backend/projects/migrations/0053_alter_project_project_type.py create mode 100644 backend/users/migrations/0034_alter_user_is_approved.py diff --git a/backend/dataset/migrations/0047_speechconversation_final_transcribed_json.py b/backend/dataset/migrations/0047_speechconversation_final_transcribed_json.py new file mode 100644 index 000000000..1c9837814 --- /dev/null +++ b/backend/dataset/migrations/0047_speechconversation_final_transcribed_json.py @@ -0,0 +1,22 @@ +# 
Generated by Django 3.2.14 on 2024-05-21 06:02 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("dataset", "0046_merge_20240416_2233"), + ] + + operations = [ + migrations.AddField( + model_name="speechconversation", + name="final_transcribed_json", + field=models.JSONField( + blank=True, + help_text="Field where data from this standardised_transcription_editing type will be exported.", + null=True, + verbose_name="final_transcribed_json", + ), + ), + ] diff --git a/backend/dataset/models.py b/backend/dataset/models.py index ac5d4a4db..c1432cbd3 100644 --- a/backend/dataset/models.py +++ b/backend/dataset/models.py @@ -488,8 +488,11 @@ class SpeechConversation(DatasetBase): verbose_name="final_transcribed_json", null=True, blank=True, - help_text=("Field where data from this standardised_transcription_editing type will be exported."), + help_text=( + "Field where data from this standardised_transcription_editing type will be exported." + ), ) + def __str__(self): return str(self.id) diff --git a/backend/projects/migrations/0053_alter_project_project_type.py b/backend/projects/migrations/0053_alter_project_project_type.py new file mode 100644 index 000000000..1e0aee436 --- /dev/null +++ b/backend/projects/migrations/0053_alter_project_project_type.py @@ -0,0 +1,63 @@ +# Generated by Django 3.2.14 on 2024-05-21 06:02 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("projects", "0052_alter_project_project_type"), + ] + + operations = [ + migrations.AlterField( + model_name="project", + name="project_type", + field=models.CharField( + choices=[ + ("MonolingualTranslation", "MonolingualTranslation"), + ("TranslationEditing", "TranslationEditing"), + ( + "SemanticTextualSimilarity_Scale5", + "SemanticTextualSimilarity_Scale5", + ), + ("ContextualTranslationEditing", "ContextualTranslationEditing"), + ("OCRTranscription", "OCRTranscription"), + ("OCRTranscriptionEditing", "OCRTranscriptionEditing"), + ("OCRSegmentCategorization", "OCRSegmentCategorization"), + ( + "OCRSegmentCategorizationEditing", + "OCRSegmentCategorizationEditing", + ), + ("MonolingualCollection", "MonolingualCollection"), + ("SentenceSplitting", "SentenceSplitting"), + ( + "ContextualSentenceVerification", + "ContextualSentenceVerification", + ), + ( + "ContextualSentenceVerificationAndDomainClassification", + "ContextualSentenceVerificationAndDomainClassification", + ), + ("ConversationTranslation", "ConversationTranslation"), + ( + "ConversationTranslationEditing", + "ConversationTranslationEditing", + ), + ("ConversationVerification", "ConversationVerification"), + ("AudioTranscription", "AudioTranscription"), + ("AudioSegmentation", "AudioSegmentation"), + ("AudioTranscriptionEditing", "AudioTranscriptionEditing"), + ( + "AcousticNormalisedTranscriptionEditing", + "AcousticNormalisedTranscriptionEditing", + ), + ( + "StandardizedTranscriptionEditing", + "StandardizedTranscriptionEditing", + ), + ], + help_text="Project Type indicating the annotation task", + max_length=100, + ), + ), + ] diff --git a/backend/projects/project_registry.yaml b/backend/projects/project_registry.yaml index 853f8d4ad..e2de18292 100644 --- a/backend/projects/project_registry.yaml +++ b/backend/projects/project_registry.yaml @@ -416,5 +416,5 @@ Audio: save_type: in_place fields: annotations: - - transcribed_json + - final_transcribed_json diff --git a/backend/projects/tasks.py b/backend/projects/tasks.py index 
dd5eddccd..90e28df17 100644 --- a/backend/projects/tasks.py +++ b/backend/projects/tasks.py @@ -314,7 +314,7 @@ def filter_data_items( #### CELERY SHARED TASKS -# @shared_task +@shared_task def create_parameters_for_task_creation( project_type, dataset_instance_ids, @@ -452,6 +452,9 @@ def export_project_in_place( is_AcousticNormalisedTranscriptionEditing = ( project_type == "AcousticNormalisedTranscriptionEditing" ) + is_StandardizedTranscriptionEditing = ( + project_type == "StandardizedTranscriptionEditing" + ) is_ConversationVerification = project.project_type == "ConversationVerification" bboxes_relation_json = [] annotated_document_details_json = {} @@ -464,7 +467,10 @@ def export_project_in_place( print(error) export_excluded_task_ids.append(task.id) continue - if is_AcousticNormalisedTranscriptionEditing: + if ( + is_AcousticNormalisedTranscriptionEditing + or is_StandardizedTranscriptionEditing + ): try: ta_transcribed_json = json.loads(ta["verbatim_transcribed_json"]) except json.JSONDecodeError: @@ -507,7 +513,11 @@ def export_project_in_place( # We need to store the rating in integer format if field == "rating": setattr(data_item, field, int(ta[field])) - elif field == "transcribed_json" or field == "prediction_json": + elif ( + field == "transcribed_json" + or field == "prediction_json" + or field == "final_transcribed_json" + ): speakers_details = data_item.speakers_json for idx in range(len(ta_transcribed_json)): ta_labels[idx]["text"] = ta_transcribed_json[idx] @@ -522,7 +532,10 @@ def export_project_in_place( temp = deepcopy(ta_labels[idx]) temp["text"] = ta_acoustic_transcribed_json[idx] ta_acoustic_transcribed_json[idx] = temp - if is_AcousticNormalisedTranscriptionEditing: + if ( + is_AcousticNormalisedTranscriptionEditing + or is_StandardizedTranscriptionEditing + ): try: standardised_transcription = json.loads( ta["standardised_transcription"] @@ -538,6 +551,10 @@ def export_project_in_place( "acoustic_normalised_transcribed_json": ta_acoustic_transcribed_json, "standardised_transcription": standardised_transcription, } + if is_StandardizedTranscriptionEditing: + setattr( + data_item, "final_transcribed_json", ta_transcribed_json + ) setattr(data_item, field, ta_transcribed_json) else: setattr(data_item, field, ta_labels) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index 9408d44ce..c9416e398 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -13,7 +13,7 @@ from users.models import User from django.forms import model_to_dict -from dataset.models import Conversation +from dataset.models import Conversation, SpeechConversation from tasks.models import ( Annotation, ANNOTATED, @@ -419,10 +419,26 @@ def process_speech_results( ): from projects.views import convert_annotation_result_to_formatted_json + is_StandardizedTranscriptionEditing = ( + project_type == "StandardizedTranscriptionEditing" + ) + if is_audio_segmentation: task["data"]["prediction_json"] = convert_annotation_result_to_formatted_json( annotation_result, speakers_json, True, False, False ) + elif is_StandardizedTranscriptionEditing: + task["data"][ + "final_transcribed_json" + ] = convert_annotation_result_to_formatted_json( + annotation_result, + speakers_json, + True, + False, + False, + True, + ) + task["data"]["transcribed_json"] = task["data"]["final_transcribed_json"] else: task["data"]["transcribed_json"] = convert_annotation_result_to_formatted_json( annotation_result, @@ -516,3 +532,71 @@ def process_task( task_dict["data"] = data return 
OrderedDict(task_dict) + + +def convert_time_to_seconds(time_str): + # Split the time string into hours, minutes, seconds, and milliseconds + hours, minutes, seconds_milliseconds = time_str.split(":") + seconds, milliseconds = seconds_milliseconds.split(".") + + # Convert each component to integers + hours = int(hours) + minutes = int(minutes) + seconds = int(seconds) + milliseconds = int(milliseconds) + + # Calculate the total time in seconds + total_seconds = (hours * 3600) + (minutes * 60) + seconds + (milliseconds / 1000.0) + + return total_seconds + + +def parse_json_for_ste(input_data_id): + data_item = SpeechConversation.objects.get(pk=input_data_id) + input_data = ( + json.loads(data_item.transcribed_json) + if isinstance(data_item.transcribed_json, str) + else data_item.transcribed_json + ) + if not input_data: + return [] + acoustic_normalised = json.loads(input_data["acoustic_normalised_transcribed_json"]) + standardised_transcription = json.loads(input_data["standardised_transcription"]) + result = [] + id_counter = 1 + + # Function to convert float seconds to hh:mm:ss.ms format + def format_time(seconds): + td = datetime.timedelta(seconds=seconds) + return str(td) + + # Combine all transcriptions into one list + for item in acoustic_normalised: + result.append( + { + "text": item["text"], + "end_time": format_time(item["end"]), + "speaker_id": f"{item['speaker_id']}", + "start_time": format_time(item["start"]), + "id": id_counter, + "acoustic_normalised_text": item["text"], + } + ) + id_counter += 1 + + for item in standardised_transcription: + result.append( + { + "acoustic_standardized_text": item["text"], + "end_time": format_time(item["end"]), + "speaker_id": f"Speaker {item['speaker_id']}", + "start_time": format_time(item["start"]), + "id": id_counter, + } + ) + id_counter += 1 + + # Sort the result by start_time and then by the presence of 'acoustic_normalised_text' + result.sort(key=lambda x: (x["id"], "acoustic_normalised_text" not in x)) + + return result diff --git a/backend/projects/views.py b/backend/projects/views.py index 56f3f006b..ca7f0b33c 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -25,6 +25,8 @@ process_speech_tasks, process_ocr_tasks, process_task, + convert_time_to_seconds, + parse_json_for_ste, ) from django.http import HttpResponse, JsonResponse from rest_framework import status, viewsets @@ -914,46 +916,48 @@ def convert_prediction_json_to_annotation_result(pk, proj_type): # mainly label_dict and text_dict are sent as result result.append(label_dict) result.append(text_dict) - elif proj_type == "StandardisedTranscriptionEditing": - # convert the prediction_json to a concatinated transcribed_json - data_item = SpeechConversation.objects.get(pk=pk) - prediction_json = ( - json.loads(data_item.prediction_json) - if isinstance(data_item.prediction_json, str) - else data_item.prediction_json - ) - speakers_json = data_item.speakers_json - audio_duration = data_item.audio_duration - # converting prediction_json to result (wherever it exists) for every task. 
- if prediction_json == None: - return result - # Initialize variables - concatenated_text = "" - min_start_time = float("inf") - max_end_time = float("-inf") - - for idx, val in enumerate(prediction_json): - # Concatenate the text - concatenated_text += val["text"] + " " - - # Update the minimum start time and maximum end time - min_start_time = min(min_start_time, val["start"]) - max_end_time = max(max_end_time, val["end"]) - - # Create a single dictionary to store the result - result_dict = { - "origin": "manual", - "to_name": "audio_url", - "from_name": "transcribed_json", - "original_length": audio_duration, - "id": f"shoonya_{generate_random_string(13)}", - "type": "textarea", - "value": { - "start": min_start_time, - "end": max_end_time, - "text": [concatenated_text], # Remove trailing space - }, - } + # elif proj_type == "StandardisedTranscriptionEditing": + # # convert the prediction_json to a concatinated transcribed_json + # data_item = SpeechConversation.objects.get(pk=pk) + # prediction_json = ( + # json.loads(data_item.prediction_json) + # if isinstance(data_item.prediction_json, str) + # else data_item.prediction_json + # ) + # speakers_json = data_item.speakers_json + # audio_duration = data_item.audio_duration + # # converting prediction_json to result (wherever it exists) for every task. + # if prediction_json == None: + # return result + # # Initialize variables + # concatenated_text = "" + # min_start_time = float("inf") + # max_end_time = float("-inf") + # + # for idx, val in enumerate(prediction_json): + # # Concatenate the text + # concatenated_text += val["text"] + " " + # + # # Update the minimum start time and maximum end time + # min_start_time = min(min_start_time, val["start"]) + # max_end_time = max(max_end_time, val["end"]) + # if concatenated_text: + # concatenated_text.strip() + # + # # Create a single dictionary to store the result + # result_dict = { + # "origin": "manual", + # "to_name": "audio_url", + # "from_name": "transcribed_json", + # "original_length": audio_duration, + # "id": f"shoonya_{generate_random_string(13)}", + # "type": "textarea", + # "value": { + # "start": min_start_time, + # "end": max_end_time, + # "text": [concatenated_text], # Remove trailing space + # }, + # } # Clear the result array and append the single result dictionary result.clear() @@ -1045,7 +1049,49 @@ def convert_annotation_result_to_formatted_json( acoustic_transcribed_json = [] standardised_transcription = "" transcribed_json_modified, acoustic_transcribed_json_modified = [], [] - if is_SpeechConversation: + if is_StandardisedTranscriptionEditing: + verbatim_transcribed_json = [] + acoustic_normalised_transcribed_json = [] + standardised_transcription = [] + + for item in annotation_result: + if isinstance(item, str): + item = json.loads(item) + if "text" in item: + verbatim_transcribed_json.append( + { + "speaker_id": item["speaker_id"], + "start": convert_time_to_seconds(item["start_time"]), + "end": convert_time_to_seconds(item["end_time"]), + "text": item["text"], + } + ) + if "acoustic_normalised_text" in item: + acoustic_normalised_transcribed_json.append( + { + "speaker_id": item["speaker_id"], + "start": convert_time_to_seconds(item["start_time"]), + "end": convert_time_to_seconds(item["end_time"]), + "text": item["acoustic_normalised_text"], + } + ) + if "acoustic_standardized_text" in item: + standardised_transcription.append( + { + "speaker_id": item["speaker_id"], + "start": convert_time_to_seconds(item["start_time"]), + "end": 
convert_time_to_seconds(item["end_time"]), + "text": item["acoustic_standardized_text"], + } + ) + + complete_json = { + "verbatim_transcribed_json": verbatim_transcribed_json, + "acoustic_normalised_transcribed_json": acoustic_normalised_transcribed_json, + "standardised_transcription": standardised_transcription, + } + transcribed_json.append(complete_json) + elif is_SpeechConversation: ids_formatted = {} for idx1 in range(len(annotation_result)): formatted_result_dict = {} @@ -1133,37 +1179,6 @@ def convert_annotation_result_to_formatted_json( acoustic_transcribed_json_modified = json.dumps( acoustic_transcribed_json, ensure_ascii=False ) - - elif is_StandardisedTranscriptionEditing: - ''' - in need to convert in this format - text": "dummy format", - "end_time": "00:00:10.448", - "speaker_id": "Speaker 0", - "start_time": "00:00:00.000", - "id": 1, - "acoustic_normalised_text : text - ''' - - for idx1 in range(0, len(annotation_result), 2): - formatted_result_dict = {} - text_dict = {} - acoustic_text_dict = {} - if isinstance(annotation_result[idx1], str): - annotation_result[idx1] = json.loads(annotation_result[idx1]) - if isinstance(annotation_result[idx1 + 1], str): - annotation_result[idx1 + 1] = json.loads(annotation_result[idx1 + 1]) - text_dict = annotation_result[idx1] - acoustic_text_dict = annotation_result[idx1 + 1] - formatted_result_dict["text"] = text_dict["value"]["text"][0] - formatted_result_dict["start_time"] = text_dict["value"]["start"] - formatted_result_dict["end_time"] = text_dict["value"]["end"] - formatted_result_dict["speaker_id"] = text_dict["value"]["speaker_id"] - formatted_result_dict["id"] = text_dict["id"] - formatted_result_dict["acoustic_normalised_text"] = acoustic_text_dict["value"]["text"][0] - transcribed_json.append(formatted_result_dict) - - else: dicts = 2 if is_OCRSegmentCategorizationOROCRSegmentCategorizationEditing else 3 for idx1 in range(0, len(annotation_result), dicts): @@ -1232,13 +1247,6 @@ def convert_annotation_result_to_formatted_json( "acoustic_normalised_transcribed_json": acoustic_transcribed_json_modified, "standardised_transcription": standardised_transcription, } - - if is_StandardisedTranscriptionEditing: - return { - "verbatim_transcribed_json": transcribed_json_modified, - "acoustic_normalised_transcribed_json": acoustic_transcribed_json_modified, - "standardised_transcription": standardised_transcription, - } return transcribed_json_modified @@ -2227,7 +2235,7 @@ def create(self, request, *args, **kwargs): proj.save() # Function call to create the paramters for the sampling and filtering of sentences - create_parameters_for_task_creation( + create_parameters_for_task_creation.delay( project_type=project_type, dataset_instance_ids=dataset_instance_ids, filter_string=filter_string, @@ -2448,14 +2456,22 @@ def assign_new_tasks(self, request, pk, *args, **kwargs): "AudioTranscriptionEditing", "OCRTranscriptionEditing", "OCRSegmentCategorizationEditing", - "StandardisedTranscriptionEditing", - ]: - - if project.project_type == "StandardisedTranscriptionEditing": + "StandardizedTranscriptionEditing", + ]: + if project.project_type == "StandardizedTranscriptionEditing": try: - #gather trascribed_json - result = convert_annotation_result_to_formatted_json( - task.input_data.id, project.project_type, is_StandardisedTranscriptionEditing=True + # gather trascribed_json + result = parse_json_for_ste(task.input_data.id) + except Exception as e: + print( + f"The prediction json of the data item-{task.input_data.id} is corrupt." 
+ ) + task.delete() + continue + else: + try: + result = convert_prediction_json_to_annotation_result( + task.input_data.id, project.project_type ) except Exception as e: print( @@ -2463,16 +2479,6 @@ def assign_new_tasks(self, request, pk, *args, **kwargs): ) task.delete() continue - try: - result = convert_prediction_json_to_annotation_result( - task.input_data.id, project.project_type - ) - except Exception as e: - print( - f"The prediction json of the data item-{task.input_data.id} is corrupt." - ) - task.delete() - continue annotator_anno_count = Annotation_model.objects.filter( task_id=task, annotation_type=ANNOTATOR_ANNOTATION ).count() @@ -4189,7 +4195,6 @@ def download(self, request, pk=None, *args, **kwargs): project_type == "OCRSegmentCategorizationEditing" ) is_OCRSegmentCategorization = project_type == "OCRSegmentCategorization" - is_StandardizedTranscriptionEditing = project_type = "StandardizedTranscriptionEditing" for task in tasks: curr_task = process_task( task, @@ -4208,9 +4213,6 @@ def download(self, request, pk=None, *args, **kwargs): is_ConversationTranslation, is_ConversationVerification, ) - elif is_StandardizedTranscriptionEditing: - pass - elif dataset_type in ["SpeechConversation", "OCRDocument"]: is_SpeechConversation = dataset_type == "SpeechConversation" if is_SpeechConversation: diff --git a/backend/tasks/views.py b/backend/tasks/views.py index fe621f5e4..af0d1071f 100644 --- a/backend/tasks/views.py +++ b/backend/tasks/views.py @@ -1758,6 +1758,14 @@ def partial_update(self, request, pk=None): == "AcousticNormalisedTranscriptionEditing" else False ) + + is_StandardizedTranscriptionEditing = ( + True + if annotation_obj.task.project_id.project_type + == "StandardizedTranscriptionEditing" + else False + ) + is_ocr_sc_or_sce = ( True if annotation_obj.task.project_id.project_type @@ -1789,12 +1797,15 @@ def partial_update(self, request, pk=None): ) = self.convert_chitralekha_format_to_LSF( request.data["result"], annotation_obj.task, - is_acoustic_project_type, - is_acoustic_project_type - and annotation_obj.task.project_id.metadata_json[ - "acoustic_enabled_stage" - ] - == 1, + is_acoustic_project_type or is_StandardizedTranscriptionEditing, + ( + is_acoustic_project_type + and annotation_obj.task.project_id.metadata_json[ + "acoustic_enabled_stage" + ] + == 1 + ) + or is_StandardizedTranscriptionEditing, ) else: annotation_obj.result = request.data["result"] @@ -1845,12 +1856,15 @@ def partial_update(self, request, pk=None): ) = self.convert_chitralekha_format_to_LSF( request.data["result"], annotation_obj.task, - is_acoustic_project_type, - is_acoustic_project_type - and annotation_obj.task.project_id.metadata_json[ - "acoustic_enabled_stage" - ] - == 1, + is_acoustic_project_type or is_StandardizedTranscriptionEditing, + ( + is_acoustic_project_type + and annotation_obj.task.project_id.metadata_json[ + "acoustic_enabled_stage" + ] + == 1 + ) + or is_StandardizedTranscriptionEditing, ) annotation_status = request.data["annotation_status"] if empty_flag == True and annotation_status in [ @@ -1918,12 +1932,15 @@ def partial_update(self, request, pk=None): ) = self.convert_chitralekha_format_to_LSF( request.data["result"], annotation_obj.task, - is_acoustic_project_type, - is_acoustic_project_type - and annotation_obj.task.project_id.metadata_json[ - "acoustic_enabled_stage" - ] - <= 2, + is_acoustic_project_type or is_StandardizedTranscriptionEditing, + ( + is_acoustic_project_type + and annotation_obj.task.project_id.metadata_json[ + "acoustic_enabled_stage" 
+ ] + == 2 + ) + or is_StandardizedTranscriptionEditing, ) else: annotation_obj.result = request.data["result"] @@ -2013,12 +2030,15 @@ def partial_update(self, request, pk=None): ) = self.convert_chitralekha_format_to_LSF( request.data["result"], annotation_obj.task, - is_acoustic_project_type, - is_acoustic_project_type - and annotation_obj.task.project_id.metadata_json[ - "acoustic_enabled_stage" - ] - <= 2, + is_acoustic_project_type or is_StandardizedTranscriptionEditing, + ( + is_acoustic_project_type + and annotation_obj.task.project_id.metadata_json[ + "acoustic_enabled_stage" + ] + == 2 + ) + or is_StandardizedTranscriptionEditing, ) annotation_status = request.data["annotation_status"] if empty_flag == True and annotation_status in [ @@ -2113,12 +2133,15 @@ def partial_update(self, request, pk=None): ) = self.convert_chitralekha_format_to_LSF( request.data["result"], annotation_obj.task, - is_acoustic_project_type, - is_acoustic_project_type - and annotation_obj.task.project_id.metadata_json[ - "acoustic_enabled_stage" - ] - <= 3, + is_acoustic_project_type or is_StandardizedTranscriptionEditing, + ( + is_acoustic_project_type + and annotation_obj.task.project_id.metadata_json[ + "acoustic_enabled_stage" + ] + == 3 + ) + or is_StandardizedTranscriptionEditing, ) else: annotation_obj.result = request.data["result"] @@ -2199,12 +2222,15 @@ def partial_update(self, request, pk=None): ) = self.convert_chitralekha_format_to_LSF( request.data["result"], annotation_obj.task, - is_acoustic_project_type, - is_acoustic_project_type - and annotation_obj.task.project_id.metadata_json[ - "acoustic_enabled_stage" - ] - <= 3, + is_acoustic_project_type or is_StandardizedTranscriptionEditing, + ( + is_acoustic_project_type + and annotation_obj.task.project_id.metadata_json[ + "acoustic_enabled_stage" + ] + == 3 + ) + or is_StandardizedTranscriptionEditing, ) if empty_flag == True and annotation_status in [ LABELED, @@ -2324,19 +2350,33 @@ def convert_chitralekha_format_to_LSF( if result == None or len(result) == 0: return modified_result, empty_text_flag for idx, val in enumerate(result): - if "standardised_transcription" in val: + if "acoustic_standardized_text" in val: if acoustic_enabled: standardised_dict = { "id": f"chitralekha_{idx}s{generate_random_string(13 - len(str(idx)))}", "origin": "manual", "to_name": "audio_url", - "from_name": "standardised_transcription", + "from_name": "acoustic_standardised_transcribed_json", "original_length": audio_duration, "type": "textarea", "value": { - "text": [val["standardised_transcription"]], + "start": self.convert_formatted_time_to_fractional( + val["start_time"] + ), + "end": self.convert_formatted_time_to_fractional( + val["end_time"] + ), + "text": [val["acoustic_standardized_text"]], }, } + label_dict_st = deepcopy(standardised_dict) + label_dict_st["type"] = "labels" + del label_dict_st["value"]["text"] + label_dict_st["value"]["labels"] = ( + [val["speaker_id"]] if "speaker_id" in val else [] + ) + label_dict_st["from_name"] = "labels" + modified_result.append(label_dict_st) modified_result.append(standardised_dict) continue if "type" in val or "value" in val: diff --git a/backend/users/migrations/0034_alter_user_is_approved.py b/backend/users/migrations/0034_alter_user_is_approved.py new file mode 100644 index 000000000..7cfe9702a --- /dev/null +++ b/backend/users/migrations/0034_alter_user_is_approved.py @@ -0,0 +1,21 @@ +# Generated by Django 3.2.14 on 2024-05-22 02:45 + +from django.db import migrations, models + + +class 
Migration(migrations.Migration): + dependencies = [ + ("users", "0033_rename_approved_by_user_invited_by"), + ] + + operations = [ + migrations.AlterField( + model_name="user", + name="is_approved", + field=models.BooleanField( + default=False, + help_text="Indicates whether user is approved by the admin or not.", + verbose_name="is_approved", + ), + ), + ] diff --git a/backend/utils/convert_result_to_chitralekha_format.py b/backend/utils/convert_result_to_chitralekha_format.py index 4e1c2072c..33e48cb6a 100644 --- a/backend/utils/convert_result_to_chitralekha_format.py +++ b/backend/utils/convert_result_to_chitralekha_format.py @@ -20,8 +20,8 @@ def create_memory(result): memory[key]["labels_dict_idx"] = i elif dict_type == "acoustic_normalised_transcribed_json": memory[key]["acoustic_text_dict_idx"] = i - elif dict_type == "standardised_transcription": - memory["standardised_transcription"] = i + elif dict_type == "acoustic_standardised_transcribed_json": + memory[key]["acoustic_standardised_transcribed_json"] = i else: memory[key]["text_dict_idx"] = i return memory @@ -49,10 +49,11 @@ def convert_result_to_chitralekha_format(result, ann_id, project_type): speaker_id = "Speaker 0" seen.add(text_dict_idx) elif text_dict_idx == -1: - print( - f"The data is corrupt for annotation id-{ann_id}, data id- {result[i]['id']}. " - f"It does not contain a corresponding text dictionary." - ) + if project_type != "StandardizedTranscriptionEditing": + print( + f"The data is corrupt for annotation id-{ann_id}, data id- {result[i]['id']}. " + f"It does not contain a corresponding text dictionary." + ) continue else: label_dict = result[labels_dict_idx] @@ -96,19 +97,30 @@ def convert_result_to_chitralekha_format(result, ann_id, project_type): modified_result = ( sort_result_by_start_time(modified_result) if len(modified_result) > 0 else [] ) - if ( - project_type == "AcousticNormalisedTranscriptionEditing" - and "standardised_transcription" in memory.keys() - and result[memory["standardised_transcription"]]["value"]["text"] - ): - modified_result.append( - { - "standardised_transcription": result[ - memory["standardised_transcription"] - ]["value"]["text"][0] - } - ) - + if project_type == "StandardizedTranscriptionEditing": + standard_chitra_dict = {} + for i in range(len(result)): + if result[i]["id"] in memory: + if "acoustic_standardised_transcribed_json" in memory[result[i]["id"]]: + st_dict = result[ + memory[result[i]["id"]][ + "acoustic_standardised_transcribed_json" + ] + ] + if not standard_chitra_dict: + lb_dict = result[memory[result[i]["id"]]["labels_dict_idx"]] + standard_chitra_dict = { + "acoustic_standardized_text": st_dict["value"]["text"][0], + "end_time": convert_fractional_time_to_formatted( + st_dict["value"]["end"], ann_id, st_dict["id"] + ), + "speaker_id": lb_dict["value"]["labels"][0], + "start_time": convert_fractional_time_to_formatted( + st_dict["value"]["start"], ann_id, st_dict["id"] + ), + "id": count, + } + modified_result.append(standard_chitra_dict) return modified_result From 9dfdb52c104281a3968918f3c840d9916aaee1e0 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 27 May 2024 10:25:25 +0530 Subject: [PATCH 14/44] black linting --- backend/organizations/views.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/organizations/views.py b/backend/organizations/views.py index 46848571e..220834dd9 100644 --- a/backend/organizations/views.py +++ b/backend/organizations/views.py @@ -2713,8 +2713,7 @@ def cumulative_tasks_count(self, request, 
pk=None): "AudioSegmentation", "AudioTranscription", "AudioTranscriptionEditing", - "StandardisedTranscriptionEditing" - "ContextualSentenceVerification", + "StandardisedTranscriptionEditing" "ContextualSentenceVerification", "ContextualSentenceVerificationAndDomainClassification", "ContextualTranslationEditing", "ConversationTranslation", From 3b93f2d2bb44a65430706781280dd8bd921877a8 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 27 May 2024 11:34:29 +0530 Subject: [PATCH 15/44] black changes --- backend/organizations/views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/organizations/views.py b/backend/organizations/views.py index 220834dd9..3109cf083 100644 --- a/backend/organizations/views.py +++ b/backend/organizations/views.py @@ -2713,7 +2713,8 @@ def cumulative_tasks_count(self, request, pk=None): "AudioSegmentation", "AudioTranscription", "AudioTranscriptionEditing", - "StandardisedTranscriptionEditing" "ContextualSentenceVerification", + "StandardisedTranscriptionEditing", + "ContextualSentenceVerification", "ContextualSentenceVerificationAndDomainClassification", "ContextualTranslationEditing", "ConversationTranslation", From 86967b6f18ac568043719c38c732c9f772a715ae Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 27 May 2024 16:30:48 +0530 Subject: [PATCH 16/44] modified the download endpoint --- backend/projects/utils.py | 2 +- backend/projects/views.py | 208 ++++++++++++++++++++++++-------------- 2 files changed, 133 insertions(+), 77 deletions(-) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index c9416e398..a43d55bed 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -435,7 +435,7 @@ def process_speech_results( speakers_json, True, False, - False, + True, True, ) task["data"]["transcribed_json"] = task["data"]["final_transcribed_json"] diff --git a/backend/projects/views.py b/backend/projects/views.py index ca7f0b33c..4eedb6c32 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -1047,57 +1047,65 @@ def convert_annotation_result_to_formatted_json( ): transcribed_json = [] acoustic_transcribed_json = [] - standardised_transcription = "" + standardised_json_modified = [] transcribed_json_modified, acoustic_transcribed_json_modified = [], [] if is_StandardisedTranscriptionEditing: verbatim_transcribed_json = [] acoustic_normalised_transcribed_json = [] standardised_transcription = [] - for item in annotation_result: - if isinstance(item, str): - item = json.loads(item) - if "text" in item: - verbatim_transcribed_json.append( - { - "speaker_id": item["speaker_id"], - "start": convert_time_to_seconds(item["start_time"]), - "end": convert_time_to_seconds(item["end_time"]), - "text": item["text"], - } - ) - if "acoustic_normalised_text" in item: - acoustic_normalised_transcribed_json.append( - { - "speaker_id": item["speaker_id"], - "start": convert_time_to_seconds(item["start_time"]), - "end": convert_time_to_seconds(item["end_time"]), - "text": item["acoustic_normalised_text"], - } - ) - if "acoustic_standardized_text" in item: - standardised_transcription.append( - { - "speaker_id": item["speaker_id"], - "start": convert_time_to_seconds(item["start_time"]), - "end": convert_time_to_seconds(item["end_time"]), - "text": item["acoustic_standardized_text"], - } - ) - - complete_json = { - "verbatim_transcribed_json": verbatim_transcribed_json, - "acoustic_normalised_transcribed_json": acoustic_normalised_transcribed_json, - "standardised_transcription": 
standardised_transcription, - } - transcribed_json.append(complete_json) - elif is_SpeechConversation: + # for item in annotation_result: + # if isinstance(item, str): + # item = json.loads(item) + # if "text" in item: + # verbatim_transcribed_json.append( + # { + # "speaker_id": item["speaker_id"], + # "start": convert_time_to_seconds(item["start_time"]), + # "end": convert_time_to_seconds(item["end_time"]), + # "text": item["text"], + # } + # ) + # if "acoustic_normalised_text" in item: + # acoustic_normalised_transcribed_json.append( + # { + # "speaker_id": item["speaker_id"], + # "start": convert_time_to_seconds(item["start_time"]), + # "end": convert_time_to_seconds(item["end_time"]), + # "text": item["acoustic_normalised_text"], + # } + # ) + # if "acoustic_standardized_text" in item: + # standardised_transcription.append( + # { + # "speaker_id": item["speaker_id"], + # "start": convert_time_to_seconds(item["start_time"]), + # "end": convert_time_to_seconds(item["end_time"]), + # "text": item["acoustic_standardized_text"], + # } + # ) + # + # complete_json = { + # "verbatim_transcribed_json": verbatim_transcribed_json, + # "acoustic_normalised_transcribed_json": acoustic_normalised_transcribed_json, + # "standardised_transcription": standardised_transcription, + # } + # transcribed_json.append(complete_json) + if is_SpeechConversation: ids_formatted = {} for idx1 in range(len(annotation_result)): + if ( + "id" in annotation_result[idx1] + and annotation_result[idx1]["id"] in ids_formatted + ): + continue formatted_result_dict = {} labels_dict = {} text_dict = {} acoustic_text_dict = {} + st_labels_dict = {} + st_text_dict = {} + st_formatted_result_dict = {} if isinstance(annotation_result[idx1], str): annotation_result[idx1] = json.loads(annotation_result[idx1]) if annotation_result[idx1]["from_name"] == "labels": @@ -1110,11 +1118,7 @@ def convert_annotation_result_to_formatted_json( annotation_result[idx1], ensure_ascii=False ) acoustic_text_dict = annotation_result[idx1] - elif annotation_result[idx1]["from_name"] == "standardised_transcription": - standardised_transcription = annotation_result[idx1]["value"]["text"][0] - continue else: - text_dict = json.dumps(annotation_result[idx1], ensure_ascii=False) text_dict = annotation_result[idx1] for idx2 in range(idx1 + 1, len(annotation_result)): if annotation_result[idx1]["id"] == annotation_result[idx2]["id"]: @@ -1131,50 +1135,93 @@ def convert_annotation_result_to_formatted_json( text_dict = json.dumps( annotation_result[idx2], ensure_ascii=False ) - if not is_acoustic or ( - labels_dict and acoustic_text_dict and text_dict + if not is_StandardisedTranscriptionEditing: + if not is_acoustic or ( + labels_dict and acoustic_text_dict and text_dict + ): + break + elif is_StandardisedTranscriptionEditing: + if ( + annotation_result[idx2]["from_name"] == "labels" + and (idx2 + 1) < len(annotation_result) + and annotation_result[idx2 + 1]["from_name"] + == "acoustic_standardised_transcribed_json" ): - break + st_labels_dict = annotation_result[idx2] + elif ( + annotation_result[idx2]["from_name"] + == "acoustic_standardised_transcribed_json" + ): + st_text_dict = annotation_result[idx2] - if annotation_result[idx1]["id"] not in ids_formatted: - ids_formatted[annotation_result[idx1]["id"]] = "formatted" - if not labels_dict: + ids_formatted[annotation_result[idx1]["id"]] = "formatted" + if not labels_dict: + formatted_result_dict["speaker_id"] = None + else: + try: + formatted_result_dict["speaker_id"] = next( + speaker + for speaker in 
speakers_json + if speaker["name"] == labels_dict["value"]["labels"][0] + )["speaker_id"] + except (KeyError, StopIteration): formatted_result_dict["speaker_id"] = None + formatted_result_dict["start"] = labels_dict["value"]["start"] + formatted_result_dict["end"] = labels_dict["value"]["end"] + + if not text_dict: + formatted_result_dict["text"] = "" + else: + text_dict_json = ( + json.loads(text_dict) if isinstance(text_dict, str) else text_dict + ) + formatted_result_dict["text"] = text_dict_json["value"]["text"][0] + formatted_result_dict["start"] = text_dict_json["value"]["start"] + formatted_result_dict["end"] = text_dict_json["value"]["end"] + + transcribed_json.append(formatted_result_dict) + + if is_StandardisedTranscriptionEditing: + if not st_labels_dict: + st_formatted_result_dict["speaker_id"] = None else: try: - formatted_result_dict["speaker_id"] = next( + st_formatted_result_dict["speaker_id"] = next( speaker for speaker in speakers_json - if speaker["name"] == labels_dict["value"]["labels"][0] + if speaker["name"] == st_labels_dict["value"]["labels"][0] )["speaker_id"] except (KeyError, StopIteration): - formatted_result_dict["speaker_id"] = None - formatted_result_dict["start"] = labels_dict["value"]["start"] - formatted_result_dict["end"] = labels_dict["value"]["end"] + st_formatted_result_dict["speaker_id"] = None + st_formatted_result_dict["start"] = st_labels_dict["value"]["start"] + st_formatted_result_dict["end"] = st_labels_dict["value"]["end"] - if not text_dict: - formatted_result_dict["text"] = "" + if not st_text_dict: + st_formatted_result_dict["text"] = "" else: - text_dict_json = json.loads(text_dict) - formatted_result_dict["text"] = text_dict_json["value"]["text"][0] - formatted_result_dict["start"] = text_dict_json["value"]["start"] - formatted_result_dict["end"] = text_dict_json["value"]["end"] - - transcribed_json.append(formatted_result_dict) - - if is_acoustic: - acoustic_formatted_result_dict = deepcopy(formatted_result_dict) - acoustic_dict_json = ( - json.loads(acoustic_text_dict) - if isinstance(acoustic_text_dict, str) - else acoustic_text_dict + text_dict_json = ( + json.loads(st_text_dict) + if isinstance(st_text_dict, str) + else st_text_dict ) - acoustic_formatted_result_dict["text"] = ( - acoustic_dict_json["value"]["text"][0] - if acoustic_dict_json - else "" - ) - acoustic_transcribed_json.append(acoustic_formatted_result_dict) + st_formatted_result_dict["text"] = text_dict_json["value"]["text"][ + 0 + ] + st_formatted_result_dict["start"] = text_dict_json["value"]["start"] + st_formatted_result_dict["end"] = text_dict_json["value"]["end"] + standardised_json_modified.append(st_formatted_result_dict) + + if is_acoustic: + acoustic_formatted_result_dict = deepcopy(formatted_result_dict) + acoustic_dict_json = ( + json.loads(acoustic_text_dict) + if isinstance(acoustic_text_dict, str) + else acoustic_text_dict + ) + acoustic_formatted_result_dict["text"] = ( + acoustic_dict_json["value"]["text"][0] if acoustic_dict_json else "" + ) + acoustic_transcribed_json.append(acoustic_formatted_result_dict) if acoustic_transcribed_json: acoustic_transcribed_json_modified = json.dumps( acoustic_transcribed_json, ensure_ascii=False @@ -1240,12 +1287,21 @@ def convert_annotation_result_to_formatted_json( ) transcribed_json.append(formatted_result_dict) transcribed_json_modified = json.dumps(transcribed_json, ensure_ascii=False) + standardised_json_modified = json.dumps( + standardised_json_modified, ensure_ascii=False + ) if is_acoustic: + if 
is_StandardisedTranscriptionEditing: + return { + "verbatim_transcribed_json": transcribed_json_modified, + "acoustic_normalised_transcribed_json": acoustic_transcribed_json_modified, + "standardised_transcription": standardised_json_modified, + } return { "verbatim_transcribed_json": transcribed_json_modified, "acoustic_normalised_transcribed_json": acoustic_transcribed_json_modified, - "standardised_transcription": standardised_transcription, + "standardised_transcription": [], } return transcribed_json_modified From 3216c3e90a1bfd6aabb255a3ad6e81e3320a88b0 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 27 May 2024 16:33:49 +0530 Subject: [PATCH 17/44] removed commented lines --- backend/projects/views.py | 42 --------------------------------------- 1 file changed, 42 deletions(-) diff --git a/backend/projects/views.py b/backend/projects/views.py index 4eedb6c32..b5b904218 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -1049,48 +1049,6 @@ def convert_annotation_result_to_formatted_json( acoustic_transcribed_json = [] standardised_json_modified = [] transcribed_json_modified, acoustic_transcribed_json_modified = [], [] - if is_StandardisedTranscriptionEditing: - verbatim_transcribed_json = [] - acoustic_normalised_transcribed_json = [] - standardised_transcription = [] - - # for item in annotation_result: - # if isinstance(item, str): - # item = json.loads(item) - # if "text" in item: - # verbatim_transcribed_json.append( - # { - # "speaker_id": item["speaker_id"], - # "start": convert_time_to_seconds(item["start_time"]), - # "end": convert_time_to_seconds(item["end_time"]), - # "text": item["text"], - # } - # ) - # if "acoustic_normalised_text" in item: - # acoustic_normalised_transcribed_json.append( - # { - # "speaker_id": item["speaker_id"], - # "start": convert_time_to_seconds(item["start_time"]), - # "end": convert_time_to_seconds(item["end_time"]), - # "text": item["acoustic_normalised_text"], - # } - # ) - # if "acoustic_standardized_text" in item: - # standardised_transcription.append( - # { - # "speaker_id": item["speaker_id"], - # "start": convert_time_to_seconds(item["start_time"]), - # "end": convert_time_to_seconds(item["end_time"]), - # "text": item["acoustic_standardized_text"], - # } - # ) - # - # complete_json = { - # "verbatim_transcribed_json": verbatim_transcribed_json, - # "acoustic_normalised_transcribed_json": acoustic_normalised_transcribed_json, - # "standardised_transcription": standardised_transcription, - # } - # transcribed_json.append(complete_json) if is_SpeechConversation: ids_formatted = {} for idx1 in range(len(annotation_result)): From 8926ff3cc976177fb89072903695d27e34fdb430 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Thu, 30 May 2024 07:11:27 +0000 Subject: [PATCH 18/44] export changes --- backend/projects/annotation_registry.py | 2 +- .../audio/acoustic_transcription.jsx | 2 +- backend/projects/tasks.py | 74 +++++++++++++------ backend/projects/views.py | 2 +- 4 files changed, 55 insertions(+), 25 deletions(-) diff --git a/backend/projects/annotation_registry.py b/backend/projects/annotation_registry.py index 071f33678..1932ea32b 100644 --- a/backend/projects/annotation_registry.py +++ b/backend/projects/annotation_registry.py @@ -171,7 +171,7 @@ "labels", "verbatim_transcribed_json", "acoustic_normalised_transcribed_json", - "standardised_transcription", + "acoustic_standardised_transcribed_json", ], "type": ["labels", "textarea", "textarea", "textarea"], }, diff --git 
a/backend/projects/label_studio_jsx_files/audio/acoustic_transcription.jsx b/backend/projects/label_studio_jsx_files/audio/acoustic_transcription.jsx index a45d56507..69ab8092c 100644 --- a/backend/projects/label_studio_jsx_files/audio/acoustic_transcription.jsx +++ b/backend/projects/label_studio_jsx_files/audio/acoustic_transcription.jsx @@ -24,7 +24,7 @@