Encoding bug resolved for audio

kushalpoddar · kushalpoddar · commit 0ff4e3073762 · 2024-05-30T18:15:00.000+05:30
diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py
@@ -10,6 +10,7 @@
 import itertools
 import struct
 from enum import IntEnum
+import chardet
 from abc import ABC, abstractmethod
 from contextlib import closing
 from typing import Iterable
@@ -516,6 +517,15 @@ def get_total_frames(self):
 
         return total_frame
 
+    def get_file_encoding(self, file_path):
+        """Guess a text encoding from the first 1024 bytes of file_path; 'utf-8' if chardet finds none."""
+        with open(file_path, 'rb') as f:
+            rawdata = f.read(1024)
+        result = chardet.detect(rawdata)
+        # chardet reports None when it cannot decide; the caller hands this value to av.open(metadata_encoding=...).
+        encoding = result['encoding'] or 'utf-8'
+        return encoding
+
     def __iter__(self):
         with self._get_av_container() as container:
             stream = container.streams.audio[0]
@@ -534,7 +544,9 @@ def get_progress(self, pos):
     def _get_av_container(self):
         if isinstance(self._source_path[0], io.BytesIO):
             self._source_path[0].seek(0) # required for re-reading
-        return av.open(self._source_path[0])
+        source = self._source_path[0]  # in-memory sources cannot be passed to open(); skip detection for them
+        encoding = None if isinstance(source, io.BytesIO) else self.get_file_encoding(source)
+        return av.open(source, metadata_encoding=encoding or 'utf-8')  # 'utf-8' is PyAV's default
 
     def _get_duration(self):
         with self._get_av_container() as container:
diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py
@@ -956,21 +956,21 @@ def update_progress(progress):
     compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality, **kwargs)
     original_chunk_writer = original_chunk_writer_class(original_quality, **kwargs)
 
-    # def get_file_encoding(file_path):
-    #     import chardet
+    def get_file_encoding(file_path):
+        """Guess a text encoding from the first 1024 bytes of file_path; 'utf-8' if chardet finds none."""
 
-    #     with open(file_path, 'rb') as f:
-    #         rawdata = f.read(1024)
-    #     result = chardet.detect(rawdata)
-    #     encoding = result['encoding']
+        import chardet
+        with open(file_path, 'rb') as f:
+            rawdata = f.read(1024)
+        encoding = chardet.detect(rawdata)['encoding'] or 'utf-8'
 
-    #     return encoding
+        return encoding
     def get_audio_duration(file_path):
-        # encoding=get_file_encoding(file_path)
-        # slogger.glob.debug("ENCODING")
-        # slogger.glob.debug(encoding)
+        encoding=get_file_encoding(file_path)
+        slogger.glob.debug("ENCODING")
+        slogger.glob.debug(encoding)
         # Open the audio file
-        container = av.open(file_path)
+        container = av.open(file_path, metadata_encoding=encoding)
 
         # Get the first audio stream
         audio_stream = next((stream for stream in container.streams if stream.codec.type == 'audio'), None)
@@ -1000,6 +1000,7 @@ def get_audio_duration(file_path):
 
         segment_duration = db_task.segment_duration
         db_task.data.audio_total_duration = get_audio_duration(details['source_path'][0])
+        # db_task.data.audio_total_duration = 720000 #get_audio_duration(details['source_path'][0])
         total_audio_frames = extractor.get_total_frames()
 
         slogger.glob.debug("TOTAL AUDIO DURATION")