Skip to content

Commit 0ff4e30

Browse files
committed
Encoding bug resolved for audio
1 parent 62e94bb commit 0ff4e30

File tree

2 files changed

+25
-12
lines changed

2 files changed

+25
-12
lines changed

cvat/apps/engine/media_extractors.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import itertools
1111
import struct
1212
from enum import IntEnum
13+
import chardet
1314
from abc import ABC, abstractmethod
1415
from contextlib import closing
1516
from typing import Iterable
@@ -516,6 +517,15 @@ def get_total_frames(self):
516517

517518
return total_frame
518519

520+
def get_file_encoding(self, file_path):
521+
522+
with open(file_path, 'rb') as f:
523+
rawdata = f.read(1024)
524+
result = chardet.detect(rawdata)
525+
encoding = result['encoding']
526+
527+
return encoding
528+
519529
def __iter__(self):
520530
with self._get_av_container() as container:
521531
stream = container.streams.audio[0]
@@ -534,7 +544,9 @@ def get_progress(self, pos):
534544
def _get_av_container(self):
535545
if isinstance(self._source_path[0], io.BytesIO):
536546
self._source_path[0].seek(0) # required for re-reading
537-
return av.open(self._source_path[0])
547+
548+
encoding = self.get_file_encoding(self._source_path[0])
549+
return av.open(self._source_path[0], metadata_encoding = encoding)
538550

539551
def _get_duration(self):
540552
with self._get_av_container() as container:

cvat/apps/engine/task.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -956,21 +956,21 @@ def update_progress(progress):
956956
compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality, **kwargs)
957957
original_chunk_writer = original_chunk_writer_class(original_quality, **kwargs)
958958

959-
# def get_file_encoding(file_path):
960-
# import chardet
959+
def get_file_encoding(file_path):
960+
import chardet
961961

962-
# with open(file_path, 'rb') as f:
963-
# rawdata = f.read(1024)
964-
# result = chardet.detect(rawdata)
965-
# encoding = result['encoding']
962+
with open(file_path, 'rb') as f:
963+
rawdata = f.read(1024)
964+
result = chardet.detect(rawdata)
965+
encoding = result['encoding']
966966

967-
# return encoding
967+
return encoding
968968
def get_audio_duration(file_path):
969-
# encoding=get_file_encoding(file_path)
970-
# slogger.glob.debug("ENCODING")
971-
# slogger.glob.debug(encoding)
969+
encoding=get_file_encoding(file_path)
970+
slogger.glob.debug("ENCODING")
971+
slogger.glob.debug(encoding)
972972
# Open the audio file
973-
container = av.open(file_path)
973+
container = av.open(file_path, metadata_encoding=encoding)
974974

975975
# Get the first audio stream
976976
audio_stream = next((stream for stream in container.streams if stream.codec.type == 'audio'), None)
@@ -1000,6 +1000,7 @@ def get_audio_duration(file_path):
10001000

10011001
segment_duration = db_task.segment_duration
10021002
db_task.data.audio_total_duration = get_audio_duration(details['source_path'][0])
1003+
# db_task.data.audio_total_duration = 720000 #get_audio_duration(details['source_path'][0])
10031004
total_audio_frames = extractor.get_total_frames()
10041005

10051006
slogger.glob.debug("TOTAL AUDIO DURATION")

0 commit comments

Comments
 (0)