@@ -59,6 +59,18 @@ def split_dataset(items, eval_split_max_size=None, eval_split_size=0.01):
5959 return items [:eval_split_size ], items [eval_split_size :]
6060
6161
def add_extra_keys(metadata, language, dataset_name):
    """Attach bookkeeping keys to every sample dict of a dataset split.

    Args:
        metadata (list): sample dicts, each with at least ``"audio_file"``
            and ``"root_path"`` keys.
        language (str): language tag to attach to every sample.
        dataset_name (str): dataset identifier, also used as the prefix of
            the unique audio name.

    Returns:
        list: the same ``metadata`` list (items are mutated in place), with
        ``"language"``, ``"dataset_name"`` and ``"audio_unique_name"`` keys
        added to every item.
    """
    for item in metadata:
        # add language name
        item["language"] = language
        # keep the dataset name on each sample — the inline code this helper
        # replaced attached it, so downstream consumers still expect the key
        item["dataset_name"] = dataset_name
        # add unique audio name: "<dataset>#<path relative to root, extension stripped>"
        relfilepath = os.path.splitext(item["audio_file"].replace(item["root_path"], ""))[0]
        audio_unique_name = f"{dataset_name}#{relfilepath}"
        item["audio_unique_name"] = audio_unique_name

    return metadata
72+
73+
6274def load_tts_samples (
6375 datasets : Union [List [Dict ], Dict ],
6476 eval_split = True ,
@@ -111,15 +123,15 @@ def load_tts_samples(
111123 # load train set
112124 meta_data_train = formatter (root_path , meta_file_train , ignored_speakers = ignored_speakers )
113125 assert len (meta_data_train ) > 0 , f" [!] No training samples found in { root_path } /{ meta_file_train } "
114- meta_data_train = [{** item , ** {"language" : language , "dataset_name" : dataset_name }} for item in meta_data_train ]
126+
127+ meta_data_train = add_extra_keys (meta_data_train , language , dataset_name )
128+
115129 print (f" | > Found { len (meta_data_train )} files in { Path (root_path ).resolve ()} " )
116130 # load evaluation split if set
117131 if eval_split :
118132 if meta_file_val :
119133 meta_data_eval = formatter (root_path , meta_file_val , ignored_speakers = ignored_speakers )
120- meta_data_eval = [
121- {** item , ** {"language" : language , "dataset_name" : dataset_name }} for item in meta_data_eval
122- ]
134+ meta_data_eval = add_extra_keys (meta_data_eval , language , dataset_name )
123135 else :
124136 meta_data_eval , meta_data_train = split_dataset (meta_data_train , eval_split_max_size , eval_split_size )
125137 meta_data_eval_all += meta_data_eval
0 commit comments