Skip to content

Commit 3180651

Browse files
committed
remove necessity for file cache
1 parent 7c2e88e commit 3180651

File tree

2 files changed

+20
-15
lines changed

2 files changed

+20
-15
lines changed

run_asvspoof_generation.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,42 @@
22

33
import librosa
44
import soundfile as sf
5-
import torch
65
from tqdm import tqdm
76

87
from InferenceInterfaces.ToucanTTSInterface import ToucanTTSInterface
98
from Utility.utils import float2pcm
109

10+
# Root directory of the MLS English train split. The script reads
# "transcripts.txt" from here and builds audio paths below its "audio/" folder.
PATH_TO_MLS_ENGLISH_TRAIN = "/mount/resources/speech/corpora/MultiLingLibriSpeech/mls_english/train"
1111
# File listing the items to generate; it is read in full and split on newlines,
# with empty entries skipped.
PATH_TO_GENERATION_FILE = "p1_ttsvc_surrogate.tsv"
1212
# Directory the generated .flac files are written to (created if missing).
PATH_TO_OUTPUT_DIR = "asv_spoof_outputs_no_pros"
1313
# Device string handed to ToucanTTSInterface when the model is loaded.
DEVICE = "cuda"
1414

15+
16+
def build_path_to_transcript_dict_mls_english():
    """
    Build a lookup from audio file path to transcript for MLS English (train).

    Reads "transcripts.txt" inside PATH_TO_MLS_ENGLISH_TRAIN. Every non-blank
    line is expected to hold an utterance id and its transcript, separated by
    a tab. The first two underscore-separated parts of the id name the two
    nested folders below "audio/" in which "<id>.flac" is looked up.

    Returns:
        dict mapping "<train root>/audio/<part0>/<part1>/<id>.flac" to the
        transcript text (the second tab-separated field of the line).

    Raises:
        OSError: if the transcript file cannot be opened.
        IndexError: if a non-blank line has no tab, or its id contains no
            underscore.
    """
    path_to_transcript = {}
    transcript_file = os.path.join(PATH_TO_MLS_ENGLISH_TRAIN, "transcripts.txt")
    with open(transcript_file, "r", encoding="utf8") as file:
        # Stream the file line by line. Reading it whole and then splitting on
        # newlines would hold two full copies of a very large file in memory.
        for raw_line in file:
            line = raw_line.rstrip("\n")
            if line.strip() == "":
                continue  # blank or whitespace-only lines carry no entry
            fields = line.split("\t")
            utterance_id = fields[0]
            wav_folders = utterance_id.split("_")
            # Keys are joined with "/" on purpose: the main script later takes
            # the file name via p.split("/")[-1].
            wav_path = f"{PATH_TO_MLS_ENGLISH_TRAIN}/audio/{wav_folders[0]}/{wav_folders[1]}/{utterance_id}.flac"
            path_to_transcript[wav_path] = fields[1]
    return path_to_transcript
27+
28+
1529
if __name__ == '__main__':
30+
print("loading model...")
1631
tts = ToucanTTSInterface(device=DEVICE, tts_model_path="ASVSpoof")
17-
path_to_transcript_dict = torch.load("mls_transcript_cache.pt")
32+
print("prepare path to transcript lookup...")
33+
path_to_transcript_dict = build_path_to_transcript_dict_mls_english()
1834
filename_to_path = dict()
1935
for p in path_to_transcript_dict:
2036
filename_to_path[p.split("/")[-1].rstrip(".flac")] = p
2137
with open(PATH_TO_GENERATION_FILE, "r") as file:
2238
generation_list = file.read().split("\n")
2339
os.makedirs(PATH_TO_OUTPUT_DIR, exist_ok=True)
24-
40+
print("generating audios...")
2541
for generation_item in tqdm(generation_list):
2642
if generation_item == "":
2743
continue
@@ -34,7 +50,4 @@
3450
tts.set_utterance_embedding(path_to_reference_audio=source_list)
3551
cloned_utterance = tts(transcript)
3652
resampled_utt = librosa.resample(cloned_utterance, orig_sr=24000, target_sr=16000)
37-
sf.write(file=f"{PATH_TO_OUTPUT_DIR}/" + output_name + ".flac",
38-
data=float2pcm(resampled_utt),
39-
samplerate=16000,
40-
subtype="PCM_16")
53+
sf.write(file=f"{PATH_TO_OUTPUT_DIR}/" + output_name + ".flac", data=float2pcm(resampled_utt), samplerate=16000, subtype="PCM_16")

run_model_downloader.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,6 @@ def download_models():
4747
filename=os.path.abspath(os.path.join(MODELS_DIR, "Embedding", "embedding_function.pt")),
4848
reporthook=report)
4949

50-
#############
51-
print("Downloading Cleaned MLS Texts")
52-
os.makedirs(os.path.join(MODELS_DIR, "Embedding"), exist_ok=True)
53-
filename, headers = urllib.request.urlretrieve(
54-
url="https://github.com/DigitalPhonetics/IMS-Toucan/releases/download/v2.asvspoof/mls_transcript_cache.pt",
55-
filename="mls_transcript_cache.pt",
56-
reporthook=report)
57-
5850
#############
5951
print("Downloading Codec Model")
6052
filename, headers = urllib.request.urlretrieve(

0 commit comments

Comments
 (0)