diff --git a/.gitignore b/.gitignore index 70c097cf505..66f184b05bc 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ training_set2/ training_set2_onlyrealrir/ training_set4/ training_set5/ +pdns_training_set/ +personalized_dev_testset/ logs/ test_set2/ training_set_sept11/ diff --git a/download-dns-challenge-4-pdns.sh b/download-dns-challenge-4-pdns.sh index a346a471066..43ca3125f3f 100644 --- a/download-dns-challenge-4-pdns.sh +++ b/download-dns-challenge-4-pdns.sh @@ -36,56 +36,56 @@ BLOB_NAMES=( - pdns_training_set/raw/pdns_training_set.raw.clean.english_000.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_001.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_002.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_003.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_004.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_005.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_006.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_007.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_008.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_009.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_010.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_011.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_012.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_013.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_014.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_015.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_016.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_017.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_018.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.english_019.tar.bz2 - 
pdns_training_set/raw/pdns_training_set.raw.clean.english_020.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.french_000.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.german_000.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.german_001.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.german_002.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.german_003.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.german_004.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.german_005.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.german_006.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.german_007.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.german_008.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_000.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_001.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_002.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_003.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_004.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_005.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_006.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_007.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_008.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_009.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_010.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_011.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_012.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_013.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_014.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_015.tar.bz2 + # 
pdns_training_set/raw/pdns_training_set.raw.clean.english_016.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_017.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_018.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_019.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.english_020.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.french_000.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.german_000.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.german_001.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.german_002.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.german_003.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.german_004.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.german_005.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.german_006.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.german_007.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.german_008.tar.bz2 pdns_training_set/raw/pdns_training_set.raw.clean.italian_000.tar.bz2 pdns_training_set/raw/pdns_training_set.raw.clean.italian_001.tar.bz2 pdns_training_set/raw/pdns_training_set.raw.clean.italian_002.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.russian_000.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.spanish_000.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.spanish_001.tar.bz2 - pdns_training_set/raw/pdns_training_set.raw.clean.spanish_002.tar.bz2 - - pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_000.tar.bz2 - pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_001.tar.bz2 - pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_002.tar.bz2 - pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_003.tar.bz2 - 
pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_004.tar.bz2 - pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.french_000.tar.bz2 - pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.german_000.tar.bz2 - pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.german_001.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.russian_000.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.spanish_000.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.spanish_001.tar.bz2 + # pdns_training_set/raw/pdns_training_set.raw.clean.spanish_002.tar.bz2 + + # pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_000.tar.bz2 + # pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_001.tar.bz2 + # pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_002.tar.bz2 + # pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_003.tar.bz2 + # pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_004.tar.bz2 + # pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.french_000.tar.bz2 + # pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.german_000.tar.bz2 + # pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.german_001.tar.bz2 pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.italian_000.tar.bz2 - pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.russian_000.tar.bz2 - pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.spanish_000.tar.bz2 + # pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.russian_000.tar.bz2 + # pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.spanish_000.tar.bz2 pdns_training_set/pdns_training_set.enrollment_embeddings_000.tar.bz2 @@ -100,7 +100,7 @@ BLOB_NAMES=( datasets_fullband/noise_fullband/datasets_fullband.noise_fullband.freesound_000.tar.bz2 
datasets_fullband/noise_fullband/datasets_fullband.noise_fullband.freesound_001.tar.bz2 - datasets_fullband/datasets_fullband.impulse_responses_000.tar.bz2 + # datasets_fullband/datasets_fullband.impulse_responses_000.tar.bz2 personalized_dev_testset/personalized_dev_testset.enrollment.tar.bz2 personalized_dev_testset/personalized_dev_testset.noisy_testclips.tar.bz2 @@ -130,5 +130,5 @@ do # wget "$URL" -O "$OUTPUT_PATH/$BLOB" # Same, + unpack files on the fly - # curl "$URL" | tar -C "$OUTPUT_PATH" -f - -x -j + curl "$URL" | tar -C "$OUTPUT_PATH" -f - -x -j done diff --git a/pdns_noisyspeech_synthesizer_singleprocess.py b/pdns_noisyspeech_synthesizer_singleprocess.py index dc030f9990f..19ef2813ea4 100644 --- a/pdns_noisyspeech_synthesizer_singleprocess.py +++ b/pdns_noisyspeech_synthesizer_singleprocess.py @@ -573,20 +573,15 @@ def main_body(): if cfg['noise_dir'] != 'None': noise_dir = cfg['noise_dir'] - if not os.path.exists(noise_dir): - assert False, ('Clean speech data is required') - + if not os.path.exists(noise_dir): + assert False, ('Noise data is required') + print(clean_dir) print(clean_dir2) print(noise_dir) print(spkid_csv) print(rir_dir) - - if cfg['noise_dir'] != 'None': - noise_dir = cfg['noise_dir'] - if not os.path.exists: - assert False, ('Noise data is required') - + params['fs'] = int(cfg['sampling_rate']) params['audioformat'] = cfg['audioformat'] params['audio_length'] = float(cfg['audio_length']) diff --git a/pdns_synthesizer_icassp2022.cfg b/pdns_synthesizer_icassp2022.cfg new file mode 100644 index 00000000000..cebe1da69a7 --- /dev/null +++ b/pdns_synthesizer_icassp2022.cfg @@ -0,0 +1,137 @@ +# Configuration for generating Noisy Speech Dataset + +# - sampling_rate: Specify the sampling rate. Default is 16 kHz +# - audioformat: default is .wav +# - audio_length: Minimum Length of each audio clip (noisy and clean speech) in seconds that will be generated by augmenting utterances. 
+# - silence_length: Duration of silence introduced between clean speech utterances. +# - total_hours: Total number of hours of data required. Units are in hours. +# - snr_lower: Lower bound for SNR required (default: 0 dB) +# - snr_upper: Upper bound for SNR required (default: 40 dB) +# - target_level_lower: Lower bound for the target audio level before audiowrite (default: -35 dB) +# - target_level_upper: Upper bound for the target audio level before audiowrite (default: -15 dB) +# - total_snrlevels: Number of SNR levels required (default: 5, which means there are 5 levels between snr_lower and snr_upper) +# - clean_activity_threshold: Activity threshold for clean speech +# - noise_activity_threshold: Activity threshold for noise +# - fileindex_start: Starting file ID that will be used in filenames +# - fileindex_end: Last file ID that will be used in filenames +# - is_test_set: Set it to True if it is the test set, else False for the training set +# - noise_dir: Specify the directory path to all noise files +# - speech_dir: Specify the directory path to all clean speech files +# - noisy_destination: Specify path to the destination directory to store noisy speech +# - clean_destination: Specify path to the destination directory to store clean speech +# - noise_destination: Specify path to the destination directory to store noise +# - log_dir: Specify path to the directory to store all the log files + +# Configuration for unit tests +# - snr_test: Set to True if SNR test is required, else False +# - norm_test: Set to True if Normalization test is required, else False +# - sampling_rate_test: Set to True if Sampling Rate test is required, else False +# - clipping_test: Set to True if Clipping test is required, else False +# - unit_tests_log_dir: Specify path to the directory where you want to store logs + +[noisy_speech] + +sampling_rate: 48000 +audioformat: *.wav +audio_length: 30 +# 15, 12, 30 +silence_length: 0.2 +total_hours: 1000 +# 1000 +#200 +# 2.5, 
500, 100 +snr_lower: -5 +#-5, 0 +snr_upper: 20 +# 25, 40 +randomize_snr: True +target_level_lower: -35 +target_level_upper: -15 +total_snrlevels: 31 +# 5 +clean_activity_threshold: 0.0 +noise_activity_threshold: 0.2 +fileindex_start: None +fileindex_end: None +is_test_set: False +# True, False + +noise_dir: /mnt/f/4th_DNSChallenge/ICASSP_2022/DNS-Challenge/datasets/noise +#/mnt/f/4th_DNSChallenge/INTERSPEECH_2021/DNS-Challenge/datasets_fullband/noise +#F:\4th_DNSChallenge\INTERSPEECH_2021\DNS-Challenge\datasets_fullband\noise +#datasets\pdns_training_set\noise +#\test_set2\Test_Noise +# datasets\noise +# \datasets\noise + +speech_dir: /mnt/f/4th_DNSChallenge/ICASSP_2022/DNS-Challenge/datasets/clean +# D:\kanhawin_git\primary_speakers_VCTK_16k_for_synthesizer +# datasets\test_set2\Singing_Voice\wav_16k +# dir with secondary speaker clean speech +speech_dir2: /mnt/f/4th_DNSChallenge/ICASSP_2022/DNS-Challenge/datasets/clean +#D:\kanhawin_git\secondary_speakers_voxCeleb2_16k +# datasets\test_set2\Singing_Voice\wav_16k + +spkid_csv: /mnt/f/4th_DNSChallenge/ICASSP_2022/DNS-Challenge/datasets/filelists/complete_ps_split.csv +#/mnt/f/4th_DNSChallenge/ICASSP_2022/DNS-Challenge/datasets/filelists/vctk_spkid.csv +# datasets\clean +noise_types_excluded: None + +rir_dir: /mnt/f/4th_DNSChallenge/ICASSP_2022/DNS-Challenge/datasets/pdns_training_set/impulse_responses +#/mnt/f/4th_DNSChallenge/ICASSP_2022/DNS-Challenge/datasets/impulse_responses +# F:\4th_DNSChallenge\ICASSP_2022\DNS-Challenge\datasets\impulse_responses + +# \datasets\clean +noisy_destination: /mnt/f/4th_DNSChallenge/ICASSP_2022/DNS-Challenge/datasets/pdns_training_set/mixed/noisy +# datasets/training_data/noisy +# datasets\test_set2\synthetic_personalizeddns\noisy +#training_set2_onlyrealrir\noisy +#\noisy +clean_destination: /mnt/f/4th_DNSChallenge/ICASSP_2022/DNS-Challenge/datasets/pdns_training_set/mixed/clean +#datasets\test_set2\synthetic_personalizeddns\clean +# training_set2_onlyrealrir\clean +# \clean 
+noise_destination: /mnt/f/4th_DNSChallenge/ICASSP_2022/DNS-Challenge/datasets/pdns_training_set/mixed/noise +# datasets/training_data/noise +#datasets\test_set2\synthetic_personalizeddns\noise +#training_set2_onlyrealrir\noise +# \noise +log_dir: logs +# \logs + +# Config: add singing voice to clean speech +clean_singing: datasets\clean_singing\VocalSet11\FULL +singing_choice: 3 +# 1 for only male, 2 for only female, 3 (default) for both male and female + +# Config: add reverb to clean speech +rir_choice: 1 +# 1 for only real rir, 2 for only synthetic rir, 3 (default) use both real and synthetic +lower_t60: 0.3 +# lower bound of t60 range in seconds +upper_t60: 1.3 +# upper bound of t60 range in seconds +rir_table_csv: datasets\acoustic_params\RIR_table_simple.csv +clean_speech_t60_csv: datasets\acoustic_params\cleanspeech_table_t60_c50.csv +# percent_for_adding_reverb=0.5 # percentage of clean speech convolved with RIR + +# pdns testsets +# primary_data: D:\kanhawin_git\primary_speakers_VCTK_16k +#'D:\PersonalizedDNS_dataset\synthetic_primary' +# secondary_data='D:\kanhawin_git\secondary_speakers_voxCeleb2_16k' +#'D:\PersonalizedDNS_dataset\synthetic_secondary' +# noise_data= datasets\test_set2\synthetic\noise +# pdns_testset_clean= datasets\test_set2\pdns\clean +# pdns_testset_noisy= datasets\test_set2\pdns\noisy + +# adaptation_data_seconds=120 +# num_primary_spk=100 +# num_clips=600 + +# Unit tests config +snr_test: True +norm_test: True +sampling_rate_test = True +clipping_test = True + +unit_tests_log_dir: unittests_logs