@@ -65,17 +65,18 @@ def remove_higher_frequency(self, wav, ratio=0.95):
6565 stft = librosa .stft (wav )
6666 real , img = np .real (stft ), np .imag (stft )
6767 mag = (real ** 2 + img ** 2 ) ** 0.5
68- cos , sin = real / mag , img / mag
68+ cos , sin = real / ( mag + EPS ) , img / ( mag + EPS )
6969 spec = np .abs (stft ) # [1025,T]
7070 feature = spec .copy ()
71- feature = np .log10 (feature )
71+ feature = np .log10 (feature + EPS )
7272 feature [feature < 0 ] = 0
7373 energy_level = np .sum (feature , axis = 1 )
7474 threshold = np .sum (energy_level ) * ratio
7575 curent_level , i = energy_level [0 ], 0
7676 while (i < energy_level .shape [0 ] and curent_level < threshold ):
7777 curent_level += energy_level [i + 1 , ...]
7878 i += 1
79+ print (i )
7980 spec [i :, ...] = np .zeros_like (spec [i :, ...])
8081 stft = spec * cos + 1j * spec * sin
8182 return librosa .istft (stft )
@@ -92,13 +93,13 @@ def restore_inmem(self, wav_10k, cuda=False, mode=0, your_vocoder_func=None):
9293 elif (mode == 2 ):
9394 self ._model .train () # More effective on seriously damaged speech
9495
95- if (mode == 0 ):
96- wav_10k = self .remove_higher_frequency (wav_10k )
9796 res = []
98- seg_length = 44100 * 60
97+ seg_length = 44100 * 30
9998 break_point = seg_length
10099 while break_point < wav_10k .shape [0 ]+ seg_length :
101100 segment = wav_10k [break_point - seg_length :break_point ]
101+ if (mode == 1 ):
102+ segment = self .remove_higher_frequency (segment )
102103 sp ,mel_noisy = self ._pre (self ._model , segment , cuda )
103104 out_model = self ._model (sp , mel_noisy )
104105 denoised_mel = from_log (out_model ['mel' ])
0 commit comments