Skip to content

Commit bbbe3f0

Browse files
committed
improve restoration strategy in mode 0
1 parent f5d1511 commit bbbe3f0

File tree

4 files changed

+47
-10
lines changed

4 files changed

+47
-10
lines changed

README.md

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,25 @@ from voicefixer import Vocoder
2626
# Initialize model
2727
voicefixer = VoiceFixer()
2828
# Speech restoration
29+
30+
# Mode 0
31+
voicefixer.restore(input="", # input wav file path
32+
output="", # output wav file path
33+
cuda=False, # whether to use gpu acceleration
34+
mode = 0) # You can try out mode 0, 1, 2 to find out the best result
35+
# Mode 1
36+
voicefixer.restore(input="", # input wav file path
37+
output="", # output wav file path
38+
cuda=False, # whether to use gpu acceleration
39+
mode = 1) # You can try out mode 0, 1, 2 to find out the best result
40+
# Mode 2
2941
voicefixer.restore(input="", # input wav file path
3042
output="", # output wav file path
3143
cuda=False, # whether to use gpu acceleration
32-
mode = 0) # You can try out mode 0, 1 to find out the best result
44+
mode = 2) # You can try out mode 0, 1, 2 to find out the best result
45+
46+
47+
3348

3449
# Universal speaker independent vocoder
3550
vocoder = Vocoder(sample_rate=44100) # Only 44100 sampling rate is supported.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
3232
AUTHOR = 'Haohe Liu'
3333
REQUIRES_PYTHON = '>=3.7.0'
34-
VERSION = '0.0.7'
34+
VERSION = '0.0.8'
3535

3636
# What packages are required for this module to be executed?
3737
REQUIRED = [

test/test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,6 @@
1414

1515
voicefixer = VoiceFixer()
1616

17-
voicefixer.restore(input="/Users/liuhaohe/Desktop/test_song.wav",
18-
output="/Users/liuhaohe/Desktop/test_song_out_2.wav",
19-
cuda=False,mode=1)
17+
voicefixer.restore(input="/Users/liuhaohe/Downloads/lieshi_short.wav",
18+
output="/Users/liuhaohe/Downloads/lieshi_short.wav",
19+
cuda=False,mode=2)

voicefixer/base.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,19 +61,41 @@ def _pre(self, model, input, cuda):
6161
# return models.to_log(sp), models.to_log(mel_orig)
6262
return sp, mel_orig
6363

64+
def remove_higher_frequency(self, wav, ratio=0.95):
    """Zero out the upper frequency bins of ``wav`` and resynthesize it.

    The signal is transformed with ``librosa.stft``. For every frequency
    bin the clipped log-magnitude (``log10`` of the magnitude, negative
    values set to 0) is summed over time. Bins are then accumulated from
    the lowest frequency upward until the running total reaches ``ratio``
    times the overall total; that bin and every bin above it are zeroed.
    The phase of the retained bins is left untouched.

    Args:
        wav: Waveform passed directly to ``librosa.stft``
            (assumed to be a 1-D float numpy array -- TODO confirm
            against ``_load_wav``).
        ratio: Fraction of the total clipped log-magnitude that must be
            accumulated before the cut-off bin is reached. If the
            threshold is never reached (e.g. ``ratio > 1``) no bin is
            removed.

    Returns:
        The time-domain signal reconstructed with ``librosa.istft``,
        with the same number of samples as ``wav``.
    """
    stft = librosa.stft(wav)
    spec = np.abs(stft)  # magnitude, frequency bins along axis 0 ([1025, T])

    # log10(0) is -inf for silent bins; it is clipped to 0 right below,
    # so the divide-by-zero warning carries no information here.
    with np.errstate(divide="ignore"):
        feature = np.log10(spec)
    feature[feature < 0] = 0

    energy_level = np.sum(feature, axis=1)
    threshold = np.sum(energy_level) * ratio

    # First bin whose running total reaches the threshold. Guard the
    # "never reached" case explicitly instead of indexing past the end.
    reached = np.cumsum(energy_level) >= threshold
    if reached.any():
        cutoff = int(np.argmax(reached))
    else:
        cutoff = energy_level.shape[0]

    # Zeroing the complex STFT directly keeps the original phase of the
    # retained bins and avoids the 0/0 -> NaN that an explicit
    # magnitude/phase split produces for bins whose magnitude is 0.
    stft[cutoff:, ...] = 0

    # Pass the original length so the output is not shortened by framing.
    return librosa.istft(stft, length=wav.shape[-1])
82+
6483
def restore(self, input, output, cuda=False, mode=0):
6584
if(cuda and torch.cuda.is_available()):
6685
self._model = self._model.cuda()
6786
# metrics = {}
68-
if(mode == 1):
69-
self._model.train() # More effective on seriously demaged speech
70-
elif(mode == 2):
71-
self._model.generator.denoiser.train() # Another option worth trying
72-
else:
87+
if(mode == 0):
7388
self._model.eval()
89+
elif(mode == 1):
90+
self._model.eval()
91+
elif(mode == 2):
92+
self._model.train() # More effective on seriously demaged speech
7493

7594
with torch.no_grad():
7695
wav_10k = self._load_wav(input, sample_rate=44100)
96+
if(mode == 0):
97+
# print("In mode 0, we will remove part of the higher frequency part before processing")
98+
wav_10k = self.remove_higher_frequency(wav_10k)
7799
res = []
78100
seg_length = 44100*60
79101
break_point = seg_length

0 commit comments

Comments
 (0)