Skip to content

Commit 7ac6299

Browse files
committed
update readme
1 parent ef5cb1c commit 7ac6299

File tree

3 files changed

+11
-10
lines changed

3 files changed

+11
-10
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -54,17 +54,17 @@ from voicefixer import Vocoder
5454
voicefixer = VoiceFixer()
5555
# Speech restoration
5656

57-
# Mode 0
57+
# Mode 0: Original Model (suggested by default)
5858
voicefixer.restore(input="", # input wav file path
5959
output="", # output wav file path
6060
cuda=False, # whether to use gpu acceleration
6161
mode = 0) # You can try out mode 0, 1, 2 to find out the best result
62-
# Mode 1
62+
# Mode 1: Add preprocessing module (remove higher frequency)
6363
voicefixer.restore(input="", # input wav file path
6464
output="", # output wav file path
6565
cuda=False, # whether to use gpu acceleration
6666
mode = 1) # You can try out mode 0, 1, 2 to find out the best result
67-
# Mode 2
67+
# Mode 2: Train mode (might work sometimes on seriously degraded real speech)
6868
voicefixer.restore(input="", # input wav file path
6969
output="", # output wav file path
7070
cuda=False, # whether to use gpu acceleration

test/streamlit.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def init_voicefixer():
3131
st.write('Inference : ')
3232

3333
# choose options
34-
mode = st.radio('Voice fixer mode (0: rm high frequency, 1: none, 2: train fixer)', [0, 1, 2])
34+
mode = st.radio('Voice fixer modes (0: original mode, 1: Add preprocessing module 2: Train mode (may work sometimes on seriously degraded speech))', [0, 1, 2])
3535
if torch.cuda.is_available():
3636
is_cuda = st.radio('Turn on GPU', [True, False])
3737
if is_cuda != list(voice_fixer._model.parameters())[0].is_cuda:
@@ -62,5 +62,5 @@ def init_voicefixer():
6262
# make buffer
6363
with BytesIO() as buffer:
6464
soundfile.write(buffer, pred_wav.T, samplerate=sample_rate, format='WAV')
65-
st.write("Time: {:.3f}".format(pred_time))
65+
st.write("Time: {:.3f}s".format(pred_time))
6666
st.audio(buffer.getvalue(), format='audio/wav')

voicefixer/base.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -65,17 +65,18 @@ def remove_higher_frequency(self, wav, ratio=0.95):
6565
stft = librosa.stft(wav)
6666
real, img = np.real(stft), np.imag(stft)
6767
mag = (real ** 2 + img ** 2) ** 0.5
68-
cos, sin = real / mag, img / mag
68+
cos, sin = real / (mag+EPS), img / (mag+EPS)
6969
spec = np.abs(stft) # [1025,T]
7070
feature = spec.copy()
71-
feature = np.log10(feature)
71+
feature = np.log10(feature+EPS)
7272
feature[feature < 0] = 0
7373
energy_level = np.sum(feature, axis=1)
7474
threshold = np.sum(energy_level) * ratio
7575
curent_level, i = energy_level[0], 0
7676
while (i < energy_level.shape[0] and curent_level < threshold):
7777
curent_level += energy_level[i + 1, ...]
7878
i += 1
79+
print(i)
7980
spec[i:, ...] = np.zeros_like(spec[i:, ...])
8081
stft = spec * cos + 1j * spec * sin
8182
return librosa.istft(stft)
@@ -92,13 +93,13 @@ def restore_inmem(self, wav_10k, cuda=False, mode=0, your_vocoder_func=None):
9293
elif(mode == 2):
9394
self._model.train() # More effective on seriously demaged speech
9495

95-
if(mode == 0):
96-
wav_10k = self.remove_higher_frequency(wav_10k)
9796
res = []
98-
seg_length = 44100*60
97+
seg_length = 44100*30
9998
break_point = seg_length
10099
while break_point < wav_10k.shape[0]+seg_length:
101100
segment = wav_10k[break_point-seg_length:break_point]
101+
if (mode == 1):
102+
segment = self.remove_higher_frequency(segment)
102103
sp,mel_noisy = self._pre(self._model, segment, cuda)
103104
out_model = self._model(sp, mel_noisy)
104105
denoised_mel = from_log(out_model['mel'])

0 commit comments

Comments
 (0)