haoheliu
diff --git a/‎README.md‎
Lines changed: 28 additions & 36 deletions b/‎README.md‎
Lines changed: 28 additions & 36 deletions
diff --git a/‎test/test.py‎
Lines changed: 44 additions & 11 deletions b/‎test/test.py‎
Lines changed: 44 additions & 11 deletions
diff --git a/‎test/utterance/original/original.flac‎
124 KB b/‎test/utterance/original/original.flac‎
124 KB
diff --git a/‎test/utterance/target/oracle.flac‎
150 KB b/‎test/utterance/target/oracle.flac‎
150 KB
diff --git a/‎test/utterance/target/output_mode_0.flac‎
134 KB b/‎test/utterance/target/output_mode_0.flac‎
134 KB
diff --git a/‎test/utterance/target/output_mode_1.flac‎
124 KB b/‎test/utterance/target/output_mode_1.flac‎
124 KB
diff --git a/‎test/utterance/target/output_mode_2.flac‎
143 KB b/‎test/utterance/target/output_mode_2.flac‎
143 KB
diff --git a/‎voicefixer/base.py‎
Lines changed: 2 additions & 5 deletions b/‎voicefixer/base.py‎
Lines changed: 2 additions & 5 deletions
diff --git a/‎voicefixer/tools/pytorch_util.py‎
Lines changed: 11 additions & 0 deletions b/‎voicefixer/tools/pytorch_util.py‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎voicefixer/vocoder/base.py‎
Lines changed: 8 additions & 3 deletions b/‎voicefixer/vocoder/base.py‎
Lines changed: 8 additions & 3 deletions
@@ -47,47 +47,39 @@ streamlit run test/streamlit.py
 **Important:** When you run the above command for the first time, the web page may stay blank for several minutes while the models are downloaded. You can check the terminal for download progress.  
 
 
-### Python interface
+### Example 
 
-Basic examples: 
+- *test/test.py*:
 
 ```python
-# Will automatically download model parameters.
-from voicefixer import VoiceFixer
-from voicefixer import Vocoder
+...
 
-# Initialize model
+# TEST VOICEFIXER
+## Initialize a voicefixer
 voicefixer = VoiceFixer()
-# Speech restoration
-
-# Mode 0: Original Model (suggested by default)
-voicefixer.restore(input="", # input wav file path
-                   output="", # output wav file path
-                   cuda=False, # whether to use gpu acceleration
-                   mode = 0) # You can try out mode 0, 1, 2 to find out the best result
-# Mode 1: Add preprocessing module (remove higher frequency)
-voicefixer.restore(input="", # input wav file path
-                   output="", # output wav file path
-                   cuda=False, # whether to use gpu acceleration
-                   mode = 1) # You can try out mode 0, 1, 2 to find out the best result
-# Mode 2: Train mode (might work sometimes on seriously degraded real speech)
-voicefixer.restore(input="", # input wav file path
-                   output="", # output wav file path
-                   cuda=False, # whether to use gpu acceleration
-                   mode = 2) # You can try out mode 0, 1, 2 to find out the best result
-
-# Another similar function
-# voicefixer.restore_inmem()
-
-# Universal speaker independent vocoder
-vocoder = Vocoder(sample_rate=44100) # Only 44100 sampling rate is supported.
-
-# Convert mel spectrogram to waveform
-wave = vocoder.forward(mel=mel_spec) # This forward function is used in the following oracle function.
-
-# Test vocoder using the mel spectrogram of 'fpath', save output to file out_path
-vocoder.oracle(fpath="", # input wav file path
-               out_path="") # output wav file path
+## Mode 0: Original Model (suggested by default)
+## Mode 1: Add preprocessing module (remove higher frequency)
+## Mode 2: Train mode (might work sometimes on seriously degraded real speech)
+for mode in [0,1,2]:
+    voicefixer.restore(input=os.path.join(git_root,"test/utterance/original/original.flac"), # low quality .wav/.flac file
+                       output=os.path.join(git_root,"test/utterance/output/output_mode_"+str(mode)+".flac"), # save file path
+                       cuda=False, # GPU acceleration
+                       mode=mode)
+
+
+# TEST VOCODER
+## Initialize a vocoder. Only 44100 sampling rate is supported.
+vocoder = Vocoder(sample_rate=44100)
+
+### read wave (fpath) -> mel spectrogram -> vocoder -> wave -> save wave (out_path)
+vocoder.oracle(fpath=os.path.join(git_root,"test/utterance/original/original.flac"),
+               out_path=os.path.join(git_root,"test/utterance/output/oracle.flac"),
+               cuda=False) # GPU acceleration
+
+# Other interfaces
+# voicefixer.restore_inmem
+# vocoder.forward 
+...
 ```
 
 ### Other Features
 
@@ -10,20 +10,53 @@
 9/14/21 11:02 AM   Haohe Liu      1.0         None
 '''
 
-from voicefixer import VoiceFixer
-
+import git
+import os
+import librosa
+import numpy as np
+from voicefixer import VoiceFixer, Vocoder
+
+git_root = git.Repo("", search_parent_directories=True).git.rev_parse("--show-toplevel")
+os.makedirs(os.path.join(git_root,"test/utterance/output"),exist_ok=True)
+
+def check(fname):
+    """
+    check if the output is normal
+    """
+    output = os.path.join(git_root,"test/utterance/output",fname)
+    target = os.path.join(git_root, "test/utterance/target", fname)
+    output, _ = librosa.load(output,sr=44100)
+    target, _ = librosa.load(target, sr=44100)
+    assert np.mean(np.abs(output-target)) < 0.01
+
+# TEST VOICEFIXER
+## Initialize a voicefixer
 voicefixer = VoiceFixer()
+# Mode 0: Original Model (suggested by default)
+# Mode 1: Add preprocessing module (remove higher frequency)
+# Mode 2: Train mode (might work sometimes on seriously degraded real speech)
+for mode in [0,1,2]:
+    voicefixer.restore(input=os.path.join(git_root,"test/utterance/original/original.flac"), # low quality .wav/.flac file
+                       output=os.path.join(git_root,"test/utterance/output/output_mode_"+str(mode)+".flac"), # save file path
+                       cuda=False, # GPU acceleration
+                       mode=mode)
+    if(mode != 2):
+        check("output_mode_"+str(mode)+".flac")
+
+
+# TEST VOCODER
+## Initialize a vocoder
+vocoder = Vocoder(sample_rate=44100)
 
-voicefixer.restore(input="/Users/liuhaohe/Downloads/vocals.wav",
-                   output="/Users/liuhaohe/Downloads/vocals_mode_0.wav",
-                   cuda=False,mode=0)
+### read wave (fpath) -> mel spectrogram -> vocoder -> wave -> save wave (out_path)
+vocoder.oracle(fpath=os.path.join(git_root,"test/utterance/original/original.flac"),
+               out_path=os.path.join(git_root,"test/utterance/output/oracle.flac"),
+               cuda=False) # GPU acceleration
 
+# Another interface
+# vocoder.forward(mel=mel)
 
-voicefixer.restore(input="/Users/liuhaohe/Downloads/vocals.wav",
-                   output="/Users/liuhaohe/Downloads/vocals_mode_1.wav",
-                   cuda=False,mode=1)
+check("oracle.flac")
 
+print("Pass")
 
-voicefixer.restore(input="/Users/liuhaohe/Downloads/vocals.wav",
-                   output="/Users/liuhaohe/Downloads/vocals_mode_2.wav",
-                   cuda=False,mode=2)
 
@@ -76,23 +76,20 @@ def remove_higher_frequency(self, wav, ratio=0.95):
         while (i < energy_level.shape[0] and curent_level < threshold):
             curent_level += energy_level[i + 1, ...]
             i += 1
-        print(i)
         spec[i:, ...] = np.zeros_like(spec[i:, ...])
         stft = spec * cos + 1j * spec * sin
         return librosa.istft(stft)
 
     @torch.no_grad()
     def restore_inmem(self, wav_10k, cuda=False, mode=0, your_vocoder_func=None):
-        if(cuda and torch.cuda.is_available()):
-            self._model = self._model.cuda()
-        # metrics = {}
+        check_cuda_availability(cuda=cuda)
+        try_tensor_cuda(self._model,cuda=cuda)
         if(mode == 0):
             self._model.eval()
         elif(mode == 1):
             self._model.eval()
         elif(mode == 2):
             self._model.train() # More effective on seriously damaged speech
-
         res = []
         seg_length = 44100*30
         break_point = seg_length
 
@@ -2,6 +2,17 @@
 import torch.nn as nn
 import numpy as np
 
+
+def check_cuda_availability(cuda):
+    if(cuda and not torch.cuda.is_available()):
+        raise RuntimeError("Error: You set cuda=True but no cuda device found.")
+
+def try_tensor_cuda(tensor, cuda):
+    if(cuda and torch.cuda.is_available()):
+        return tensor.cuda()
+    else:
+        return tensor.cpu()
+
 def to_log(input):
     assert torch.sum(input < 0) == 0, str(input)+" has negative values counts "+str(torch.sum(input < 0))
     return torch.log10(torch.clip(input, min=1e-8))
 
@@ -32,26 +32,31 @@ def _load_pretrain(self, pth):
     #         wav_re = self.model(mel) # torch.Size([1, 1, 104076])
     #         save_wave(tensor2numpy(wav_re)*2**15,save_dir,sample_rate=sample_rate)
 
-    def forward(self,mel):
+    def forward(self,mel, cuda=False):
         """
         :param non normalized mel spectrogram: [batchsize, 1, t-steps, n_mel]
         :return: [batchsize, 1, samples]
         """
         assert mel.size()[-1] == 128
+        check_cuda_availability(cuda=cuda)
+        try_tensor_cuda(self.model,cuda=cuda)
+        mel = try_tensor_cuda(mel,cuda=cuda)  # Tensor.cuda()/.cpu() return a new tensor (not in-place like nn.Module), so rebind mel
         self.weight_torch = self.weight_torch.type_as(mel)
         mel = mel / self.weight_torch
         mel = tr_normalize(tr_amp_to_db(torch.abs(mel)) - 20.0)
         mel = tr_pre(mel[:,0,...])
         wav_re = self.model(mel)
         return wav_re
 
-    def oracle(self, fpath, out_path):
+    def oracle(self, fpath, out_path, cuda=False):
+        check_cuda_availability(cuda=cuda)
+        try_tensor_cuda(self.model,cuda=cuda)
         wav = read_wave(fpath, sample_rate=self.rate)[..., 0]
         wav = wav/np.max(np.abs(wav))
         stft = np.abs(librosa.stft(wav,hop_length=Config.hop_length,win_length=Config.win_size,n_fft=Config.n_fft))
         mel = linear_to_mel(stft)
         mel = normalize(amp_to_db(np.abs(mel)) - 20)
-        mel = pre(np.transpose(mel, (1, 0)))
+        mel = pre(np.transpose(mel, (1, 0)),cuda=cuda)
         with torch.no_grad():
             wav_re = self.model(mel)
             save_wave(tensor2numpy(wav_re*2**15), out_path, sample_rate=self.rate)