fix-torch-stft-error-on-gpus-sm-53

BushyToaster88 · BushyToaster88 · commit b92361b99fd6 · 2023-03-03T16:17:28.000+11:00
This pull request fixes an error that occurs when running the finetune_speaker_v2.py script on GPUs with a compute capability below SM_53. The error is raised at line 104 of mel_processing.py, where torch.stft() is called with an input tensor of half data type. To fix this, the input is now cast to float (`y.float()`) before it is passed to torch.stft().
diff --git a/mel_processing.py b/mel_processing.py
@@ -101,8 +101,8 @@ def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size,
     y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
     y = y.squeeze(1)
 
-    spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
-                      center=center, pad_mode='reflect', normalized=False, onesided=True)
+    spec = torch.stft(y.float(), n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
+        center=center, pad_mode='reflect', normalized=False, onesided=True)
 
     spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)