@@ -59,18 +59,28 @@ def __init__(self, gpu_id="cpu", title="Controllable Embeddings", article="", av
5959 'Chinese Accent' ,
6060 'Vietnamese Accent' ], type = "value" ,
6161 value = 'English Accent' , label = "Select the Accent of the Speaker" ),
62+ gr .Textbox (lines = 3 ,
63+ placeholder = "\n The sliders below control the speaker embedding" ,
64+ value = "\n The sliders below control the speaker embedding" ,
65+ label = " " ,
66+ show_label = False ),
6267 gr .Slider (minimum = 0 , maximum = available_artificial_voices , step = 1 ,
6368 value = 279 ,
6469 label = "Random Seed for the artificial Voice" ),
65- gr .Slider (minimum = 0.5 , maximum = 1.5 , step = 0.1 , value = 1.0 , label = "Duration Scale" ),
66- gr .Slider (minimum = 0.0 , maximum = 2.0 , step = 0.1 , value = 1.0 , label = "Pause Duration Scale" ),
67- gr .Slider (minimum = 0.0 , maximum = 2.0 , step = 0.1 , value = 1.0 , label = "Pitch Variance Scale" ),
68- gr .Slider (minimum = 0.0 , maximum = 2.0 , step = 0.1 , value = 1.0 , label = "Energy Variance Scale" ),
6970 gr .Slider (minimum = - 50.0 , maximum = 50.0 , step = 0.1 , value = 0.0 , label = "Femininity / Masculinity" ),
7071 gr .Slider (minimum = - 30.0 , maximum = 30.0 , step = 0.1 , value = 0.0 , label = "Sibilance" ),
7172 gr .Slider (minimum = - 30.0 , maximum = 30.0 , step = 0.1 , value = 0.0 , label = "Accentuated High / Low Frequencies" ),
7273 gr .Slider (minimum = - 30.0 , maximum = 30.0 , step = 0.1 , value = 0.0 , label = "Loudness / Arousal / Calmness" ),
73- gr .Slider (minimum = - 20.0 , maximum = 20.0 , step = 0.1 , value = 0.0 , label = "Tone / Timbre" )
74+ gr .Slider (minimum = - 20.0 , maximum = 20.0 , step = 0.1 , value = 0.0 , label = "Tone / Timbre" ),
75+ gr .Textbox (lines = 3 ,
76+ placeholder = "\n The sliders below directly control the TTS" ,
77+ value = "\n The sliders below directly control the TTS" ,
78+ label = " " ,
79+ show_label = False ),
80+ gr .Slider (minimum = 0.5 , maximum = 1.5 , step = 0.1 , value = 1.0 , label = "Duration Scale" ),
81+ gr .Slider (minimum = 0.0 , maximum = 2.0 , step = 0.1 , value = 1.0 , label = "Pause Duration Scale" ),
82+ gr .Slider (minimum = 0.0 , maximum = 2.0 , step = 0.1 , value = 1.0 , label = "Pitch Variance Scale" ),
83+ gr .Slider (minimum = 0.0 , maximum = 2.0 , step = 0.1 , value = 1.0 , label = "Energy Variance Scale" )
7484 ],
7585 outputs = [gr .Audio (type = "numpy" , label = "Speech" ),
7686 gr .Image (label = "Visualization" )],
@@ -84,16 +94,18 @@ def read(self,
8494 prompt ,
8595 language ,
8696 accent ,
97+ ignore_1 ,
8798 voice_seed ,
88- duration_scaling_factor ,
89- pause_duration_scaling_factor ,
90- pitch_variance_scale ,
91- energy_variance_scale ,
9299 emb1 ,
93100 emb2 ,
94101 emb3 ,
95102 emb5 ,
96- emb6 ):
103+ emb6 ,
104+ ignore_2 ,
105+ duration_scaling_factor ,
106+ pause_duration_scaling_factor ,
107+ pitch_variance_scale ,
108+ energy_variance_scale ):
97109 sr , wav , fig = self .controllable_ui .read (prompt ,
98110 language ,
99111 accent ,
0 commit comments