33now_dir = os .getcwd ()
44sys .path .append (now_dir )
55from infer .modules .vc .modules import VC
6+ from infer .modules .vc .utils import download_and_split_audio , combine_audio
7+ from infer .lib .setting import change_audio_mode , show_description , use_microphone
68from configs .config import Config
79import numpy as np
810import gradio as gr
1214import logging
1315import zipfile
1416import glob
17+ import asyncio
18+ import edge_tts
1519
1620logging .getLogger ("numba" ).setLevel (logging .WARNING )
1721logger = logging .getLogger (__name__ )
1822config = Config ()
1923vc = VC (config )
2024
# Fetch the Edge TTS voice catalogue once at start-up.
# `edge_tts.list_voices()` is awaited on a private event loop so that no
# globally registered loop is touched; the loop is closed afterwards so it
# does not leak for the lifetime of the process.
_tts_loop = asyncio.new_event_loop()
try:
    tts_voice_list = _tts_loop.run_until_complete(edge_tts.list_voices())
finally:
    _tts_loop.close()

# Dropdown labels have the exact form "<ShortName>-<Gender>" (no padding),
# which is the form of the default selection "en-US-AnaNeural-Female" used by
# the "Edge-tts speaker" dropdown; that dropdown disallows custom values, so
# the default must match one of these labels character for character.
tts_voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
27+
2128os .makedirs ("models" , exist_ok = True )
2229os .makedirs (os .path .join ("models" , "weights" ), exist_ok = True )
2330os .makedirs (os .path .join ("models" , "indexs" ), exist_ok = True )
2835indexs_path = os .path .join ("models" , "indexs" )
2936covers_path = os .path .join ("models" , "covers" )
3037
# Decide which input modes and pitch-extraction methods the UI offers.
#
# force_support ends up as one of:
#   False -> config.force_support is off and the device is "mps" or "cpu";
#            the reduced feature set below is used.
#   True  -> config.force_support is on; a warning is printed.
#   None  -> config.force_support is off on any other device.
# Both True and None select the full feature set.
#
# NOTE(review): indentation was lost in the source this was taken from; the
# `else` is attached to the outer `if` because the warning text talks about
# the unsupported feature being *enabled* -- confirm against the repository.
force_support = None
if config.force_support is False:
    if config.device in ("mps", "cpu"):
        force_support = False
else:
    # "\033[93m" switches the terminal to bright yellow and "\033[0m" resets
    # it. There must be no space between ESC and "[", otherwise the sequence
    # is not recognised and the codes are printed literally.
    print("\033[93mWARNING: Unsupported feature is enabled.\033[0m")
    print("\033[93mWARNING: It may not work properly.\033[0m")
    force_support = True

f0method_info = ""

if force_support is False:
    # Reduced set: no "Youtube" input mode and no "crepe" pitch extraction.
    audio_mode = ["Upload audio", "Input path", "TTS Audio"]
    f0method_mode = ["pm", "rmvpe", "harvest"]
else:
    audio_mode = ["Upload audio", "Input path", "Youtube", "TTS Audio"]
    f0method_mode = ["pm", "rmvpe", "harvest", "crepe"]
57+
3158json_files = []
3259for root , dirs , files in os .walk (models_path ):
3360 for file in files :
@@ -105,21 +132,22 @@ def clean():
105132 return {"value" : "" , "__type__" : "update" }
106133
107134with gr .Blocks (title = "RVC WebUI" , theme = gr .themes .Base ()) as app :
108- gr .Markdown ("## RVC WebUI" )
109- gr .Markdown (
110- value = "Alpha testing."
111- )
135+ gr .Markdown ("<center> # RVC WebUI" )
136+ gr .Markdown ("v1.0.0Beta" )
112137 with gr .Tabs ():
113138 with gr .TabItem ("Inference" ):
114139 with gr .Row ():
115140 modelSelect = gr .Dropdown (label = "Model" , choices = sorted (modelList ))
116141 downloadModel = gr .Button ("Download Model" , variant = "primary" )
117142 downloadModel .click (fn = download_model , inputs = [modelSelect ], outputs = [])
118143 with gr .Row ():
119- sid0 = gr .Dropdown (label = "Selected Model" , choices = sorted (names ))
120- with gr .Column ():
121- refresh_button = gr .Button ("Refresh model" , variant = "primary" )
122- clean_button = gr .Button ("Clean memory" , variant = "primary" )
144+ sid0 = gr .Dropdown (label = "Selected Model" , choices = sorted (names ), allow_custom_value = False )
145+ file_index = gr .Dropdown (
146+ label = "Index file dropdown" ,
147+ choices = sorted (indexs ),
148+ allow_custom_value = False ,
149+ interactive = True ,
150+ )
123151 spk_item = gr .Slider (
124152 minimum = 0 ,
125153 maximum = 2333 ,
@@ -129,33 +157,44 @@ def clean():
129157 visible = True ,
130158 interactive = True ,
131159 )
132- clean_button .click (
133- fn = clean , inputs = [], outputs = [sid0 ], api_name = "infer_clean"
134- )
160+ with gr .Column ():
161+ refresh_button = gr .Button ("Refresh model" , variant = "primary" )
162+ clean_button = gr .Button ("Clean memory" , variant = "primary" )
163+ clean_button .click (fn = clean , inputs = [], outputs = [sid0 ], api_name = "infer_clean" )
135164 with gr .TabItem ("Inference Setting" ):
136165 with gr .Row ():
137166 with gr .Column ():
138- vc_transform0 = gr .Number (
139- label = "Transpose" , value = 0
140- )
141- input_audio0 = gr .Textbox (
142- label = "Audio Input Path" ,
143- placeholder = "C:\\ Users\\ Desktop\\ audio_example.wav" ,
144- )
145- file_index = gr .Dropdown (
146- label = "Index file dropdown" ,
147- choices = sorted (indexs ),
148- interactive = True ,
149- )
167+ vc_audio_mode = gr .Dropdown (label = "Input voice" , choices = audio_mode , allow_custom_value = False , value = "Upload audio" , visible = True , interactive = True )
168+ # Upload Audio
169+ vc_upload = gr .Audio (label = "Upload audio file" , sources = "upload" , visible = True , interactive = True )
170+ vc_microphone_mode = gr .Checkbox (label = "Use Microphone" , value = False , visible = True , interactive = True )
171+ # Audio Path
172+ vc_audio_input = gr .Textbox (label = "Audio Input Path" , placeholder = "C:\\ Users\\ Desktop\\ audio_example.wav" , visible = False , interactive = True )
173+ # Youtube Audio
174+ vc_link = gr .Textbox (label = "Youtube URL" , visible = False , placeholder = "https://www.youtube.com/watch?v=..." , interactive = True )
175+ vc_split_model = gr .Dropdown (label = "Splitter Model" , choices = ["hdemucs_mmi" , "htdemucs" , "htdemucs_ft" , "mdx" , "mdx_extra" , "mdx_extra_q" ], allow_custom_value = False , visible = False , value = "htdemucs" , interactive = True )
176+ vc_download_button = gr .Button ("Download Audio" , variant = "primary" , visible = False )
177+ vc_vocal_preview = gr .Audio (label = "Vocal Preview" , visible = False )
178+ # TTS Audio
179+ vc_tts_text = gr .Textbox (label = "TTS text" , placeholder = "Hello world" , visible = False , interactive = True )
180+ vc_tts_voice = gr .Dropdown (label = "Edge-tts speaker" , choices = tts_voices , visible = False , allow_custom_value = False , value = "en-US-AnaNeural-Female" , interactive = True )
181+ with gr .Column ():
150182 f0method0 = gr .Radio (
151183 label = "Pitch extraction algorithm" ,
152- choices = [ "pm" , "harvest" , "crepe" , "rmvpe" ]
184+ choices = f0method_mode
153185 if config .dml == False
154186 else ["pm" , "harvest" , "rmvpe" ],
155187 value = "rmvpe" ,
156188 interactive = True ,
157189 )
158- with gr .Column ():
190+ vc_transform0 = gr .Slider (
191+ label = "Transpose" ,
192+ minimum = - 256 ,
193+ maximum = 256 ,
194+ step = 0.01 ,
195+ value = 0 ,
196+ interactive = True ,
197+ )
159198 index_rate = gr .Slider (
160199 minimum = 0 ,
161200 maximum = 1 ,
@@ -194,10 +233,6 @@ def clean():
194233 step = 1 ,
195234 interactive = True ,
196235 )
197- f0_file = gr .File (
198- label = "F0 curve file (Optional)" ,
199- visible = False ,
200- )
201236 refresh_button .click (
202237 fn = change_choices ,
203238 inputs = [],
@@ -206,32 +241,91 @@ def clean():
206241 )
207242 with gr .Column ():
208243 but0 = gr .Button ("Run" , variant = "primary" )
209- vc_output1 = gr .Textbox (label = "Output Log" )
210- vc_output2 = gr .Audio (label = "Output Audio" )
211- but0 .click (
212- vc .vc_single ,
213- [
214- spk_item ,
215- input_audio0 ,
216- vc_transform0 ,
217- f0_file ,
218- f0method0 ,
219- file_index ,
220- index_rate ,
221- filter_radius0 ,
222- resample_sr0 ,
223- rms_mix_rate0 ,
224- protect0 ,
225- ],
226- [vc_output1 , vc_output2 ],
227- api_name = "infer_convert" ,
228- )
229- sid0 .change (
230- fn = vc .get_vc ,
231- inputs = [sid0 , protect0 ],
232- outputs = [spk_item , protect0 , file_index ],
233- api_name = "infer_change_voice" ,
234- )
244+ vc_output = gr .Audio (label = "Output Audio" )
245+ vc_combined = gr .Button ("Combine" , variant = "primary" )
246+ vc_combined_output = gr .Audio (label = "Combined Audio" )
247+ with gr .TabItem ("Log" ):
248+ gr .Markdown ("## Log" )
249+ vc_log = gr .Textbox (label = "Output Log" )
250+ with gr .TabItem ("Settings" ):
251+ gr .Markdown ("## Setting" )
252+ description_mode = gr .Checkbox (label = "Show description" , value = False )
253+ description_mode .change (
254+ fn = show_description ,
255+ inputs = description_mode ,
256+ outputs = [
257+ vc_audio_input ,
258+ vc_link ,
259+ vc_split_model ,
260+ vc_tts_text ,
261+ vc_tts_voice ,
262+ f0method0 ,
263+ vc_transform0 ,
264+ index_rate ,
265+ resample_sr0 ,
266+ rms_mix_rate0 ,
267+ protect0 ,
268+ filter_radius0
269+ ]
270+ )
271+ but0 .click (
272+ vc .vc_single ,
273+ [
274+ spk_item ,
275+ vc_audio_input ,
276+ vc_upload ,
277+ vc_tts_text ,
278+ vc_tts_voice ,
279+ vc_transform0 ,
280+ f0method0 ,
281+ file_index ,
282+ index_rate ,
283+ filter_radius0 ,
284+ resample_sr0 ,
285+ rms_mix_rate0 ,
286+ protect0 ,
287+ ],
288+ [vc_log , vc_output ],
289+ api_name = "infer_convert" ,
290+ )
291+ sid0 .change (
292+ fn = vc .get_vc ,
293+ inputs = [sid0 , protect0 ],
294+ outputs = [spk_item , protect0 , file_index ],
295+ api_name = "infer_change_voice" ,
296+ )
297+ vc_microphone_mode .change (
298+ fn = use_microphone ,
299+ inputs = vc_microphone_mode ,
300+ outputs = vc_upload
301+ )
302+ vc_download_button .click (
303+ fn = download_and_split_audio ,
304+ inputs = [vc_link , vc_split_model ],
305+ outputs = [vc_vocal_preview , vc_log ]
306+ )
307+ vc_combined .click (
308+ fn = combine_audio ,
309+ inputs = [vc_split_model ],
310+ outputs = [vc_combined_output , vc_log ]
311+ )
312+ vc_audio_mode .change (
313+ fn = change_audio_mode ,
314+ inputs = [vc_audio_mode ],
315+ outputs = [
316+ vc_upload ,
317+ vc_microphone_mode ,
318+ vc_audio_input ,
319+ vc_link ,
320+ vc_split_model ,
321+ vc_download_button ,
322+ vc_vocal_preview ,
323+ vc_tts_text ,
324+ vc_tts_voice ,
325+ vc_combined ,
326+ vc_combined_output ,
327+ ]
328+ )
235329 if config .iscolab :
236330 app .queue (max_size = 1022 ).launch (share = True )
237331 else :
0 commit comments