"""Gradio web UI entry point for the RVC V2 Huggingface build.

Builds the "Inference" and "Download Model" tabs, wires their event
handlers to the helpers star-imported from ``rvc``, and launches the app.

NOTE(review): this file was recovered from a whitespace-collapsed copy, so
the nesting of the layout containers (Row / Column / Accordion) below was
reconstructed from statement order. Verify against the rendered UI.
"""
import gradio as gr
from rvc import *
from argparse import ArgumentParser
import os
import sys

if __name__ == '__main__':
    parser = ArgumentParser(description='gorge rvc', add_help=True)
    parser.add_argument("--share", action="store_true", dest="share_enabled", default=False, help="Enable sharing")
    parser.add_argument("--listen", action="store_true", default=False, help="Make the WebUI reachable from your local network.")
    parser.add_argument('--listen-host', type=str, help='The hostname that the server will use.')
    parser.add_argument('--listen-port', type=int, help='The listening port that the server will use.')
    # The banner used to be driven by sys.argv[0] (the script path), which
    # never equals 'True'/'False', so it was always shown. Keep "shown" as
    # the default and make hiding it an explicit opt-in.
    parser.add_argument("--hide-banner", action="store_false", dest="show_banner", default=True, help="Hide the EasyGUI credit banner.")
    args = parser.parse_args()

    with gr.Blocks(theme=gr.themes.Base(), title='Mangio-RVC-Web 💻') as app:
        # NOTE(review): the markup around these two headings appears to have
        # been lost from the recovered source; only the text is kept here.
        gr.HTML("\n\nRVC V2 Huggingface Version\n\n")
        if args.show_banner:
            gr.HTML(" Huggingface version of Easy GUI by Rejekts ")
        with gr.Tabs():
            with gr.TabItem("Inference"):
                # Model selection, pitch transform and the main Convert button.
                with gr.Row():
                    sid0 = gr.Dropdown(label="1.Choose your Model.", choices=sorted(names), value=check_for_name())
                    refresh_button = gr.Button("Refresh", variant="primary")
                    # Pre-load the first model when one is available.
                    if check_for_name() != '':
                        get_vc(sorted(names)[0])
                    vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.", value=0)
                    # Hidden speaker-id slider; it is the output of get_vc.
                    spk_item = gr.Slider(
                        minimum=0,
                        maximum=2333,
                        step=1,
                        label=i18n("请选择说话人id"),
                        value=0,
                        visible=False,
                        interactive=True,
                    )
                    sid0.change(
                        fn=get_vc,
                        inputs=[sid0],
                        outputs=[spk_item],
                    )
                    but0 = gr.Button("Convert", variant="primary")
                with gr.Row():
                    # Left column: audio input, text-to-speech and Wav2Lip.
                    with gr.Column():
                        with gr.Row():
                            dropbox = gr.File(label="Drop your audio here & hit the Reload button.")
                        with gr.Row():
                            record_button = gr.Audio(source="microphone", label="OR Record audio.", type="filepath")
                        with gr.Row():
                            input_audio0 = gr.Dropdown(
                                label="2.Choose your audio.",
                                value="./audios/someguy.mp3",
                                choices=audio_files,
                            )
                            dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0])
                            dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
                            refresh_button2 = gr.Button("Refresh", variant="primary", size='sm')
                            record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
                            record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])
                        with gr.Row():
                            with gr.Accordion('Text To Speech', open=False):
                                with gr.Column():
                                    lang = gr.Radio(
                                        label='Chinese & Japanese do not work with ElevenLabs currently.',
                                        choices=['en', 'es', 'fr', 'pt', 'zh-CN', 'de', 'hi', 'ja'],
                                        value='en',
                                    )
                                    api_box = gr.Textbox(label="Enter your API Key for ElevenLabs, or leave empty to use GoogleTTS", value='')
                                    elevenid = gr.Dropdown(label="Voice:", choices=eleven_voices)
                                with gr.Column():
                                    tfs = gr.Textbox(label="Input your Text", interactive=True, value="This is a test.")
                                    tts_button = gr.Button(value="Speak")
                                    tts_button.click(fn=elevenTTS, inputs=[api_box, tfs, elevenid, lang], outputs=[record_button, input_audio0])
                        with gr.Row():
                            with gr.Accordion('Wav2Lip', open=False):
                                with gr.Row():
                                    size = gr.Radio(label='Resolution:', choices=['Half', 'Full'])
                                    face = gr.UploadButton("Upload A Character", type='file')
                                    faces = gr.Dropdown(label="OR Choose one:", choices=['None', 'Ben Shapiro', 'Andrew Tate'])
                                with gr.Row():
                                    preview = gr.Textbox(label="Status:", interactive=False)
                                    face.upload(fn=success_message, inputs=[face], outputs=[preview, faces])
                                with gr.Row():
                                    animation = gr.Video(type='filepath')
                                    refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation])
                                with gr.Row():
                                    animate_button = gr.Button('Animate')
                    # Right column: index selection, output audio, advanced settings.
                    with gr.Column():
                        with gr.Accordion("Index Settings", open=False):
                            file_index1 = gr.Dropdown(
                                label="3. Path to your added.index file (if it didn't automatically find it.)",
                                choices=get_indexes(),
                                value=get_index(),
                                interactive=True,
                            )
                            sid0.change(fn=match_index, inputs=[sid0], outputs=[file_index1])
                            refresh_button.click(
                                fn=change_choices, inputs=[], outputs=[sid0, file_index1]
                            )
                            index_rate1 = gr.Slider(
                                minimum=0,
                                maximum=1,
                                label=i18n("检索特征占比"),
                                value=0.66,
                                interactive=True,
                            )
                        vc_output2 = gr.Audio(
                            label="Output Audio (Click on the Three Dots in the Right Corner to Download)",
                            type='filepath',
                            interactive=False,
                        )
                        animate_button.click(fn=mouth, inputs=[size, face, vc_output2, faces], outputs=[animation, preview])
                        with gr.Accordion("Advanced Settings", open=False):
                            f0method0 = gr.Radio(
                                label=(
                                    "Optional: Change the Pitch Extraction Algorithm.\n"
                                    "Extraction methods are sorted from 'worst quality' to 'best quality'.\n"
                                    "mangio-crepe may or may not be better than rmvpe in cases where "
                                    "'smoothness' is more important, but rmvpe is the best overall."
                                ),
                                choices=["pm", "dio", "crepe-tiny", "mangio-crepe-tiny", "crepe", "harvest", "mangio-crepe", "rmvpe"],  # Fork Feature. Add Crepe-Tiny
                                value="rmvpe",
                                interactive=True,
                            )
                            # Starts hidden; whethercrepeornah updates it when the method changes.
                            crepe_hop_length = gr.Slider(
                                minimum=1,
                                maximum=512,
                                step=1,
                                label=(
                                    "Mangio-Crepe Hop Length. Higher numbers will reduce the chance of "
                                    "extreme pitch changes but lower numbers will increase accuracy. "
                                    "64-192 is a good range to experiment with."
                                ),
                                value=120,
                                interactive=True,
                                visible=False,
                            )
                            f0method0.change(fn=whethercrepeornah, inputs=[f0method0], outputs=[crepe_hop_length])
                            filter_radius0 = gr.Slider(
                                minimum=0,
                                maximum=7,
                                label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
                                value=3,
                                step=1,
                                interactive=True,
                            )
                            resample_sr0 = gr.Slider(
                                minimum=0,
                                maximum=48000,
                                label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
                                value=0,
                                step=1,
                                interactive=True,
                                visible=False,
                            )
                            rms_mix_rate0 = gr.Slider(
                                minimum=0,
                                maximum=1,
                                label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
                                value=0.21,
                                interactive=True,
                            )
                            protect0 = gr.Slider(
                                minimum=0,
                                maximum=0.5,
                                label=i18n("保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"),
                                value=0.33,
                                step=0.01,
                                interactive=True,
                            )
                            # Formant-shift controls; everything except the checkbox
                            # starts visible only when DoFormant is truthy.
                            formanting = gr.Checkbox(
                                value=bool(DoFormant),
                                label="[EXPERIMENTAL] Formant shift inference audio",
                                info="Used for male to female and vice-versa conversions",
                                interactive=True,
                                visible=True,
                            )
                            formant_preset = gr.Dropdown(
                                value='',
                                choices=get_fshift_presets(),
                                label="browse presets for formanting",
                                visible=bool(DoFormant),
                            )
                            formant_refresh_button = gr.Button(
                                value='\U0001f504',
                                visible=bool(DoFormant),
                                variant='primary',
                            )
                            qfrency = gr.Slider(
                                value=Quefrency,
                                info="Default value is 1.0",
                                label="Quefrency for formant shifting",
                                minimum=0.0,
                                maximum=16.0,
                                step=0.1,
                                visible=bool(DoFormant),
                                interactive=True,
                            )
                            tmbre = gr.Slider(
                                value=Timbre,
                                info="Default value is 1.0",
                                label="Timbre for formant shifting",
                                minimum=0.0,
                                maximum=16.0,
                                step=0.1,
                                visible=bool(DoFormant),
                                interactive=True,
                            )
                            formant_preset.change(fn=preset_apply, inputs=[formant_preset, qfrency, tmbre], outputs=[qfrency, tmbre])
                            frmntbut = gr.Button("Apply", variant="primary", visible=bool(DoFormant))
                            formanting.change(
                                fn=formant_enabled,
                                inputs=[formanting, qfrency, tmbre, frmntbut, formant_preset, formant_refresh_button],
                                outputs=[formanting, qfrency, tmbre, frmntbut, formant_preset, formant_refresh_button],
                            )
                            frmntbut.click(fn=formant_apply, inputs=[qfrency, tmbre], outputs=[qfrency, tmbre])
                            formant_refresh_button.click(
                                fn=update_fshift_presets,
                                inputs=[formant_preset, qfrency, tmbre],
                                outputs=[formant_preset, qfrency, tmbre],
                            )
                with gr.Row():
                    vc_output1 = gr.Textbox("")
                    f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"), visible=False)
                    # Single-file conversion: status text goes to vc_output1,
                    # converted audio to vc_output2.
                    but0.click(
                        vc_single,
                        [
                            spk_item,
                            input_audio0,
                            vc_transform0,
                            f0_file,
                            f0method0,
                            file_index1,
                            index_rate1,
                            filter_radius0,
                            resample_sr0,
                            rms_mix_rate0,
                            protect0,
                            crepe_hop_length,
                        ],
                        [vc_output1, vc_output2],
                    )
                with gr.Accordion("Batch Conversion", open=False):
                    with gr.Row():
                        with gr.Column():
                            vc_transform1 = gr.Number(
                                label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
                            )
                            opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt")
                            f0method1 = gr.Radio(
                                label=i18n(
                                    "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"
                                ),
                                choices=["pm", "harvest", "crepe", "rmvpe"],
                                value="rmvpe",
                                interactive=True,
                            )
                            filter_radius1 = gr.Slider(
                                minimum=0,
                                maximum=7,
                                label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
                                value=3,
                                step=1,
                                interactive=True,
                            )
                        with gr.Column():
                            file_index3 = gr.Textbox(
                                label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
                                value="",
                                interactive=True,
                            )
                            file_index4 = gr.Dropdown(
                                label=i18n("自动检测index路径,下拉式选择(dropdown)"),
                                choices=sorted(index_paths),
                                interactive=True,
                            )
                            # Reuse the second element of change_choices()'s result
                            # to refresh the batch index dropdown.
                            refresh_button.click(
                                fn=lambda: change_choices()[1],
                                inputs=[],
                                outputs=file_index4,
                            )
                            index_rate2 = gr.Slider(
                                minimum=0,
                                maximum=1,
                                label=i18n("检索特征占比"),
                                value=1,
                                interactive=True,
                            )
                        with gr.Column():
                            resample_sr1 = gr.Slider(
                                minimum=0,
                                maximum=48000,
                                label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
                                value=0,
                                step=1,
                                interactive=True,
                            )
                            rms_mix_rate1 = gr.Slider(
                                minimum=0,
                                maximum=1,
                                label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
                                value=1,
                                interactive=True,
                            )
                            protect1 = gr.Slider(
                                minimum=0,
                                maximum=0.5,
                                label=i18n(
                                    "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"
                                ),
                                value=0.33,
                                step=0.01,
                                interactive=True,
                            )
                        with gr.Column():
                            dir_input = gr.Textbox(
                                label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"),
                                # Backslashes are escaped explicitly; the runtime value is
                                # unchanged but no longer relies on invalid escapes (\c, \p).
                                value="E:\\codes\\py39\\test-20230416b\\todo-songs",
                            )
                            inputs = gr.File(
                                file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
                            )
                        with gr.Row():
                            format1 = gr.Radio(
                                label=i18n("导出文件格式"),
                                choices=["wav", "flac", "mp3", "m4a"],
                                value="flac",
                                interactive=True,
                            )
                            but1 = gr.Button(i18n("转换"), variant="primary")
                            vc_output3 = gr.Textbox(label=i18n("输出信息"))
                    but1.click(
                        vc_multi,
                        [
                            spk_item,
                            dir_input,
                            opt_input,
                            inputs,
                            vc_transform1,
                            f0method1,
                            file_index3,
                            file_index4,
                            index_rate2,
                            filter_radius1,
                            resample_sr1,
                            rms_mix_rate1,
                            protect1,
                            format1,
                            crepe_hop_length,
                        ],
                        [vc_output3],
                    )
                    # NOTE(review): easy_uploader is not defined in this file;
                    # presumably it is provided by the star import from rvc - confirm.
                    but1.click(fn=lambda: easy_uploader.clear())
            with gr.TabItem("Download Model"):
                with gr.Row():
                    url = gr.Textbox(label="Enter the URL to the Model:")
                with gr.Row():
                    model = gr.Textbox(label="Name your model:")
                    download_button = gr.Button("Download")
                with gr.Row():
                    status_bar = gr.Textbox(label="")
                    download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar])
                with gr.Row():
                    # Credits footer, kept as a single space-separated string.
                    gr.Markdown(
                        " Original RVC:https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI"
                        " Mangio’s RVC Fork:https://github.com/Mangio621/Mangio-RVC-Fork"
                        " ❤️ If you like the EasyGUI, help me keep it.❤️"
                        " https://paypal.me/lesantillan "
                    )

    # Honour the network flags, which were previously parsed but ignored.
    # An explicit host wins; otherwise --listen binds to all interfaces.
    # None leaves Gradio's own default in place.
    server_name = args.listen_host or ("0.0.0.0" if args.listen else None)
    app.launch(
        share=args.share_enabled,
        server_name=server_name,
        server_port=args.listen_port,
    )