Spaces:

ArkanDash
/

rvc-genshin-impact

Running on CPU Upgrade

App Files Files Community

ArkanDash committed on May 25, 2023

Commit

1fce9cf

•

1 Parent(s): 1f7b400

feat(app): minor changes

Browse files

Files changed (4) hide show

app-full.py +14 -17
app.py +64 -77
weights/folder_info.json +1 -2
weights/genshin-impact/cover.jpg +0 -3

app-full.py CHANGED Viewed

@@ -158,12 +158,11 @@ if __name__ == '__main__':
     for name, info in folder_info.items():
         if not info['enable']:
             continue
-        title = name
         folder = info['folder_path']
-        cover = f"{folder}/{info['cover']}"
-        markdown = info['markdown']
-        catergories.append([title, folder, cover, markdown])
-    for (title, folder, cover, markdown) in categories:
         with open(f"weights/{folder}/model_info.json", "r", encoding="utf-8") as f:
             models_info = json.load(f)
         for name, info in models_info.items():
@@ -171,9 +170,9 @@ if __name__ == '__main__':
                 continue
             title = info['title']
             author = info.get("author", None)
-            cover = f"weights/{name}/{info['cover']}"
-            index = f"weights/{name}/{info['feature_retrieval_library']}"
-            cpt = torch.load(f"weights/{name}/{name}.pth", map_location="cpu")
             tgt_sr = cpt["config"][-1]
             cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
             if_f0 = cpt.get("f0", 1)
@@ -199,17 +198,15 @@ if __name__ == '__main__':
             "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/110kiMZTdP6Ri1lY9-NbQf17GVPPhHyeT?usp=sharing)\n\n"
             "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
         )
-        for (title, folder, cover, markdown) in categories:
-            gr.Markdown(
-                '<div align="center">'
-                (f'<img style="width:auto;height:500px;" src="file/{cover}">' if cover else "")+
-                '<div>'
-            )
-            with gr.TabItem(title):
                 with gr.Tabs():
-                    if not models == True:
                         gr.Markdown("# <center> No Model Loaded.")
-                        return gr.Markdown("## <center> Please added the model or fix your model path.")
                     for (name, title, author, cover, vc_fn) in models:
                         with gr.TabItem(name):
                             with gr.Row():

     for name, info in folder_info.items():
         if not info['enable']:
             continue
+        title = info['title']
         folder = info['folder_path']
+        description = info['description']
+        categories.append([title, folder, description])
+    for (title, folder, description) in categories:
         with open(f"weights/{folder}/model_info.json", "r", encoding="utf-8") as f:
             models_info = json.load(f)
         for name, info in models_info.items():
                 continue
             title = info['title']
             author = info.get("author", None)
+            cover = f"weights/{folder}/{name}/{info['cover']}"
+            index = f"weights/{folder}/{name}/{info['feature_retrieval_library']}"
+            cpt = torch.load(f"weights/{folder}/{name}/{name}.pth", map_location="cpu")
             tgt_sr = cpt["config"][-1]
             cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
             if_f0 = cpt.get("f0", 1)
             "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/110kiMZTdP6Ri1lY9-NbQf17GVPPhHyeT?usp=sharing)\n\n"
             "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
         )
+        for (folder_title, folder, description) in categories:
+            with gr.TabItem(folder_title):
+                if description:
+                    gr.Markdown(f"<center>{description}")
                 with gr.Tabs():
+                    if not models:
                         gr.Markdown("# <center> No Model Loaded.")
+                        gr.Markdown("## <center> Please added the model or fix your model path.")
+                        continue
                     for (name, title, author, cover, vc_fn) in models:
                         with gr.TabItem(name):
                             with gr.Row():

app.py CHANGED Viewed

@@ -97,19 +97,29 @@ def change_to_tts_mode(tts_mode):
 if __name__ == '__main__':
     load_hubert()
     models = []
     tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
     voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
-    if limitation:
-        with open("weights/model_info.json", "r", encoding="utf-8") as f:
             models_info = json.load(f)
         for name, info in models_info.items():
             if not info['enable']:
                 continue
             title = info['title']
             author = info.get("author", None)
-            cover = f"weights/{name}/{info['cover']}"
-            index = f"weights/{name}/{info['feature_retrieval_library']}"
-            cpt = torch.load(f"weights/{name}/{name}.pth", map_location="cpu")
             tgt_sr = cpt["config"][-1]
             cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
             if_f0 = cpt.get("f0", 1)
@@ -118,90 +128,67 @@ if __name__ == '__main__':
             else:
                 net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
             del net_g.enc_q
-            print(net_g.load_state_dict(cpt["weight"], strict=False))  # 不加这一行清不干净, 真奇葩
-            net_g.eval().to(config.device)
-            if config.is_half:
-                net_g = net_g.half()
-            else:
-                net_g = net_g.float()
-            vc = VC(tgt_sr, config)
-            models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
-    else:
-        folder_path = "weights"
-        for name in os.listdir(folder_path):
-            print("check folder: " + name)
-            if name.startswith("."): break
-            cover_path = glob.glob(f"{folder_path}/{name}/*.png") + glob.glob(f"{folder_path}/{name}/*.jpg")
-            index_path = glob.glob(f"{folder_path}/{name}/*.index")
-            checkpoint_path = glob.glob(f"{folder_path}/{name}/*.pth")
-            title = name
-            author = ""
-            if cover_path:
-                cover = cover_path[0]
-            else:
-                cover = ""
-            index = index_path[0]
-            cpt = torch.load(checkpoint_path[0], map_location="cpu")
-            tgt_sr = cpt["config"][-1]
-            cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
-            if_f0 = cpt.get("f0", 1)
-            if if_f0 == 1:
-                net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
-            else:
-                net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
-            del net_g.enc_q
-            print(net_g.load_state_dict(cpt["weight"], strict=False))  # 不加这一行清不干净, 真奇葩
             net_g.eval().to(config.device)
             if config.is_half:
                 net_g = net_g.half()
             else:
                 net_g = net_g.float()
             vc = VC(tgt_sr, config)
             models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
     with gr.Blocks() as app:
         gr.Markdown(
             "# <center> RVC Models [(Latest Update)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/releases/tag/20230428updated)\n"
             "## <center> The input audio should be clean and pure voice without background music.\n"
-            "### <center> [Recommended to use google colab for more features](https://colab.research.google.com/drive/110kiMZTdP6Ri1lY9-NbQf17GVPPhHyeT?usp=sharing) \n"
-            "#### <center> Please regenerate your model to latest RVC to fully applied this new rvc.\n"
             "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/110kiMZTdP6Ri1lY9-NbQf17GVPPhHyeT?usp=sharing)\n\n"
             "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
         )
-        with gr.Tabs():
-            for (name, title, author, cover, vc_fn) in models:
-                with gr.TabItem(name):
-                    with gr.Row():
-                        gr.Markdown(
-                            '<div align="center">'
-                            f'<div>{title}</div>\n'+
-                            (f'<div>Model author: {author}</div>' if author else "")+
-                            (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
-                            '</div>'
-                        )
-                    with gr.Row():
-                        with gr.Column():
-                            vc_input = gr.Audio(label="Input audio"+' (less than 20 seconds)' if limitation else '')
-                            vc_transpose = gr.Number(label="Transpose", value=0)
-                            vc_f0method = gr.Radio(
-                                label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
-                                choices=["pm", "harvest"],
-                                value="pm",
-                                interactive=True,
-                            )
-                            vc_index_ratio = gr.Slider(
-                                minimum=0,
-                                maximum=1,
-                                label="Retrieval feature ratio",
-                                value=0.6,
-                                interactive=True,
-                            )
-                            tts_mode = gr.Checkbox(label="tts (use edge-tts as input)", value=False)
-                            tts_text = gr.Textbox(visible=False,label="TTS text (100 words limitation)" if limitation else "TTS text")
-                            tts_voice = gr.Dropdown(label="Edge-tts speaker", choices=voices, visible=False, allow_custom_value=False, value="en-US-AnaNeural-Female")
-                            vc_submit = gr.Button("Generate", variant="primary")
-                        with gr.Column():
-                            vc_output1 = gr.Textbox(label="Output Message")
-                            vc_output2 = gr.Audio(label="Output Audio")
-                vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
-                tts_mode.change(change_to_tts_mode, [tts_mode], [vc_input, tts_text, tts_voice])
         app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)

 if __name__ == '__main__':
     load_hubert()
     models = []
+    categories = []
     tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
     voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
+    with open("weights/folder_info.json", "r", encoding="utf-8") as f:
+        folder_info = json.load(f)
+    for name, info in folder_info.items():
+        if not info['enable']:
+            continue
+        title = info['title']
+        folder = info['folder_path']
+        description = info['description']
+        categories.append([title, folder, description])
+    for (title, folder, description) in categories:
+        with open(f"weights/{folder}/model_info.json", "r", encoding="utf-8") as f:
             models_info = json.load(f)
         for name, info in models_info.items():
             if not info['enable']:
                 continue
             title = info['title']
             author = info.get("author", None)
+            cover = f"weights/{folder}/{name}/{info['cover']}"
+            index = f"weights/{folder}/{name}/{info['feature_retrieval_library']}"
+            cpt = torch.load(f"weights/{folder}/{name}/{name}.pth", map_location="cpu")
             tgt_sr = cpt["config"][-1]
             cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
             if_f0 = cpt.get("f0", 1)
             else:
                 net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
             del net_g.enc_q
+            print(net_g.load_state_dict(cpt["weight"], strict=False))
             net_g.eval().to(config.device)
             if config.is_half:
                 net_g = net_g.half()
             else:
                 net_g = net_g.float()
             vc = VC(tgt_sr, config)
+            print(f"Model loaded: {name}")
             models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
     with gr.Blocks() as app:
         gr.Markdown(
             "# <center> RVC Models [(Latest Update)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/releases/tag/20230428updated)\n"
             "## <center> The input audio should be clean and pure voice without background music.\n"
+            "### <center> This project was inspired by [zomehwh](https://huggingface.co/spaces/zomehwh/rvc-models) and [ardha27](https://huggingface.co/spaces/ardha27/rvc-models)\n"
             "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/110kiMZTdP6Ri1lY9-NbQf17GVPPhHyeT?usp=sharing)\n\n"
             "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
         )
+        for (folder_title, folder, description) in categories:
+            with gr.TabItem(folder_title):
+                if description:
+                    gr.Markdown(f"<center>{description}")
+                with gr.Tabs():
+                    if not models:
+                        gr.Markdown("# <center> No Model Loaded.")
+                        gr.Markdown("## <center> Please added the model or fix your model path.")
+                        continue
+                    with gr.Tabs():
+                        for (name, title, author, cover, vc_fn) in models:
+                            with gr.TabItem(name):
+                                with gr.Row():
+                                    gr.Markdown(
+                                        '<div align="center">'
+                                        f'<div>{title}</div>\n'+
+                                        (f'<div>Model author: {author}</div>' if author else "")+
+                                        (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
+                                        '</div>'
+                                    )
+                                with gr.Row():
+                                    with gr.Column():
+                                        vc_input = gr.Audio(label="Input audio"+' (less than 20 seconds)' if limitation else '')
+                                        vc_transpose = gr.Number(label="Transpose", value=0)
+                                        vc_f0method = gr.Radio(
+                                            label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
+                                            choices=["pm", "harvest"],
+                                            value="pm",
+                                            interactive=True,
+                                        )
+                                        vc_index_ratio = gr.Slider(
+                                            minimum=0,
+                                            maximum=1,
+                                            label="Retrieval feature ratio",
+                                            value=0.6,
+                                            interactive=True,
+                                        )
+                                        tts_mode = gr.Checkbox(label="tts (use edge-tts as input)", value=False)
+                                        tts_text = gr.Textbox(visible=False,label="TTS text (100 words limitation)" if limitation else "TTS text")
+                                        tts_voice = gr.Dropdown(label="Edge-tts speaker", choices=voices, visible=False, allow_custom_value=False, value="en-US-AnaNeural-Female")
+                                        vc_submit = gr.Button("Generate", variant="primary")
+                                    with gr.Column():
+                                        vc_output1 = gr.Textbox(label="Output Message")
+                                        vc_output2 = gr.Audio(label="Output Audio")
+                            vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
+                            tts_mode.change(change_to_tts_mode, [tts_mode], [vc_input, tts_text, tts_voice])
         app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)

weights/folder_info.json CHANGED Viewed

@@ -3,7 +3,6 @@
         "enable": true,
         "title": "Genshin Impact",
         "folder_path": "genshin-impact",
-        "cover": "cover.png",
-        "markdown": ""
     }
 }

         "enable": true,
         "title": "Genshin Impact",
         "folder_path": "genshin-impact",
+        "description": ""
     }
 }

weights/genshin-impact/cover.jpg DELETED Viewed

Git LFS Details

SHA256: 807ab8a2096ef2e92ad85e9675ac3d1123b0e14955f8cd5cedee08f772b7317f
Pointer size: 130 Bytes
Size of remote file: 45.8 kB