Ilaria
committed on
Commit
•
7cfb6ba
1
Parent(s):
2afc955
New Version - 3.0
Browse files
- New support for Ilaria TTS (currently the best TTS for HF in terms of resource usage)
- Fixed support for ElevenLabs and Google TTS
- Faster inference
- Better looking UI
- Various bug fixes
- Removed Herobrine
- app.py +58 -33
- ilariatts.py +230 -0
- requirements.txt +2 -1
- vc_infer_pipeline.py +9 -8
app.py
CHANGED
@@ -21,6 +21,11 @@ warnings.filterwarnings("ignore")
|
|
21 |
torch.manual_seed(114514)
|
22 |
from i18n import I18nAuto
|
23 |
|
|
|
|
|
|
|
|
|
|
|
24 |
import signal
|
25 |
|
26 |
import math
|
@@ -1445,6 +1450,12 @@ def elevenTTS(xiapi, text, id, lang):
|
|
1445 |
aud_path = save_to_wav('./temp_gTTS.mp3')
|
1446 |
return aud_path, aud_path
|
1447 |
|
|
|
|
|
|
|
|
|
|
|
|
|
1448 |
def upload_to_dataset(files, dir):
|
1449 |
if dir == '':
|
1450 |
dir = './dataset'
|
@@ -1470,7 +1481,7 @@ def zip_downloader(model):
|
|
1470 |
else:
|
1471 |
return f'./weights/{model}.pth', "Could not find Index file."
|
1472 |
|
1473 |
-
with gr.Blocks(theme=gr.themes.
|
1474 |
with gr.Tabs():
|
1475 |
with gr.TabItem("Inference"):
|
1476 |
gr.HTML("<h1> Ilaria RVC 💖 </h1>")
|
@@ -1525,11 +1536,11 @@ with gr.Blocks(theme=gr.themes.Base (), title='Mangio-RVC-Web 💻') as app:
|
|
1525 |
dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
|
1526 |
refresh_button2 = gr.Button("Refresh", variant="primary", size='sm')
|
1527 |
record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
|
1528 |
-
record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])
|
1529 |
with gr.Row():
|
1530 |
-
with gr.Accordion('
|
1531 |
with gr.Column():
|
1532 |
-
lang = gr.Radio(label='Chinese & Japanese do not work with ElevenLabs currently.',choices=['en','es','fr','pt','zh-CN','de','hi','ja'], value='en')
|
1533 |
api_box = gr.Textbox(label="Enter your API Key for ElevenLabs, or leave empty to use GoogleTTS", value='')
|
1534 |
elevenid=gr.Dropdown(label="Voice:", choices=eleven_voices)
|
1535 |
with gr.Column():
|
@@ -1537,7 +1548,7 @@ with gr.Blocks(theme=gr.themes.Base (), title='Mangio-RVC-Web 💻') as app:
|
|
1537 |
tts_button = gr.Button(value="Speak")
|
1538 |
tts_button.click(fn=elevenTTS, inputs=[api_box,tfs, elevenid, lang], outputs=[record_button, input_audio0])
|
1539 |
with gr.Row():
|
1540 |
-
with gr.Accordion('Wav2Lip', open=False):
|
1541 |
with gr.Row():
|
1542 |
size = gr.Radio(label='Resolution:',choices=['Half','Full'])
|
1543 |
face = gr.UploadButton("Upload A Character",type='file')
|
@@ -1550,37 +1561,50 @@ with gr.Blocks(theme=gr.themes.Base (), title='Mangio-RVC-Web 💻') as app:
|
|
1550 |
refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation])
|
1551 |
with gr.Row():
|
1552 |
animate_button = gr.Button('Animate')
|
1553 |
-
|
1554 |
with gr.Column():
|
1555 |
-
with gr.Accordion("Index Settings", open=False):
|
1556 |
-
file_index1 = gr.Dropdown(
|
1557 |
-
label="3. Choose the index file (in case it wasn't automatically found.)",
|
1558 |
-
choices=get_indexes(),
|
1559 |
-
value=get_index(),
|
1560 |
-
interactive=True,
|
1561 |
-
)
|
1562 |
-
sid0.change(fn=match_index, inputs=[sid0],outputs=[file_index1])
|
1563 |
-
refresh_button.click(
|
1564 |
-
fn=change_choices, inputs=[], outputs=[sid0, file_index1]
|
1565 |
-
)
|
1566 |
-
# file_big_npy1 = gr.Textbox(
|
1567 |
-
# label=i18n("特征文件路径"),
|
1568 |
-
# value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
|
1569 |
-
# interactive=True,
|
1570 |
-
# )
|
1571 |
-
index_rate1 = gr.Slider(
|
1572 |
-
minimum=0,
|
1573 |
-
maximum=1,
|
1574 |
-
label=i18n("检索特征占比"),
|
1575 |
-
value=0.66,
|
1576 |
-
interactive=True,
|
1577 |
-
)
|
1578 |
vc_output2 = gr.Audio(
|
1579 |
label="Final Result! (Click on the three dots to download the audio)",
|
1580 |
type='filepath',
|
1581 |
interactive=False,
|
1582 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1583 |
animate_button.click(fn=mouth, inputs=[size, face, vc_output2, faces], outputs=[animation, preview])
|
|
|
1584 |
with gr.Accordion("Advanced Options", open=False):
|
1585 |
f0method0 = gr.Radio(
|
1586 |
label="Optional: Change the Pitch Extraction Algorithm. Extraction methods are sorted from 'worst quality' to 'best quality'. If you don't know what you're doing, leave rmvpe.",
|
@@ -1679,6 +1703,7 @@ with gr.Blocks(theme=gr.themes.Base (), title='Mangio-RVC-Web 💻') as app:
|
|
1679 |
formanting.change(fn=formant_enabled,inputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button],outputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button])
|
1680 |
frmntbut.click(fn=formant_apply,inputs=[qfrency, tmbre], outputs=[qfrency, tmbre])
|
1681 |
formant_refresh_button.click(fn=update_fshift_presets,inputs=[formant_preset, qfrency, tmbre],outputs=[formant_preset, qfrency, tmbre])
|
|
|
1682 |
with gr.Row():
|
1683 |
vc_output1 = gr.Textbox("")
|
1684 |
f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"), visible=False)
|
@@ -1704,7 +1729,7 @@ with gr.Blocks(theme=gr.themes.Base (), title='Mangio-RVC-Web 💻') as app:
|
|
1704 |
[vc_output1, vc_output2],
|
1705 |
)
|
1706 |
|
1707 |
-
with gr.Accordion("Batch Conversion",open=False):
|
1708 |
with gr.Row():
|
1709 |
with gr.Column():
|
1710 |
vc_transform1 = gr.Number(
|
@@ -1828,7 +1853,7 @@ with gr.Blocks(theme=gr.themes.Base (), title='Mangio-RVC-Web 💻') as app:
|
|
1828 |
model = gr.Textbox(label="Name of the model (without spaces):")
|
1829 |
download_button=gr.Button("Download")
|
1830 |
with gr.Row():
|
1831 |
-
status_bar=gr.Textbox(label="")
|
1832 |
download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar])
|
1833 |
with gr.Row():
|
1834 |
gr.Markdown(
|
@@ -2080,9 +2105,9 @@ with gr.Blocks(theme=gr.themes.Base (), title='Mangio-RVC-Web 💻') as app:
|
|
2080 |
else:
|
2081 |
print(
|
2082 |
"Pretrained weights not downloaded. Disabling training tab.\n"
|
2083 |
-
"Wondering how to train a voice?
|
2084 |
"-------------------------------\n"
|
2085 |
)
|
2086 |
|
2087 |
-
app.queue(concurrency_count=511, max_size=1022).launch(share=False, quiet=
|
2088 |
#endregion
|
|
|
21 |
torch.manual_seed(114514)
|
22 |
from i18n import I18nAuto
|
23 |
|
24 |
+
import edge_tts, asyncio
|
25 |
+
from ilariatts import tts_order_voice
|
26 |
+
language_dict = tts_order_voice
|
27 |
+
ilariavoices = language_dict.keys()
|
28 |
+
|
29 |
import signal
|
30 |
|
31 |
import math
|
|
|
1450 |
aud_path = save_to_wav('./temp_gTTS.mp3')
|
1451 |
return aud_path, aud_path
|
1452 |
|
1453 |
+
def ilariaTTS(text, ttsvoice):
    """Synthesize *text* with the Edge TTS voice chosen in the UI.

    ``ttsvoice`` is a display-name key of ``language_dict``; the value it maps
    to is passed to ``edge_tts.Communicate`` as the voice. The audio is saved
    to ``./temp_ilaria.mp3`` and then handed to ``save_to_wav``.

    Returns the ``save_to_wav`` result twice, because the click handler wires
    this function to two output components.

    Raises ``KeyError`` when ``ttsvoice`` is not a key of ``language_dict``.
    """
    mp3_path = "./temp_ilaria.mp3"
    # NOTE(review): this fixed path is shared by every request — confirm that
    # concurrent "Speak" clicks cannot overwrite each other's audio.
    voice_id = language_dict[ttsvoice]
    communicator = edge_tts.Communicate(text, voice_id)
    # Communicate.save is a coroutine; drive it to completion synchronously.
    asyncio.run(communicator.save(mp3_path))
    wav_path = save_to_wav(mp3_path)
    return wav_path, wav_path
|
1458 |
+
|
1459 |
def upload_to_dataset(files, dir):
|
1460 |
if dir == '':
|
1461 |
dir = './dataset'
|
|
|
1481 |
else:
|
1482 |
return f'./weights/{model}.pth', "Could not find Index file."
|
1483 |
|
1484 |
+
with gr.Blocks(theme=gr.themes.Default(primary_hue="pink", secondary_hue="rose"), title="Ilaria RVC 💖") as app:
|
1485 |
with gr.Tabs():
|
1486 |
with gr.TabItem("Inference"):
|
1487 |
gr.HTML("<h1> Ilaria RVC 💖 </h1>")
|
|
|
1536 |
dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
|
1537 |
refresh_button2 = gr.Button("Refresh", variant="primary", size='sm')
|
1538 |
record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
|
1539 |
+
record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])
|
1540 |
with gr.Row():
|
1541 |
+
with gr.Accordion('ElevenLabs / Google TTS', open=False):
|
1542 |
with gr.Column():
|
1543 |
+
lang = gr.Radio(label='Chinese & Japanese do not work with ElevenLabs currently.',choices=['en','it','es','fr','pt','zh-CN','de','hi','ja'], value='en')
|
1544 |
api_box = gr.Textbox(label="Enter your API Key for ElevenLabs, or leave empty to use GoogleTTS", value='')
|
1545 |
elevenid=gr.Dropdown(label="Voice:", choices=eleven_voices)
|
1546 |
with gr.Column():
|
|
|
1548 |
tts_button = gr.Button(value="Speak")
|
1549 |
tts_button.click(fn=elevenTTS, inputs=[api_box,tfs, elevenid, lang], outputs=[record_button, input_audio0])
|
1550 |
with gr.Row():
|
1551 |
+
with gr.Accordion('Wav2Lip', open=False, visible=False):
|
1552 |
with gr.Row():
|
1553 |
size = gr.Radio(label='Resolution:',choices=['Half','Full'])
|
1554 |
face = gr.UploadButton("Upload A Character",type='file')
|
|
|
1561 |
refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation])
|
1562 |
with gr.Row():
|
1563 |
animate_button = gr.Button('Animate')
|
1564 |
+
|
1565 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1566 |
vc_output2 = gr.Audio(
|
1567 |
label="Final Result! (Click on the three dots to download the audio)",
|
1568 |
type='filepath',
|
1569 |
interactive=False,
|
1570 |
)
|
1571 |
+
|
1572 |
+
with gr.Accordion('IlariaTTS', open=True):
|
1573 |
+
with gr.Column():
|
1574 |
+
ilariaid=gr.Dropdown(label="Voice:", choices=ilariavoices, value="English-Jenny (Female)")
|
1575 |
+
ilariatext = gr.Textbox(label="Input your Text", interactive=True, value="This is a test.")
|
1576 |
+
ilariatts_button = gr.Button(value="Speak")
|
1577 |
+
ilariatts_button.click(fn=ilariaTTS, inputs=[ilariatext, ilariaid], outputs=[record_button, input_audio0])
|
1578 |
+
|
1579 |
+
#with gr.Column():
|
1580 |
+
with gr.Accordion("Index Settings", open=False):
|
1581 |
+
#with gr.Row():
|
1582 |
+
|
1583 |
+
file_index1 = gr.Dropdown(
|
1584 |
+
label="3. Choose the index file (in case it wasn't automatically found.)",
|
1585 |
+
choices=get_indexes(),
|
1586 |
+
value=get_index(),
|
1587 |
+
interactive=True,
|
1588 |
+
)
|
1589 |
+
sid0.change(fn=match_index, inputs=[sid0],outputs=[file_index1])
|
1590 |
+
refresh_button.click(
|
1591 |
+
fn=change_choices, inputs=[], outputs=[sid0, file_index1]
|
1592 |
+
)
|
1593 |
+
# file_big_npy1 = gr.Textbox(
|
1594 |
+
# label=i18n("特征文件路径"),
|
1595 |
+
# value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
|
1596 |
+
# interactive=True,
|
1597 |
+
# )
|
1598 |
+
index_rate1 = gr.Slider(
|
1599 |
+
minimum=0,
|
1600 |
+
maximum=1,
|
1601 |
+
label=i18n("检索特征占比"),
|
1602 |
+
value=0.66,
|
1603 |
+
interactive=True,
|
1604 |
+
)
|
1605 |
+
|
1606 |
animate_button.click(fn=mouth, inputs=[size, face, vc_output2, faces], outputs=[animation, preview])
|
1607 |
+
|
1608 |
with gr.Accordion("Advanced Options", open=False):
|
1609 |
f0method0 = gr.Radio(
|
1610 |
label="Optional: Change the Pitch Extraction Algorithm. Extraction methods are sorted from 'worst quality' to 'best quality'. If you don't know what you're doing, leave rmvpe.",
|
|
|
1703 |
formanting.change(fn=formant_enabled,inputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button],outputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button])
|
1704 |
frmntbut.click(fn=formant_apply,inputs=[qfrency, tmbre], outputs=[qfrency, tmbre])
|
1705 |
formant_refresh_button.click(fn=update_fshift_presets,inputs=[formant_preset, qfrency, tmbre],outputs=[formant_preset, qfrency, tmbre])
|
1706 |
+
|
1707 |
with gr.Row():
|
1708 |
vc_output1 = gr.Textbox("")
|
1709 |
f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"), visible=False)
|
|
|
1729 |
[vc_output1, vc_output2],
|
1730 |
)
|
1731 |
|
1732 |
+
with gr.Accordion("Batch Conversion",open=False, visible=False):
|
1733 |
with gr.Row():
|
1734 |
with gr.Column():
|
1735 |
vc_transform1 = gr.Number(
|
|
|
1853 |
model = gr.Textbox(label="Name of the model (without spaces):")
|
1854 |
download_button=gr.Button("Download")
|
1855 |
with gr.Row():
|
1856 |
+
status_bar=gr.Textbox(label="Download Status")
|
1857 |
download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar])
|
1858 |
with gr.Row():
|
1859 |
gr.Markdown(
|
|
|
2105 |
else:
|
2106 |
print(
|
2107 |
"Pretrained weights not downloaded. Disabling training tab.\n"
|
2108 |
+
"Wondering how to train a voice? Join AI HUB Discord Server! https://discord.gg/aihub\n"
|
2109 |
"-------------------------------\n"
|
2110 |
)
|
2111 |
|
2112 |
+
app.queue(concurrency_count=511, max_size=1022).launch(share=False, quiet=False)
|
2113 |
#endregion
|
ilariatts.py
ADDED
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Maps the voice label shown in the UI dropdown to the Edge TTS voice short
# name handed to the synthesizer. Insertion order is the dropdown order.
#
# Labels (keys) are kept exactly as they were, since other code may refer to
# them by name (e.g. as a dropdown default).
# NOTE(review): 'ArabicSwedish-Mattias- (Male)' maps to a Swedish (sv-SE)
# voice, so the 'Arabic' prefix looks like a paste error; 'Malese' and
# 'Arabic (UAD)' also look like typos for 'Malay' and 'UAE' — confirm before
# renaming any label.
tts_order_voice = {
    'English-Jenny (Female)': 'en-US-JennyNeural',
    'English-Guy (Male)': 'en-US-GuyNeural',
    'English-Ana (Female)': 'en-US-AnaNeural',
    'English-Aria (Female)': 'en-US-AriaNeural',
    'English-Christopher (Male)': 'en-US-ChristopherNeural',
    'English-Eric (Male)': 'en-US-EricNeural',
    'English-Michelle (Female)': 'en-US-MichelleNeural',
    'English-Roger (Male)': 'en-US-RogerNeural',
    'Spanish (Mexican)-Dalia (Female)': 'es-MX-DaliaNeural',
    'Spanish (Mexican)-Jorge- (Male)': 'es-MX-JorgeNeural',
    'Korean-Sun-Hi- (Female)': 'ko-KR-SunHiNeural',
    'Korean-InJoon- (Male)': 'ko-KR-InJoonNeural',
    'Thai-Premwadee- (Female)': 'th-TH-PremwadeeNeural',
    'Thai-Niwat- (Male)': 'th-TH-NiwatNeural',
    'Vietnamese-HoaiMy- (Female)': 'vi-VN-HoaiMyNeural',
    'Vietnamese-NamMinh- (Male)': 'vi-VN-NamMinhNeural',
    'Japanese-Nanami- (Female)': 'ja-JP-NanamiNeural',
    'Japanese-Keita- (Male)': 'ja-JP-KeitaNeural',
    'French-Denise- (Female)': 'fr-FR-DeniseNeural',
    'French-Eloise- (Female)': 'fr-FR-EloiseNeural',
    'French-Henri- (Male)': 'fr-FR-HenriNeural',
    'Brazilian-Francisca- (Female)': 'pt-BR-FranciscaNeural',
    'Brazilian-Antonio- (Male)': 'pt-BR-AntonioNeural',
    'Indonesian-Ardi- (Male)': 'id-ID-ArdiNeural',
    'Indonesian-Gadis- (Female)': 'id-ID-GadisNeural',
    'Hebrew-Avri- (Male)': 'he-IL-AvriNeural',
    'Hebrew-Hila- (Female)': 'he-IL-HilaNeural',
    'Italian-Isabella- (Female)': 'it-IT-IsabellaNeural',
    'Italian-Diego- (Male)': 'it-IT-DiegoNeural',
    'Italian-Elsa- (Female)': 'it-IT-ElsaNeural',
    'Dutch-Colette- (Female)': 'nl-NL-ColetteNeural',
    'Dutch-Fenna- (Female)': 'nl-NL-FennaNeural',
    'Dutch-Maarten- (Male)': 'nl-NL-MaartenNeural',
    'Malese-Osman- (Male)': 'ms-MY-OsmanNeural',
    'Malese-Yasmin- (Female)': 'ms-MY-YasminNeural',
    'Norwegian-Pernille- (Female)': 'nb-NO-PernilleNeural',
    'Norwegian-Finn- (Male)': 'nb-NO-FinnNeural',
    'Swedish-Sofie- (Female)': 'sv-SE-SofieNeural',
    'ArabicSwedish-Mattias- (Male)': 'sv-SE-MattiasNeural',
    'Arabic-Hamed- (Male)': 'ar-SA-HamedNeural',
    'Arabic-Zariyah- (Female)': 'ar-SA-ZariyahNeural',
    'Greek-Athina- (Female)': 'el-GR-AthinaNeural',
    'Greek-Nestoras- (Male)': 'el-GR-NestorasNeural',
    'German-Katja- (Female)': 'de-DE-KatjaNeural',
    'German-Amala- (Female)': 'de-DE-AmalaNeural',
    'German-Conrad- (Male)': 'de-DE-ConradNeural',
    'German-Killian- (Male)': 'de-DE-KillianNeural',
    'Afrikaans-Adri- (Female)': 'af-ZA-AdriNeural',
    'Afrikaans-Willem- (Male)': 'af-ZA-WillemNeural',
    'Ethiopian-Ameha- (Male)': 'am-ET-AmehaNeural',
    'Ethiopian-Mekdes- (Female)': 'am-ET-MekdesNeural',
    'Arabic (UAD)-Fatima- (Female)': 'ar-AE-FatimaNeural',
    'Arabic (UAD)-Hamdan- (Male)': 'ar-AE-HamdanNeural',
    'Arabic (Bahrain)-Ali- (Male)': 'ar-BH-AliNeural',
    'Arabic (Bahrain)-Laila- (Female)': 'ar-BH-LailaNeural',
    'Arabic (Algeria)-Ismael- (Male)': 'ar-DZ-IsmaelNeural',
    'Arabic (Egypt)-Salma- (Female)': 'ar-EG-SalmaNeural',
    'Arabic (Egypt)-Shakir- (Male)': 'ar-EG-ShakirNeural',
    'Arabic (Iraq)-Bassel- (Male)': 'ar-IQ-BasselNeural',
    'Arabic (Iraq)-Rana- (Female)': 'ar-IQ-RanaNeural',
    'Arabic (Jordan)-Sana- (Female)': 'ar-JO-SanaNeural',
    'Arabic (Jordan)-Taim- (Male)': 'ar-JO-TaimNeural',
    'Arabic (Kuwait)-Fahed- (Male)': 'ar-KW-FahedNeural',
    'Arabic (Kuwait)-Noura- (Female)': 'ar-KW-NouraNeural',
    'Arabic (Lebanon)-Layla- (Female)': 'ar-LB-LaylaNeural',
    'Arabic (Lebanon)-Rami- (Male)': 'ar-LB-RamiNeural',
    'Arabic (Libya)-Iman- (Female)': 'ar-LY-ImanNeural',
    'Arabic (Libya)-Omar- (Male)': 'ar-LY-OmarNeural',
    'Arabic (Morocco)-Jamal- (Male)': 'ar-MA-JamalNeural',
    'Arabic (Morocco)-Mouna- (Female)': 'ar-MA-MounaNeural',
    'Arabic (Oman)-Abdullah- (Male)': 'ar-OM-AbdullahNeural',
    'Arabic (Oman)-Aysha- (Female)': 'ar-OM-AyshaNeural',
    'Arabic (Qatar)-Amal- (Female)': 'ar-QA-AmalNeural',
    'Arabic (Qatar)-Moaz- (Male)': 'ar-QA-MoazNeural',
    'Arabic (Syrian Arab Republic)-Amany- (Female)': 'ar-SY-AmanyNeural',
    'Arabic (Syrian Arab Republic)-Laith- (Male)': 'ar-SY-LaithNeural',
    'Arabic (Tunisia)-Hedi- (Male)': 'ar-TN-HediNeural',
    'Arabic (Tunisia)-Reem- (Female)': 'ar-TN-ReemNeural',
    'Arabic (Yemen )-Maryam- (Female)': 'ar-YE-MaryamNeural',
    'Arabic (Yemen )-Saleh- (Male)': 'ar-YE-SalehNeural',
    'Azerbaijani-Babek- (Male)': 'az-AZ-BabekNeural',
    'Azerbaijani-Banu- (Female)': 'az-AZ-BanuNeural',
    'Bulgarian-Borislav- (Male)': 'bg-BG-BorislavNeural',
    'Bulgarian-Kalina- (Female)': 'bg-BG-KalinaNeural',
    'Bengali (Bangladesh)-Nabanita- (Female)': 'bn-BD-NabanitaNeural',
    'Bengali (Bangladesh)-Pradeep- (Male)': 'bn-BD-PradeepNeural',
    'Bengali (India)-Bashkar- (Male)': 'bn-IN-BashkarNeural',
    'Bengali (India)-Tanishaa- (Female)': 'bn-IN-TanishaaNeural',
    'Bosniak (Bosnia and Herzegovina)-Goran- (Male)': 'bs-BA-GoranNeural',
    'Bosniak (Bosnia and Herzegovina)-Vesna- (Female)': 'bs-BA-VesnaNeural',
    'Catalan (Spain)-Joana- (Female)': 'ca-ES-JoanaNeural',
    'Catalan (Spain)-Enric- (Male)': 'ca-ES-EnricNeural',
    'Czech (Czech Republic)-Antonin- (Male)': 'cs-CZ-AntoninNeural',
    'Czech (Czech Republic)-Vlasta- (Female)': 'cs-CZ-VlastaNeural',
    'Welsh (UK)-Aled- (Male)': 'cy-GB-AledNeural',
    'Welsh (UK)-Nia- (Female)': 'cy-GB-NiaNeural',
    'Danish (Denmark)-Christel- (Female)': 'da-DK-ChristelNeural',
    'Danish (Denmark)-Jeppe- (Male)': 'da-DK-JeppeNeural',
    'German (Austria)-Ingrid- (Female)': 'de-AT-IngridNeural',
    'German (Austria)-Jonas- (Male)': 'de-AT-JonasNeural',
    'German (Switzerland)-Jan- (Male)': 'de-CH-JanNeural',
    'German (Switzerland)-Leni- (Female)': 'de-CH-LeniNeural',
    'English (Australia)-Natasha- (Female)': 'en-AU-NatashaNeural',
    'English (Australia)-William- (Male)': 'en-AU-WilliamNeural',
    'English (Canada)-Clara- (Female)': 'en-CA-ClaraNeural',
    'English (Canada)-Liam- (Male)': 'en-CA-LiamNeural',
    'English (UK)-Libby- (Female)': 'en-GB-LibbyNeural',
    'English (UK)-Maisie- (Female)': 'en-GB-MaisieNeural',
    'English (UK)-Ryan- (Male)': 'en-GB-RyanNeural',
    'English (UK)-Sonia- (Female)': 'en-GB-SoniaNeural',
    'English (UK)-Thomas- (Male)': 'en-GB-ThomasNeural',
    'English (Hong Kong)-Sam- (Male)': 'en-HK-SamNeural',
    'English (Hong Kong)-Yan- (Female)': 'en-HK-YanNeural',
    'English (Ireland)-Connor- (Male)': 'en-IE-ConnorNeural',
    'English (Ireland)-Emily- (Female)': 'en-IE-EmilyNeural',
    'English (India)-Neerja- (Female)': 'en-IN-NeerjaNeural',
    'English (India)-Prabhat- (Male)': 'en-IN-PrabhatNeural',
    'English (Kenya)-Asilia- (Female)': 'en-KE-AsiliaNeural',
    'English (Kenya)-Chilemba- (Male)': 'en-KE-ChilembaNeural',
    'English (Nigeria)-Abeo- (Male)': 'en-NG-AbeoNeural',
    'English (Nigeria)-Ezinne- (Female)': 'en-NG-EzinneNeural',
    'English (New Zealand)-Mitchell- (Male)': 'en-NZ-MitchellNeural',
    'English (Philippines)-James- (Male)': 'en-PH-JamesNeural',
    'English (Philippines)-Rosa- (Female)': 'en-PH-RosaNeural',
    'English (Singapore)-Luna- (Female)': 'en-SG-LunaNeural',
    'English (Singapore)-Wayne- (Male)': 'en-SG-WayneNeural',
    'English (Tanzania)-Elimu- (Male)': 'en-TZ-ElimuNeural',
    'English (Tanzania)-Imani- (Female)': 'en-TZ-ImaniNeural',
    'English (South Africa)-Leah- (Female)': 'en-ZA-LeahNeural',
    'English (South Africa)-Luke- (Male)': 'en-ZA-LukeNeural',
    'Spanish (Argentina)-Elena- (Female)': 'es-AR-ElenaNeural',
    'Spanish (Argentina)-Tomas- (Male)': 'es-AR-TomasNeural',
    'Spanish (Bolivia)-Marcelo- (Male)': 'es-BO-MarceloNeural',
    'Spanish (Bolivia)-Sofia- (Female)': 'es-BO-SofiaNeural',
    'Spanish (Colombia)-Gonzalo- (Male)': 'es-CO-GonzaloNeural',
    'Spanish (Colombia)-Salome- (Female)': 'es-CO-SalomeNeural',
    'Spanish (Costa Rica)-Juan- (Male)': 'es-CR-JuanNeural',
    'Spanish (Costa Rica)-Maria- (Female)': 'es-CR-MariaNeural',
    'Spanish (Cuba)-Belkys- (Female)': 'es-CU-BelkysNeural',
    'Spanish (Dominican Republic)-Emilio- (Male)': 'es-DO-EmilioNeural',
    'Spanish (Dominican Republic)-Ramona- (Female)': 'es-DO-RamonaNeural',
    'Spanish (Ecuador)-Andrea- (Female)': 'es-EC-AndreaNeural',
    'Spanish (Ecuador)-Luis- (Male)': 'es-EC-LuisNeural',
    'Spanish (Spain)-Alvaro- (Male)': 'es-ES-AlvaroNeural',
    'Spanish (Spain)-Elvira- (Female)': 'es-ES-ElviraNeural',
    'Spanish (Equatorial Guinea)-Teresa- (Female)': 'es-GQ-TeresaNeural',
    'Spanish (Guatemala)-Andres- (Male)': 'es-GT-AndresNeural',
    'Spanish (Guatemala)-Marta- (Female)': 'es-GT-MartaNeural',
    'Spanish (Honduras)-Carlos- (Male)': 'es-HN-CarlosNeural',
    'Spanish (Honduras)-Karla- (Female)': 'es-HN-KarlaNeural',
    'Spanish (Nicaragua)-Federico- (Male)': 'es-NI-FedericoNeural',
    'Spanish (Nicaragua)-Yolanda- (Female)': 'es-NI-YolandaNeural',
    'Spanish (Panama)-Margarita- (Female)': 'es-PA-MargaritaNeural',
    'Spanish (Panama)-Roberto- (Male)': 'es-PA-RobertoNeural',
    'Spanish (Peru)-Alex- (Male)': 'es-PE-AlexNeural',
    'Spanish (Peru)-Camila- (Female)': 'es-PE-CamilaNeural',
    'Spanish (Puerto Rico)-Karina- (Female)': 'es-PR-KarinaNeural',
    'Spanish (Puerto Rico)-Victor- (Male)': 'es-PR-VictorNeural',
    'Spanish (Paraguay)-Mario- (Male)': 'es-PY-MarioNeural',
    'Spanish (Paraguay)-Tania- (Female)': 'es-PY-TaniaNeural',
    'Spanish (El Salvador)-Lorena- (Female)': 'es-SV-LorenaNeural',
    'Spanish (El Salvador)-Rodrigo- (Male)': 'es-SV-RodrigoNeural',
    'Spanish (United States)-Alonso- (Male)': 'es-US-AlonsoNeural',
    'Spanish (United States)-Paloma- (Female)': 'es-US-PalomaNeural',
    'Spanish (Uruguay)-Mateo- (Male)': 'es-UY-MateoNeural',
    'Spanish (Uruguay)-Valentina- (Female)': 'es-UY-ValentinaNeural',
    'Spanish (Venezuela)-Paola- (Female)': 'es-VE-PaolaNeural',
    'Spanish (Venezuela)-Sebastian- (Male)': 'es-VE-SebastianNeural',
    'Estonian (Estonia)-Anu- (Female)': 'et-EE-AnuNeural',
    'Estonian (Estonia)-Kert- (Male)': 'et-EE-KertNeural',
    'Persian (Iran)-Dilara- (Female)': 'fa-IR-DilaraNeural',
    'Persian (Iran)-Farid- (Male)': 'fa-IR-FaridNeural',
    'Finnish (Finland)-Harri- (Male)': 'fi-FI-HarriNeural',
    'Finnish (Finland)-Noora- (Female)': 'fi-FI-NooraNeural',
    'French (Belgium)-Charline- (Female)': 'fr-BE-CharlineNeural',
    'French (Belgium)-Gerard- (Male)': 'fr-BE-GerardNeural',
    'French (Canada)-Sylvie- (Female)': 'fr-CA-SylvieNeural',
    'French (Canada)-Antoine- (Male)': 'fr-CA-AntoineNeural',
    'French (Canada)-Jean- (Male)': 'fr-CA-JeanNeural',
    'French (Switzerland)-Ariane- (Female)': 'fr-CH-ArianeNeural',
    'French (Switzerland)-Fabrice- (Male)': 'fr-CH-FabriceNeural',
    'Irish (Ireland)-Colm- (Male)': 'ga-IE-ColmNeural',
    'Irish (Ireland)-Orla- (Female)': 'ga-IE-OrlaNeural',
    'Galician (Spain)-Roi- (Male)': 'gl-ES-RoiNeural',
    'Galician (Spain)-Sabela- (Female)': 'gl-ES-SabelaNeural',
    'Gujarati (India)-Dhwani- (Female)': 'gu-IN-DhwaniNeural',
    'Gujarati (India)-Niranjan- (Male)': 'gu-IN-NiranjanNeural',
    'Hindi (India)-Madhur- (Male)': 'hi-IN-MadhurNeural',
    'Hindi (India)-Swara- (Female)': 'hi-IN-SwaraNeural',
    'Croatian (Croatia)-Gabrijela- (Female)': 'hr-HR-GabrijelaNeural',
    'Croatian (Croatia)-Srecko- (Male)': 'hr-HR-SreckoNeural',
    'Hungarian (Hungary)-Noemi- (Female)': 'hu-HU-NoemiNeural',
    'Hungarian (Hungary)-Tamas- (Male)': 'hu-HU-TamasNeural',
    'Icelandic (Iceland)-Gudrun- (Female)': 'is-IS-GudrunNeural',
    'Icelandic (Iceland)-Gunnar- (Male)': 'is-IS-GunnarNeural',
    'Javanese (Indonesia)-Dimas- (Male)': 'jv-ID-DimasNeural',
    'Javanese (Indonesia)-Siti- (Female)': 'jv-ID-SitiNeural',
    'Georgian (Georgia)-Eka- (Female)': 'ka-GE-EkaNeural',
    'Georgian (Georgia)-Giorgi- (Male)': 'ka-GE-GiorgiNeural',
    'Kazakh (Kazakhstan)-Aigul- (Female)': 'kk-KZ-AigulNeural',
    'Kazakh (Kazakhstan)-Daulet- (Male)': 'kk-KZ-DauletNeural',
    'Khmer (Cambodia)-Piseth- (Male)': 'km-KH-PisethNeural',
    'Khmer (Cambodia)-Sreymom- (Female)': 'km-KH-SreymomNeural',
    'Kannada (India)-Gagan- (Male)': 'kn-IN-GaganNeural',
    'Kannada (India)-Sapna- (Female)': 'kn-IN-SapnaNeural',
    'Lao (Laos)-Chanthavong- (Male)': 'lo-LA-ChanthavongNeural',
    'Lao (Laos)-Keomany- (Female)': 'lo-LA-KeomanyNeural',
    'Lithuanian (Lithuania)-Leonas- (Male)': 'lt-LT-LeonasNeural',
    'Lithuanian (Lithuania)-Ona- (Female)': 'lt-LT-OnaNeural',
    'Latvian (Latvia)-Everita- (Female)': 'lv-LV-EveritaNeural',
    'Latvian (Latvia)-Nils- (Male)': 'lv-LV-NilsNeural',
    'Macedonian (North Macedonia)-Aleksandar- (Male)': 'mk-MK-AleksandarNeural',
    'Macedonian (North Macedonia)-Marija- (Female)': 'mk-MK-MarijaNeural',
    'Malayalam (India)-Midhun- (Male)': 'ml-IN-MidhunNeural',
    'Malayalam (India)-Sobhana- (Female)': 'ml-IN-SobhanaNeural',
    'Mongolian (Mongolia)-Bataa- (Male)': 'mn-MN-BataaNeural',
    'Mongolian (Mongolia)-Yesui- (Female)': 'mn-MN-YesuiNeural',
    'Marathi (India)-Aarohi- (Female)': 'mr-IN-AarohiNeural',
    'Marathi (India)-Manohar- (Male)': 'mr-IN-ManoharNeural',
    'Maltese (Malta)-Grace- (Female)': 'mt-MT-GraceNeural',
    'Maltese (Malta)-Joseph- (Male)': 'mt-MT-JosephNeural',
    'Burmese (Myanmar)-Nilar- (Female)': 'my-MM-NilarNeural',
    'Burmese (Myanmar)-Thiha- (Male)': 'my-MM-ThihaNeural',
    'Nepali (Nepal)-Hemkala- (Female)': 'ne-NP-HemkalaNeural',
    'Nepali (Nepal)-Sagar- (Male)': 'ne-NP-SagarNeural',
    'Dutch (Belgium)-Arnaud- (Male)': 'nl-BE-ArnaudNeural',
    'Dutch (Belgium)-Dena- (Female)': 'nl-BE-DenaNeural',
    'Polish (Poland)-Marek- (Male)': 'pl-PL-MarekNeural',
    'Polish (Poland)-Zofia- (Female)': 'pl-PL-ZofiaNeural',
    # Was the truncated id 'ps-AF-Gul', which is not a valid voice short name.
    'Pashto (Afghanistan)-Gul Nawaz- (Male)': 'ps-AF-GulNawazNeural',
}
|
requirements.txt
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
gTTS
|
2 |
elevenlabs
|
|
|
3 |
stftpitchshift==1.5.1
|
4 |
torchcrepe
|
5 |
setuptools
|
@@ -19,4 +20,4 @@ mega.py
|
|
19 |
gdown
|
20 |
onnxruntime
|
21 |
pyngrok==4.1.12
|
22 |
-
torch
|
|
|
1 |
gTTS
|
2 |
elevenlabs
|
3 |
+
edge-tts
|
4 |
stftpitchshift==1.5.1
|
5 |
torchcrepe
|
6 |
setuptools
|
|
|
20 |
gdown
|
21 |
onnxruntime
|
22 |
pyngrok==4.1.12
|
23 |
+
torch
|
vc_infer_pipeline.py
CHANGED
@@ -15,6 +15,14 @@ bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
|
|
15 |
|
16 |
input_audio_path2wav = {}
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
@lru_cache
|
20 |
def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
|
@@ -312,14 +320,7 @@ class VC(object):
|
|
312 |
x, f0_min, f0_max, p_len, crepe_hop_length, "tiny"
|
313 |
)
|
314 |
elif f0_method == "rmvpe":
|
315 |
-
|
316 |
-
from rmvpe import RMVPE
|
317 |
-
|
318 |
-
print("loading rmvpe model")
|
319 |
-
self.model_rmvpe = RMVPE(
|
320 |
-
"rmvpe.pt", is_half=self.is_half, device=self.device
|
321 |
-
)
|
322 |
-
f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
|
323 |
|
324 |
elif "hybrid" in f0_method:
|
325 |
# Perform hybrid median pitch estimation
|
|
|
15 |
|
16 |
input_audio_path2wav = {}
|
17 |
|
18 |
+
#A fun little addition from my personal RVC branch.
|
19 |
+
#You don't have to implement it if you don't want to
|
20 |
+
from config import Config
|
21 |
+
config=Config()
|
22 |
+
from rmvpe import RMVPE
|
23 |
+
print("Preloading RMVPE model...")
|
24 |
+
model_rmvpe = RMVPE("rmvpe.pt", is_half=config.is_half, device=config.device)
|
25 |
+
del config
|
26 |
|
27 |
@lru_cache
|
28 |
def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
|
|
|
320 |
x, f0_min, f0_max, p_len, crepe_hop_length, "tiny"
|
321 |
)
|
322 |
elif f0_method == "rmvpe":
|
323 |
+
f0 = model_rmvpe.infer_from_audio(x, thred=0.03)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
324 |
|
325 |
elif "hybrid" in f0_method:
|
326 |
# Perform hybrid median pitch estimation
|