Update app.py
app.py
CHANGED
@@ -4,27 +4,120 @@ import os
 os.system('pip install paddlespeech')
 os.system('pip install paddlepaddle')
 
-    return inference(texts + "我喜欢你")
+from transformers import AutoModel, AutoTokenizer
+from TTS.api import TTS
+
+tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False, gpu=True)
+
+tts1 = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
+
+import torch
+import torchaudio
+from speechbrain.pretrained import SpectralMaskEnhancement
+
+enhance_model = SpectralMaskEnhancement.from_hparams(
+    source="speechbrain/metricgan-plus-voicebank",
+    savedir="pretrained_models/metricgan-plus-voicebank",
+    run_opts={"device":"cuda"},
+)
+
+tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
+model = model.eval()
+
 def inference(text):
+    os.system("paddlespeech tts --input '"+text+"' --output output.wav")
+    return "output.wav"
+
+def predict(input, history=None):
+    if history is None:
+        history = []
+    response, history = model.chat(tokenizer, input, history)
 
+    return history, history, inference(response)
 
+def chinese(audio, upload1, VoiceMicrophone1):
 
+    if upload1 is not None:
+
+        tts.voice_conversion_to_file(source_wav=audio, target_wav=upload1, file_path="output0.wav")
+
+    else:
+        tts.voice_conversion_to_file(source_wav=audio, target_wav=VoiceMicrophone1, file_path="output0.wav")
+
+
+    noisy = enhance_model.load_audio(
+        "output0.wav"
+    ).unsqueeze(0)
+
+    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
+    torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
+
+    return "enhanced.wav"
+
+def english(text_en, upload, VoiceMicrophone):
+    if upload is not None:
+        tts1.tts_to_file(text_en.strip(), speaker_wav = upload, language="en", file_path="output.wav")
+
+    else:
+        tts1.tts_to_file(text_en.strip(), speaker_wav = VoiceMicrophone, language="en", file_path="output.wav")
+
+    noisy = enhance_model.load_audio(
+        "output.wav"
+    ).unsqueeze(0)
+
+    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
+    torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
+
+    return "enhanced.wav"
+
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """ # <center>🥳💬💕 - TalktoAI,随时随地,谈天说地!</center>
+
+        ### <center>🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!</center>
+
+        """
+    )
+    state = gr.State([])
+    chatbot = gr.Chatbot([], elem_id="chatbot").style(height=300)
+    audio_res = gr.Audio(label = "自动生成的声音")
+    with gr.Row():
+        # with gr.Column(scale=4):
+        txt = gr.Textbox(label = "说点什么吧(中英皆可)", lines=1)
+        # with gr.Column(scale=1):
+        button = gr.Button("开始对话吧")
+    txt.submit(predict, [txt, state], [chatbot, state, audio_res])
+    button.click(predict, [txt, state], [chatbot, state, audio_res])
+
+    with gr.Row().style(mobile_collapse=False, equal_height=True):
+        inp3 = audio_res
+        inp4 = gr.Audio(source="upload", label = "请上传您喜欢的声音(wav/mp3文件);长语音(90s左右)效果更好", type="filepath")
+        inp5 = gr.Audio(source="microphone", type="filepath", label = '请用麦克风上传您喜欢的声音,与文件上传二选一即可')
+        btn1 = gr.Button("用喜欢的声音听一听吧(中文)")
+
+        btn2 = gr.Button("用喜欢的声音听一听吧(英文)")
+    with gr.Row():
+        out1 = gr.Audio(label="为您合成的专属声音(中文)")
+        out2 = gr.Audio(label="为您合成的专属声音(英文)")
+    btn1.click(chinese, [inp3, inp4, inp5], [out1])
+    btn2.click(english, [inp3, inp4, inp5], [out2])
+
+    gr.Markdown(
+        """ ### <center>注意❗:请不要输入或生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及娱乐使用,用户输入或生成的内容与程序开发者无关,请自觉合法合规使用,违反者一切后果自负。</center>
+
+        ### <center>Model by [ChatGLM-6B](https://huggingface.co/THUDM/chatglm-6b). Thanks to [THUDM](https://github.com/THUDM). Please follow me on [Bilibili](https://space.bilibili.com/501495851?spm_id_from=333.1007.0.0).</center>
+
+        """
+    )
+
+    gr.HTML('''
+        <div class="footer">
+            <p>🎶🖼️🎡 - It’s the intersection of technology and liberal arts that makes our hearts sing. - Steve Jobs
+            </p>
+            <p>注:中文声音克隆实际上是通过声音转换(Voice Conversion)实现,所以输出结果可能更像是一种新的声音,效果不一定很理想,希望大家多多包涵,之后我们也会不断迭代该程序的!为了实现更好的效果,使用中文声音克隆时请尽量上传女声。
+            </p>
+        </div>
+    ''')
+
+demo.queue().launch(show_error=True)
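
Both chinese() and english() in the new code end with the same SpeechBrain MetricGAN+ clean-up pass over their intermediate wav file. Below is a minimal standalone sketch of that step, assuming a CUDA device as in the commit; the helper name enhance_wav and its arguments are illustrative and not part of the commit.

import torch
import torchaudio
from speechbrain.pretrained import SpectralMaskEnhancement

enhance_model = SpectralMaskEnhancement.from_hparams(
    source="speechbrain/metricgan-plus-voicebank",
    savedir="pretrained_models/metricgan-plus-voicebank",
    run_opts={"device": "cuda"},
)

def enhance_wav(in_path, out_path="enhanced.wav"):
    # load_audio returns a 1-D waveform; enhance_batch expects a (batch, time) tensor
    noisy = enhance_model.load_audio(in_path).unsqueeze(0)
    # lengths gives the relative length of each batch item (here: the full clip)
    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
    # saved at 16 kHz, matching the torchaudio.save call in the commit
    torchaudio.save(out_path, enhanced.cpu(), 16000)
    return out_path

Factoring the pass out like this would let the Chinese and English paths share one code path; the commit keeps the two inline copies instead.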
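The text side of the app is a ChatGLM-6B chat turn followed by the PaddleSpeech CLI call in inference(). A hedged sketch of one such turn, pieced together from the calls in the diff; the example query is illustrative only.

import os
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
model = model.eval()

history = []
# model.chat returns the reply text plus the updated history of (query, reply) pairs
response, history = model.chat(tokenizer, "Hello", history)

# same shell call as inference(); the quoting breaks if the reply contains a single quote
os.system("paddlespeech tts --input '" + response + "' --output output.wav")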