Kevin676 committed on
Commit
cc6e289
1 Parent(s): d37479b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -20
app.py CHANGED
@@ -4,27 +4,120 @@ import os
4
  os.system('pip install paddlespeech')
5
  os.system('pip install paddlepaddle')
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  def inference(text):
8
- os.system("paddlespeech tts --input '"+text+"' --output output.wav")
9
- return "output.wav"
 
 
 
 
 
10
 
11
- def guess(texts):
12
- return inference(texts + "我喜欢你")
13
 
 
14
 
15
- description = "用 PaddleSpeech-TTS 來生成電話訪問的語音內容"
16
-
17
- article = "<p style='text-align: center'><a href='https://github.com/PaddlePaddle/PaddleSpeech' target='_blank'>Github Repo</a></p>"
18
-
19
- examples=[['我比較想試試看換成自己的聲音來輸出'], ['你是不是也想試試看如果可以變成自己的聲音輸出呢'], ['這個可能會有點久']]
20
-
21
- gr.Interface(
22
- guess,
23
- gr.inputs.Textbox(label="input text",lines=10),
24
- gr.outputs.Audio(type="filepath", label="Output"),
25
- title="<p style='text-align: center'><a href='https://www.twman.org/AI' target='_blank'>電話訪問:PaddleSpeech-TTS</a>",
26
- description=description,
27
- article=article,
28
- enable_queue=True,
29
- examples=examples
30
- ).launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  os.system('pip install paddlespeech')
5
  os.system('pip install paddlepaddle')
6
 
7
+ from transformers import AutoModel, AutoTokenizer
8
+ from TTS.api import TTS
9
+
10
+ tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False, gpu=True)
11
+
12
+ tts1 = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
13
+
14
+ import torch
15
+ import torchaudio
16
+ from speechbrain.pretrained import SpectralMaskEnhancement
17
+
18
+ enhance_model = SpectralMaskEnhancement.from_hparams(
19
+ source="speechbrain/metricgan-plus-voicebank",
20
+ savedir="pretrained_models/metricgan-plus-voicebank",
21
+ run_opts={"device":"cuda"},
22
+ )
23
+
24
+ tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
25
+ model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
26
+ model = model.eval()
27
+
28
def inference(text):
    """Synthesize `text` to speech with the `paddlespeech tts` command-line tool.

    The text is handed to the tool as a single argv entry instead of being
    spliced into a shell command line. The text normally comes from the chat
    model, so it may contain quotes or shell metacharacters; passing it as an
    argument keeps those characters literal.

    Args:
        text: The sentence(s) to synthesize.

    Returns:
        The fixed path "output.wav" that the tool is asked to write. The path
        is returned even when the tool fails or is missing, matching the
        previous `os.system` behaviour, which never raised.
    """
    import subprocess  # local import: the top-of-file import block is not visible here

    command = ["paddlespeech", "tts", "--input", text, "--output", "output.wav"]
    try:
        # check=False: a non-zero exit status was ignored before and still is.
        subprocess.run(command, check=False)
    except OSError as err:
        # Raised when the executable cannot be started (e.g. not installed).
        print(f"paddlespeech could not be started: {err}")
    return "output.wav"
31
+
32
def predict(input, history=None):
    """Run one chat turn and voice the reply.

    Args:
        input: The user's message for this turn.
        history: Conversation history passed to `model.chat`; a fresh empty
            list is used when it is None.

    Returns:
        A 3-tuple `(history, history, audio_path)`: the updated history
        returned by `model.chat` (twice, once per output component wired to
        this callback) and the path returned by `inference` for the reply.
    """
    previous_turns = [] if history is None else history
    reply, updated_turns = model.chat(tokenizer, input, previous_turns)
    spoken_reply_path = inference(reply)
    return updated_turns, updated_turns, spoken_reply_path
 
38
 
39
def chinese(audio, upload1, VoiceMicrophone1):
    """Convert `audio` to a reference voice, then run speech enhancement.

    Args:
        audio: Source recording passed as `source_wav` to the voice-conversion
            model.
        upload1: Reference voice from the upload widget; used when not None.
        VoiceMicrophone1: Reference voice from the microphone widget; used
            only when `upload1` is None.

    Returns:
        The fixed path "enhanced.wav" holding the enhanced result.
    """
    converted_path = "output0.wav"

    # An uploaded reference takes priority; the microphone take is the fallback.
    reference_wav = VoiceMicrophone1 if upload1 is None else upload1
    tts.voice_conversion_to_file(
        source_wav=audio,
        target_wav=reference_wav,
        file_path=converted_path,
    )

    # Load the converted file and add a leading batch dimension of size 1.
    waveform = enhance_model.load_audio(converted_path)
    batch = waveform.unsqueeze(0)

    cleaned = enhance_model.enhance_batch(batch, lengths=torch.tensor([1.]))
    torchaudio.save("enhanced.wav", cleaned.cpu(), 16000)
    return "enhanced.wav"
57
+
58
def english(text_en, upload, VoiceMicrophone):
    """Speak `text_en` in English with a reference voice, then enhance it.

    Args:
        text_en: Text to synthesize; surrounding whitespace is stripped.
        upload: Reference voice from the upload widget; used when not None.
        VoiceMicrophone: Reference voice from the microphone widget; used
            only when `upload` is None.

    Returns:
        The fixed path "enhanced.wav" holding the enhanced result.
    """
    synthesized_path = "output.wav"

    # An uploaded reference takes priority; the microphone take is the fallback.
    reference_wav = VoiceMicrophone if upload is None else upload
    tts1.tts_to_file(
        text_en.strip(),
        speaker_wav=reference_wav,
        language="en",
        file_path=synthesized_path,
    )

    # Load the synthesized file and add a leading batch dimension of size 1.
    waveform = enhance_model.load_audio(synthesized_path)
    batch = waveform.unsqueeze(0)

    cleaned = enhance_model.enhance_batch(batch, lengths=torch.tensor([1.]))
    torchaudio.save("enhanced.wav", cleaned.cpu(), 16000)
    return "enhanced.wav"
73
+
74
+ with gr.Blocks() as demo:
75
+ gr.Markdown(
76
+ """ # <center>🥳💬💕 - TalktoAI,随时随地,谈天说地!</center>
77
+
78
+ ### <center>🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!</center>
79
+
80
+ """
81
+ )
82
+ state = gr.State([])
83
+ chatbot = gr.Chatbot([], elem_id="chatbot").style(height=300)
84
+ audio_res = gr.Audio(label = "自动生成的声音")
85
+ with gr.Row():
86
+ # with gr.Column(scale=4):
87
+ txt = gr.Textbox(label = "说点什么吧(中英皆可)", lines=1)
88
+ # with gr.Column(scale=1):
89
+ button = gr.Button("开始对话吧")
90
+ txt.submit(predict, [txt, state], [chatbot, state, audio_res])
91
+ button.click(predict, [txt, state], [chatbot, state, audio_res])
92
+
93
+ with gr.Row().style(mobile_collapse=False, equal_height=True):
94
+ inp3 = audio_res
95
+ inp4 = gr.Audio(source="upload", label = "请上传您喜欢的声音(wav/mp3文件);长语音(90s左右)效果更好", type="filepath")
96
+ inp5 = gr.Audio(source="microphone", type="filepath", label = '请用麦克风上传您喜欢的声音,与文件上传二选一即可')
97
+ btn1 = gr.Button("用喜欢的声音听一听吧(中文)")
98
+
99
+ btn2 = gr.Button("用喜欢的声音听一听吧(英文)")
100
+ with gr.Row():
101
+ out1 = gr.Audio(label="为您合成的专属声音(中文)")
102
+ out2 = gr.Audio(label="为您合成的专属声音(英文)")
103
+ btn1.click(chinese, [inp3, inp4, inp5], [out1])
104
+ btn2.click(english, [inp3, inp4, inp5], [out2])
105
+
106
+ gr.Markdown(
107
+ """ ### <center>注意❗:请不要输入或生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及娱乐使用,用户输入或生成的内容与程序开发者无关,请自觉合法合规使用,违反者一切后果自负。</center>
108
+
109
+ ### <center>Model by [ChatGLM-6B](https://huggingface.co/THUDM/chatglm-6b). Thanks to [THUDM](https://github.com/THUDM). Please follow me on [Bilibili](https://space.bilibili.com/501495851?spm_id_from=333.1007.0.0).</center>
110
+
111
+ """
112
+ )
113
+
114
+ gr.HTML('''
115
+ <div class="footer">
116
+ <p>🎶🖼️🎡 - It’s the intersection of technology and liberal arts that makes our hearts sing. - Steve Jobs
117
+ </p>
118
+ <p>注:中文声音克隆实际上是通过声音转换(Voice Conversion)实现,所以输出结果可能更像是一种新的声音,效果不一定很理想,希望大家多多包涵,之后我们也会不断迭代该程序的!为了实现更好的效果,使用中文声音克隆时请尽量上传女声。
119
+ </p>
120
+ </div>
121
+ ''')
122
+
123
+ demo.queue().launch(show_error=True)