import gradio as gr import os os.system('pip install paddlespeech') os.system('pip install paddlepaddle') from transformers import AutoModel, AutoTokenizer from TTS.api import TTS tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False, gpu=True) tts1 = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True) import torch import torchaudio from speechbrain.pretrained import SpectralMaskEnhancement enhance_model = SpectralMaskEnhancement.from_hparams( source="speechbrain/metricgan-plus-voicebank", savedir="pretrained_models/metricgan-plus-voicebank", run_opts={"device":"cuda"}, ) tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda() model = model.eval() def inference(text): os.system("paddlespeech tts --input '"+text+"' --output output.wav") return "output.wav" def predict(input, history=None): if history is None: history = [] response, history = model.chat(tokenizer, input, history) return history, history, response def chinese(text_cn, upload1, VoiceMicrophone1): if upload1 is not None: tts.voice_conversion_to_file(source_wav=inference(text_cn), target_wav=upload1, file_path="output0.wav") else: tts.voice_conversion_to_file(source_wav=inference(text_cn), target_wav=VoiceMicrophone1, file_path="output0.wav") noisy = enhance_model.load_audio( "output0.wav" ).unsqueeze(0) enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.])) torchaudio.save("enhanced.wav", enhanced.cpu(), 16000) return "enhanced.wav" def english(text_en, upload, VoiceMicrophone): if upload is not None: tts1.tts_to_file(text_en.strip(), speaker_wav = upload, language="en", file_path="output.wav") else: tts1.tts_to_file(text_en.strip(), speaker_wav = VoiceMicrophone, language="en", file_path="output.wav") noisy = enhance_model.load_audio( "output.wav" ).unsqueeze(0) enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.])) torchaudio.save("enhanced.wav", enhanced.cpu(), 16000) return "enhanced.wav" with gr.Blocks() as demo: gr.Markdown( """ #