import gradio as gr
from transformers import pipeline
from espnet2.bin.tts_inference import Text2Speech
import warnings
warnings.filterwarnings('ignore')


def generateTextAndAudio(inputText, numGen):
    # --- Generating the Text ---
    # With the text provided by the user, generate more text up to `numGen` tokens/sub-words
    textOutput = textGenerator(inputText, max_length=numGen)

    # The output of the text generator is a list of dictionaries; grab the first dictionary,
    # then get the generated text from it using the `generated_text` key
    genText = textOutput[0]['generated_text']

    print("-" * 75)
    print("Input Text:", inputText)
    print("Generated Text:", genText)
    print("-" * 75)

    # --- Generating the Audio ---
    # With the newly generated text, generate some speech
    #audioOutput = audioGenerator(genText)

    # Get the wav data
    #genAudio = audioOutput['wav']

    # Return two things:
    # 1) the generated text
    # 2) the 24k sampling rate and the generated audio (wav) as numpy (instead of a tensor)
    # return genText, (24000, genAudio.numpy())
    return genText


# Main
textGenerator = pipeline('text-generation', model='rifkat/GPTuz')
audioGenerator = Text2Speech.from_pretrained("espnet/kan-bayashi_ljspeech_joint_finetune_conformer_fastspeech2_hifigan")

input1_textbox = gr.Textbox(label="Tekst*")  # "Text*"
input2_slider = gr.Slider(minimum=1, maximum=100, step=1, value=30,
                          label="Generatsiya bo'ladigan so'zlar soni")  # "Number of words to generate"

output1_textbox = gr.Textbox(label="Generatsiya bo'lgan tekst")  # "Generated text"
# output2_Audio = gr.Audio(label="Generatsiya bo'lgan audio")  # "Generated audio"

title = "Tekst generatsiya qiling!"  # "Generate text!"
description = "Tekst kiriting va nechta so'z generatsiya qilishini tanlang"  # "Enter text and choose how many words to generate"

examples = [
    ["Давлат хавфсизлик хизмати", 50],
    ["Шунинг учун биз", 30],
    ["Лекин бугун бу нарсани қилмасак", 60]
]

iface = gr.Interface(fn=generateTextAndAudio,
                     inputs=[input1_textbox, input2_slider],
                     # outputs=[output1_textbox, output2_Audio],
                     outputs=output1_textbox,
                     title=title,
                     description=description,
                     examples=examples).launch(debug=True)