from transformers import pipeline, Conversation
import gradio as gr
from diffusers import DiffusionPipeline
from scipy.io import wavfile  # only needed if you save generated audio to disk (see note below)
# Initialize the models: BlenderBot for chat, latent diffusion for
# text-to-image, and MusicGen for text-to-audio
chatbot = pipeline(model="facebook/blenderbot-400M-distill")
ldm = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256")
synthesiser = pipeline("text-to-audio", "facebook/musicgen-small")
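# All three models load on CPU by default and together need several GB of
# memory. On a GPU machine you can optionally move them over, e.g. (assuming
# a CUDA device is available):
#
#   chatbot = pipeline(model="facebook/blenderbot-400M-distill", device=0)
#   ldm = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256").to("cuda")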
# Running chat history, updated by vanilla_chatbot on every turn
message_list = []
response_list = []
def vanilla_chatbot(message):
    # Feed the running history plus the new message to the conversational pipeline
    conversation = Conversation(text=message, past_user_inputs=message_list, generated_responses=response_list)
    conversation = chatbot(conversation)
    # Record the exchange so later turns keep context
    message_list.append(message)
    response_list.append(conversation.generated_responses[-1])
    return response_list[-1]
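# Note: Conversation and the conversational pipeline were removed in newer
# transformers releases (v4.42+). On those versions, a minimal fallback that
# drops the history is to run BlenderBot as a plain text2text-generation
# pipeline instead, e.g.:
#
#   chatbot = pipeline("text2text-generation", model="facebook/blenderbot-400M-distill")
#   reply = chatbot(message)[0]["generated_text"]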
def generate_image(prompt):
    # 50 denoising steps is a reasonable quality/speed trade-off for this model
    images = ldm([prompt], num_inference_steps=50, eta=0.3, guidance_scale=6)
    return images.images[0]
def generate_music(prompt):
    # Sample up to 100 new tokens, which yields a clip of a few seconds
    music = synthesiser(prompt, forward_params={"do_sample": True, "max_new_tokens": 100})
    rate = music["sampling_rate"]
    audio = music["audio"][0].reshape(-1)  # flatten to a 1-D waveform for gr.Audio
    return rate, audio
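# Optional: persist a generated clip to disk with scipy (the filename here is
# arbitrary), mirroring the MusicGen example in the transformers docs:
#
#   rate, audio = generate_music("lo-fi beat with a soothing melody")
#   wavfile.write("musicgen_out.wav", rate=rate, data=audio)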
def process_input(prompt, choice):
    # Route the prompt to the selected model; the unused outputs stay None
    if choice == "Chat":
        return vanilla_chatbot(prompt), None, None
    elif choice == "Music":
        rate, audio = generate_music(prompt)
        return None, (rate, audio), None
    else:  # "Image"
        return None, None, generate_image(prompt)
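# Quick smoke test without launching the UI (the prompt is just an example):
#
#   print(process_input("Hello there!", "Chat"))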
# Alternative UI built with gr.Blocks, kept for reference:
# demo = gr.Blocks()
# with demo:
#     with gr.Row():
#         text_input = gr.Textbox()
#         choice = gr.Radio(choices=["Chat", "Music", "Image"])
#     with gr.Row():
#         chatbot_output = gr.Textbox()
#         music_output = gr.Audio()
#         image_output = gr.Image()
#     submit_btn = gr.Button("Generate")
#     submit_btn.click(fn=process_input, inputs=[text_input, choice], outputs=[chatbot_output, music_output, image_output])
# demo.launch(debug=True)
demo = gr.Interface(
    fn=process_input,
    inputs=[gr.Textbox(), gr.Radio(["Chat", "Music", "Image"])],
    outputs=[gr.Textbox(), gr.Audio(), gr.Image()],
    # equivalently: outputs=["text", "audio", "image"]
    title="Multimodal Assistant",
)
demo.launch(debug=True)