import streamlit as st
from transformers import Qwen2AudioForConditionalGeneration, AutoProcessor
import pyttsx3 # For text-to-speech
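# Note: pyttsx3 drives the operating system's speech engine (e.g., SAPI5 on Windows,
# NSSpeechSynthesizer on macOS, eSpeak on Linux), so one of these must be installed.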

# Load the Qwen2-Audio model and processor
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct")
model = Qwen2AudioForConditionalGeneration.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct", device_map="auto")
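# Note: device_map="auto" relies on the accelerate package and places the model on a
# GPU when one is available, otherwise on the CPU.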
tts_engine = pyttsx3.init()
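
# Note: Streamlit reruns this script from the top on every interaction, so the model
# above is reloaded on each button click. A common fix (sketch, assuming a Streamlit
# version that provides st.cache_resource) is to move the loading into a cached helper:
#
#   @st.cache_resource
#   def load_model():
#       processor = AutoProcessor.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct")
#       model = Qwen2AudioForConditionalGeneration.from_pretrained(
#           "Qwen/Qwen2-Audio-7B-Instruct", device_map="auto"
#       )
#       return processor, model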

# Streamlit app UI
st.title("Text-to-Audio App")
st.text("This app generates a text response with Qwen2-Audio and reads it aloud with text-to-speech.")

# User input
text_input = st.text_area("Enter some text for the model:")

if st.button("Generate Audio"):
    # Require some input before calling the model
    if not text_input.strip():
        st.warning("Please enter some text first.")
        st.stop()

    conversation = [{"role": "user", "content": text_input}]

    # Build the prompt with the model's chat template
    text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
    inputs = processor(text=text, return_tensors="pt", padding=True)
    # Move the inputs to whatever device the model was loaded on (GPU or CPU)
    inputs = inputs.to(model.device)

    # Generate a response and drop the prompt tokens from the output
    generate_ids = model.generate(**inputs, max_length=256)
    generate_ids = generate_ids[:, inputs.input_ids.size(1):]

    # Decode the generated tokens into text
    response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    st.text(f"Model Response: {response}")

    # Convert the response to speech
    tts_engine.say(response)
    tts_engine.runAndWait()
    st.success("Audio generated and played!")
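
# Note: pyttsx3 plays audio on the machine running this script, so when the app is
# hosted (e.g., on Hugging Face Spaces) the sound comes out of the server, not the
# visitor's browser. A browser-friendly alternative (sketch, using pyttsx3's
# save_to_file and Streamlit's st.audio) would be:
#
#   tts_engine.save_to_file(response, "response.wav")
#   tts_engine.runAndWait()
#   with open("response.wav", "rb") as f:
#       st.audio(f.read(), format="audio/wav")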