Spaces:
Build error
Build error
import streamlit as st | |
import torch | |
from parler_tts import ParlerTTSForConditionalGeneration | |
from transformers import AutoTokenizer | |
import soundfile as sf | |
# Set up the device: use the first CUDA device when torch reports one,
# otherwise fall back to the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Hub identifier shared by the model weights and the tokenizer.
_MODEL_ID = "parler-tts/parler-tts-large-v1"


@st.cache_resource
def _load_tts(model_id, target_device):
    """Load the Parler-TTS model and its tokenizer once per server process.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached ``from_pretrained`` call would reload the checkpoint on each
    rerun. ``st.cache_resource`` keeps the returned objects alive and hands
    back the same instances for the same arguments.

    Args:
        model_id: Hub identifier passed to both ``from_pretrained`` calls.
        target_device: Device string the model is moved to.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    tts_model = ParlerTTSForConditionalGeneration.from_pretrained(model_id)
    tts_model = tts_model.to(target_device)
    tts_tokenizer = AutoTokenizer.from_pretrained(model_id)
    return tts_model, tts_tokenizer


# Load the model and tokenizer (cached across reruns).
# NOTE(review): st.cache_resource needs a Streamlit release that ships it -
# confirm the pinned Streamlit version before merging.
model, tokenizer = _load_tts(_MODEL_ID, device)
# Neon-themed styling: inject a raw <style> element into the page.
# unsafe_allow_html=True is required so Streamlit emits the tag instead of
# escaping it; the CSS below is a fixed literal, not user input.
st.markdown(
    """
<style>
body {
    background-color: #0f0f0f;
    color: #0fff0f;
}
.stTextInput, .stTextArea {
    background-color: #333333;
    color: #0fff0f;
}
.stButton > button {
    background-color: #0fff0f;
    color: #0f0f0f;
}
</style>
""",
    unsafe_allow_html=True,
)

# NOTE(review): the title's leading emoji was mis-decoded in the source
# ("π€"); it is restored here as the microphone emoji, which matches the
# garbled byte pattern - confirm against the original file.
st.title("🎤 Neon TTS Converter")
# Predefined voice options.
# Maps the label shown in the sidebar selectbox to the natural-language
# voice description that is tokenized and passed to the model as
# ``input_ids`` when speech is generated. Insertion order is the order in
# which the options are listed in the UI.
voices = {
    "Smooth Female": (
        "A female speaker delivers a slightly expressive and animated speech "
        "with a moderate speed and pitch."
    ),
    "Monotone Male": (
        "Jon's voice is monotone yet slightly fast in delivery, "
        "with a very close recording that almost has no background noise."
    ),
    "Energetic Youth": (
        "An energetic young speaker with a lively tone and rapid speech, "
        "creating a sense of excitement."
    ),
    "Calm Elderly": (
        "An elderly speaker with a calm and slow-paced voice, "
        "bringing wisdom and serenity to the speech."
    ),
    "Robotic": (
        "A robotic, artificial voice with a consistent pitch "
        "and no variation in tone."
    ),
    "Narrator": (
        "A deep and clear voice, with a strong presence and a slightly slower pace, "
        "suitable for narrations."
    ),
    "Whisper": "A soft, whispered voice, with very low volume and an intimate tone.",
    "Formal": "A formal, authoritative voice with clear articulation and a steady pace.",
    "Happy": "A cheerful, upbeat voice with a positive tone and lively intonation.",
    "Mysterious": (
        "A mysterious and low-pitched voice, "
        "with slow delivery and a sense of intrigue."
    ),
    "Bass-Heavy Male": (
        "A deep, resonant male voice with a strong bass, "
        "ideal for dramatic and powerful delivery."
    ),
    "Actor Voice 1": (
        "An actor's voice with a dynamic range, "
        "capable of various emotional tones and expressions."
    ),
    "Actor Voice 2": (
        "A distinct and engaging actor's voice, "
        "providing a unique flair and character to the speech."
    ),
}
# Sidebar for voice selection. Iterating the dict yields its keys, so the
# selectbox options are the voice labels in insertion order.
st.sidebar.header("Select Voice")
voice_choice = st.sidebar.selectbox("Choose a Voice", list(voices))

# Display the selected voice description so the user can see the exact text
# that will be used for the chosen voice.
st.sidebar.markdown(f"**Description:** {voices[voice_choice]}")

# Input for custom prompt: the text that will be tokenized and passed to the
# model as the prompt when "Generate Speech" is pressed.
st.sidebar.header("Custom Prompt")
prompt = st.sidebar.text_area(
    "Enter your custom prompt",
    value="Hey, how are you doing today?",
)
# Generate the TTS output. The button returns True only on the rerun caused
# by its own click, so synthesis runs once per press.
if st.sidebar.button("Generate Speech"):
    if not prompt.strip():
        # A blank prompt leaves nothing to synthesize; report it instead of
        # sending an empty text through the tokenizer and model.
        st.warning("Please enter some text to convert to speech.")
    else:
        # Error handling: anything raised while tokenizing, generating or
        # writing the audio is shown in the page rather than as a traceback.
        try:
            description = voices[voice_choice]

            # Tokenize each text once; the same encoding supplies both the
            # token ids and the matching attention mask.
            description_inputs = tokenizer(description, return_tensors="pt").to(device)
            prompt_inputs = tokenizer(prompt, return_tensors="pt").to(device)

            # Generate speech: the voice description goes in as `input_ids`,
            # the text to speak as `prompt_input_ids`.
            generation = model.generate(
                input_ids=description_inputs.input_ids,
                prompt_input_ids=prompt_inputs.input_ids,
                attention_mask=description_inputs.attention_mask,
                prompt_attention_mask=prompt_inputs.attention_mask,
            )
            # Move the result to host memory and drop size-1 dimensions
            # before handing it to soundfile.
            audio_arr = generation.cpu().numpy().squeeze()

            # Save the audio file.
            # NOTE(review): this fixed path is shared by every session of the
            # app; concurrent users would overwrite each other's output -
            # consider a per-session file or an in-memory buffer.
            output_file = "parler_tts_out.wav"
            sf.write(output_file, audio_arr, model.config.sampling_rate)

            # Display the audio player
            st.audio(output_file)
            st.success("Speech generation complete!")
        except Exception as e:
            st.error(f"An error occurred: {e}")