Amelia-James's picture
Update app.py
27002e6 verified
raw
history blame
2.8 kB
import os
from dotenv import load_dotenv
import streamlit as st
from groq import Groq
import tempfile
import requests # Use for calling APIs if Groq's SDK doesn't support transcription
# Read settings (such as GROQ_API_KEY) from a local .env file, if one exists.
load_dotenv()

# Groq API client, authenticated with the key taken from the environment.
groq_api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)

# Page header.
st.title("Voice-Voice Application")
st.markdown(
    "Listen your voice using Whisper for transcription and TTS for voice generation."
)

# Audio upload widget, restricted to the listed file extensions.
accepted_extensions = ["wav", "mp3", "mp4", "m4a"]
uploaded_file = st.file_uploader(
    "Upload your audio file for transcription",
    type=accepted_extensions,
)
# Handle an uploaded audio file: preview it, transcribe it through the Groq
# client, then offer speech generation for the (editable) transcript.
if uploaded_file is not None:
    # Preview the upload, labelled with the subtype of its reported MIME type.
    audio_format = uploaded_file.type.split('/')[-1]
    st.audio(uploaded_file, format=f"audio/{audio_format}")
    st.write("Processing your audio file...")

    # Name the temporary copy after the upload's real extension. A MIME
    # subtype is not always a usable extension (e.g. "x-m4a").
    # NOTE(review): presumably the transcription API infers the audio type
    # from the file name - confirm against the Groq documentation.
    suffix = os.path.splitext(uploaded_file.name)[1] or '.' + audio_format

    # delete=False so the file can be reopened by path below; it is removed
    # explicitly in the `finally` clause.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio:
        # getvalue() returns the whole buffer regardless of the current read
        # position, which earlier consumers of the upload may have moved.
        temp_audio.write(uploaded_file.getvalue())
        temp_audio_path = temp_audio.name

    try:
        # Open through a context manager so the handle is closed before the
        # temporary file is deleted (an open handle blocks removal on Windows).
        with open(temp_audio_path, "rb") as audio_file:
            # Call transcription API (adjust as per Groq API documentation)
            transcription_response = client.audio.transcriptions.create(
                file=audio_file,  # Binary file object for the API
                model="whisper-large-v3-turbo",
                response_format="text",  # Adjust format if needed
            )

        # Extract the transcribed text
        transcribed_text = transcription_response  # May vary; adjust based on API response
        st.success("Transcription completed!")
        st.write("**Transcribed Text:**", transcribed_text)

        # Voice Cloning (TTS Integration)
        st.markdown("---")
        st.subheader("Generate Speech from Transcription")
        tts_input = st.text_area("Enter text to generate speech:", value=transcribed_text)

        if st.button("Generate Speech"):
            if tts_input:
                # Use a TTS system to generate audio (placeholder)
                tts_response = requests.post(
                    "https://tts.api.url",  # Replace with actual TTS API URL
                    json={"text": tts_input, "voice": "en-US-Wavenet-D"},  # Adjust parameters
                    timeout=30,  # Without a timeout a stalled server would hang the app
                )
                if tts_response.status_code == 200:
                    st.audio(tts_response.content, format="audio/mp3")
                    st.success("Speech generation successful!")
                else:
                    # Error bodies are not guaranteed to be JSON; fall back to
                    # the raw text so the real failure is still reported.
                    try:
                        error_detail = tts_response.json()
                    except ValueError:
                        error_detail = tts_response.text
                    st.error(f"Error in TTS: {error_detail}")
            else:
                st.warning("Please enter some text.")
    except Exception as e:
        # Top-level boundary for this UI flow: report the failure to the user
        # instead of crashing the page.
        st.error(f"Error during processing: {e}")
    finally:
        # Clean up temporary file
        os.remove(temp_audio_path)
# Footer: author credit, emitted as the script's final element.
footer_text = "Developed with ❤️ by Sanam Iftakhar"
st.markdown(footer_text)