# Spaces: Running
import os
import tempfile
from pathlib import Path

import requests  # Use for calling APIs if Groq's SDK doesn't support transcription
import streamlit as st
from dotenv import load_dotenv
from groq import Groq
# Load environment variables (e.g. from a local .env file) so the API key
# does not have to be hard-coded in this script.
load_dotenv()

# Validate the key up front: a missing or empty GROQ_API_KEY would otherwise
# only surface later as an SDK/authentication error instead of a clear message.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    st.error("GROQ_API_KEY is not set. Add it to your environment or .env file.")
    st.stop()

# Initialize Groq client
client = Groq(api_key=groq_api_key)
# Page header: application title followed by a one-line description.
st.title("Voice-Voice Application")
st.markdown(
    "Listen your voice using Whisper for transcription "
    "and TTS for voice generation."
)

# Audio upload widget. `uploaded_file` stays None until the user picks a file.
accepted_extensions = ["wav", "mp3", "mp4", "m4a"]
uploaded_file = st.file_uploader(
    label="Upload your audio file for transcription",
    type=accepted_extensions,
)
# Main workflow: preview the upload, transcribe it with Whisper via Groq,
# then let the user turn the (editable) transcript back into speech.
if uploaded_file is not None:
    # Display uploaded audio, labelled with the subtype of its reported MIME type.
    audio_format = uploaded_file.type.split("/")[-1]
    st.audio(uploaded_file, format=f"audio/{audio_format}")
    st.write("Processing your audio file...")

    # Save the upload to a temporary file. The suffix comes from the upload's
    # real extension: MIME subtypes such as "x-wav" or "x-m4a" are not valid
    # audio extensions, and the transcription API is presumed to infer the
    # format from the file name. The MIME subtype is only a fallback.
    temp_suffix = Path(uploaded_file.name).suffix.lower() or f".{audio_format}"
    with tempfile.NamedTemporaryFile(delete=False, suffix=temp_suffix) as temp_audio:
        # getvalue() returns the whole buffer regardless of the current stream
        # position, so nothing that read the upload earlier can truncate it.
        temp_audio.write(uploaded_file.getvalue())
        temp_audio_path = temp_audio.name

    try:
        # Call transcription API (adjust as per Groq API documentation).
        # The handle is closed as soon as the request returns so that the
        # cleanup in `finally` never races an open file.
        with open(temp_audio_path, "rb") as audio_stream:
            transcription_response = client.audio.transcriptions.create(
                file=audio_stream,
                model="whisper-large-v3-turbo",
                response_format="text",  # Adjust format if needed
            )

        # Extract the transcribed text
        transcribed_text = transcription_response  # May vary; adjust based on API response
        st.success("Transcription completed!")
        st.write("**Transcribed Text:**", transcribed_text)

        # Voice Cloning (TTS Integration)
        st.markdown("---")
        st.subheader("Generate Speech from Transcription")
        tts_input = st.text_area("Enter text to generate speech:", value=transcribed_text)

        if st.button("Generate Speech"):
            # Whitespace-only input is treated the same as empty input.
            if tts_input and tts_input.strip():
                # Use a TTS system to generate audio (placeholder)
                tts_response = requests.post(
                    "https://tts.api.url",  # Replace with actual TTS API URL
                    json={"text": tts_input, "voice": "en-US-Wavenet-D"},  # Adjust parameters
                    timeout=30,  # never let a stalled endpoint hang the session
                )
                if tts_response.status_code == 200:
                    st.audio(tts_response.content, format="audio/mp3")
                    st.success("Speech generation successful!")
                else:
                    # Error bodies are not guaranteed to be JSON; fall back to
                    # the raw text so the real failure is still shown.
                    try:
                        error_detail = tts_response.json()
                    except ValueError:
                        error_detail = tts_response.text
                    st.error(f"Error in TTS: {error_detail}")
            else:
                st.warning("Please enter some text.")
    except Exception as e:
        # Top-level UI boundary: report any failure on the page.
        st.error(f"Error during processing: {e}")
    finally:
        # Clean up temporary file (created with delete=False above).
        os.remove(temp_audio_path)
# Footer: author credit rendered after the main workflow.
footer_credit = "Developed with ❤️ by Sanam Iftakhar"
st.markdown(footer_credit)