Spaces:
Running
Running
import os | |
import gradio as gr | |
import whisper | |
from gtts import gTTS | |
import io | |
from groq import Groq | |
# The Groq API key is read from the GROQ_API_KEY environment variable.
groq_api_key = os.environ.get("GROQ_API_KEY")
groq_client = Groq(api_key=groq_api_key)

# Load the Whisper speech-to-text model once at import time so every request
# reuses it. "base" can be swapped for "small", "medium" or "large".
model = whisper.load_model("base")
def process_audio(file_path):
    """Transcribe a recording, get a chat reply for it, and voice the reply.

    The audio is transcribed with the module-level Whisper ``model``, the
    transcript is sent as a single user message through ``groq_client``, and
    the reply text is converted to speech with gTTS.

    Args:
        file_path: Path to the audio file to process. May be ``None`` or an
            empty string when there is no recording.

    Returns:
        A ``(text, audio_path)`` tuple. ``text`` is the model's reply, or a
        short status/error message when nothing could be produced.
        ``audio_path`` is the path to an MP3 file containing the spoken reply,
        or ``None`` when no audio was generated.
    """
    # Guard first: without a recording there is nothing to transcribe, and
    # passing None on to Whisper would only surface a confusing library error.
    if not file_path:
        return "No audio was provided.", None

    # Imported here so the function brings its own dependency into scope.
    import tempfile

    try:
        # Load the audio file and transcribe it with Whisper.
        audio = whisper.load_audio(file_path)
        result = model.transcribe(audio)
        text = result["text"]

        # Silence or noise can transcribe to an empty string; do not spend an
        # API call on an empty prompt.
        if not text.strip():
            return "No speech was detected in the audio.", None

        # Generate a response using Groq.
        chat_completion = groq_client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )

        # The message content can be None, so normalise before stripping.
        response_message = (chat_completion.choices[0].message.content or "").strip()
        if not response_message:
            # gTTS cannot synthesize empty text, so stop before calling it.
            return "The model returned an empty response.", None

        # Convert the response text to speech. A unique temporary file is used
        # per call so that concurrent requests never overwrite each other's
        # audio (a fixed "response.mp3" would be shared by every request).
        tts = gTTS(response_message)
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
            tts.write_to_fp(audio_file)
            response_audio_path = audio_file.name

        # Return the response text and the path to the saved audio file.
        return response_message, response_audio_path
    except Exception as e:
        # UI boundary: report the failure in the text output instead of
        # crashing the request handler.
        return f"An error occurred: {e}", None
# Input: the recording is handed to process_audio as a path on disk.
audio_input = gr.Audio(type="filepath")

# Outputs: the reply as text, plus the same reply as playable audio.
response_outputs = [
    gr.Textbox(label="Response Text"),
    gr.Audio(label="Response Audio"),
]

# Wire the components to process_audio; live=True asks Gradio to re-run the
# function automatically when the input changes.
iface = gr.Interface(
    fn=process_audio,
    inputs=audio_input,
    outputs=response_outputs,
    live=True,
)

# Start the app and request a public share link.
iface.launch(share=True)