# Hugging Face Space (status: Running)
import os

import gradio as gr
import torch
from groq import Groq
from gtts import gTTS
from transformers import pipeline
# Load Whisper model from Hugging Face.
# The pipeline is built once at import time and stored in the module-level
# `pipe`; it runs on CUDA when torch reports a GPU, otherwise on the CPU.
try:
    pipe = pipeline(
        model="openai/whisper-small",
        device="cuda" if torch.cuda.is_available() else "cpu",
    )
except Exception as e:
    # Log the cause, then re-raise so startup fails instead of continuing
    # without a transcription model.
    print(f"Error loading Whisper model: {e}")
    raise
# Read the Groq API key from the environment rather than embedding it in the
# source file.
# NOTE(review): a key was previously committed on this line; treat it as
# leaked and revoke it.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError("GROQ_API_KEY environment variable is not set.")

# Bound to lowercase `client` because get_llm_response() looks up that name;
# the original binding was `Client`, which left `client` undefined.
client = Groq(api_key=GROQ_API_KEY)
Client = client  # Backward-compatible alias for the original name.
# Function to get response from Groq LLM
def get_llm_response(transcribed_text):
    """Send the transcribed text to the Groq chat model and return its reply.

    Args:
        transcribed_text: Text sent as the content of a single "user"
            message.

    Returns:
        The message content of the first completion choice, or a fixed
        apology string if anything in the request or response handling
        raises.
    """
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": transcribed_text}],
            model="llama3-8b-8192",
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        # Best-effort: log the failure and hand back a fallback string so
        # the caller still has text to work with.
        print(f"Error getting response from LLM: {e}")
        return "Sorry, I couldn't process your request."
# Function to convert text to speech
def text_to_speech(response_text):
    """Synthesize *response_text* as English speech and save it as an MP3.

    The audio is always written to "response_audio.mp3" in the current
    working directory, overwriting any previous file of that name.

    Args:
        response_text: The text to speak.

    Returns:
        The path "response_audio.mp3" on success, or None if synthesis or
        saving failed.
    """
    try:
        tts = gTTS(response_text, lang='en')
        tts.save("response_audio.mp3")
        return "response_audio.mp3"  # Returning the file path
    except Exception as e:
        print(f"Error converting text to speech: {e}")
        # The return value is consumed as an audio file path, so signal
        # failure with None instead of an apology sentence that would be
        # mistaken for a path.
        return None
# Function to handle the entire voice chat process
def voice_chat(audio_input):
    """Run one voice-chat turn: transcribe, query the LLM, speak the reply.

    Args:
        audio_input: The recorded audio handed to the Whisper pipeline, or
            None when nothing was recorded or uploaded.

    Returns:
        Whatever text_to_speech() returns for the LLM reply (the path of
        the generated audio file), or None when there is no input or any
        step raises.
    """
    # Nothing was submitted, so there is nothing to transcribe.
    if audio_input is None:
        return None

    try:
        # Transcribe the input audio using Hugging Face Whisper model
        transcribed_text = pipe(audio_input)["text"]
        print(f"Transcribed Text: {transcribed_text}")

        # Get the LLM response
        response_text = get_llm_response(transcribed_text)
        print(f"LLM Response: {response_text}")

        # Convert the response text to speech and return the audio file
        return text_to_speech(response_text)
    except Exception as e:
        print(f"Error in voice chat process: {e}")
        # The interface's output is audio; an apology sentence here would be
        # treated as a file path, so return None to produce no audio.
        return None
# Create the Gradio interface: one audio input passed to voice_chat() and one
# audio output fed by its return value.
iface = gr.Interface(
    fn=voice_chat,
    inputs=gr.Audio(type="filepath"),  # Specify input type only
    outputs="audio",
)

# Launch the Gradio interface
iface.launch()