Spaces:

MSaadZia
/

VoicetoVoicebyZia

Running

App Files Files Community

VoicetoVoicebyZia / app.py

MSaadZia

Create app.py

55b4bbc verified 30 days ago

raw

history blame contribute delete

2.24 kB

	import os
	import torch
	from transformers import pipeline
	from gtts import gTTS
	import gradio as gr
	from groq import Groq

	# Build the speech-to-text pipeline from the "openai/whisper-small" checkpoint.
	try:
	    # Use CUDA when PyTorch reports it as available, otherwise fall back to CPU.
	    _whisper_device = "cuda" if torch.cuda.is_available() else "cpu"
	    pipe = pipeline(model="openai/whisper-small", device=_whisper_device)
	except Exception as load_error:
	    # Log the reason, then re-raise: voice_chat() depends on `pipe` existing.
	    print(f"Error loading Whisper model: {load_error}")
	    raise

	# The Groq API key is read from the environment (on Hugging Face Spaces, add a
	# secret named GROQ_API_KEY) instead of being committed with the source.
	# NOTE(review): a literal key used to be written on this line; treat that key
	# as leaked and revoke it.
	GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
	if not GROQ_API_KEY:
	    # Fail fast with a clear message rather than on the first chat request.
	    raise RuntimeError("GROQ_API_KEY environment variable is not set.")

	# get_llm_response() looks up the lowercase name `client`; binding only
	# `Client` made every LLM call fail with a NameError. `Client` is kept as an
	# alias so any existing reference to the old name keeps working.
	client = Groq(api_key=GROQ_API_KEY)
	Client = client

	# Function to get response from Groq LLM
	def get_llm_response(transcribed_text, model="llama3-8b-8192"):
	    """Send the transcribed text to the Groq chat API and return the reply text.

	    Args:
	        transcribed_text: The user's utterance as text; it is sent as a single
	            "user" message.
	        model: Groq model identifier to query. Defaults to "llama3-8b-8192".

	    Returns:
	        The content of the first completion choice, or a fixed apology string
	        when the text is empty or the request fails for any reason.
	    """
	    fallback = "Sorry, I couldn't process your request."

	    # An empty or whitespace-only transcription carries no request, so skip
	    # the API call entirely.
	    if not transcribed_text or not str(transcribed_text).strip():
	        print("Error getting response from LLM: empty transcription")
	        return fallback

	    try:
	        chat_completion = client.chat.completions.create(
	            messages=[{"role": "user", "content": transcribed_text}],
	            model=model,
	        )
	        return chat_completion.choices[0].message.content
	    except Exception as e:
	        # Deliberately best-effort: callers receive the apology text instead
	        # of an exception, and the cause is logged.
	        print(f"Error getting response from LLM: {e}")
	        return fallback

	# Function to convert text to speech
	def text_to_speech(response_text):
	    """Synthesize English speech for ``response_text`` and save it as an MP3.

	    Args:
	        response_text: Text to be spoken.

	    Returns:
	        The path of the written file ("response_audio.mp3"), or ``None`` when
	        there is no text to speak or synthesis fails. ``None`` is returned
	        instead of an error sentence because the caller uses the result as an
	        audio file path, and a sentence would be mistaken for a path.
	    """
	    # Fixed output name: each call overwrites the previous response file.
	    output_path = "response_audio.mp3"

	    # Nothing to synthesize for empty or whitespace-only text.
	    if not response_text or not str(response_text).strip():
	        print("Error converting text to speech: no text to speak")
	        return None

	    try:
	        tts = gTTS(response_text, lang='en')
	        tts.save(output_path)
	    except Exception as e:
	        # Best-effort: log the cause and signal "no audio" to the caller.
	        print(f"Error converting text to speech: {e}")
	        return None
	    return output_path

	# Function to handle the entire voice chat process
	def voice_chat(audio_input):
	    """Run one voice-chat turn: transcribe, query the LLM, speak the reply.

	    Args:
	        audio_input: Path to the recorded or uploaded audio file (the Gradio
	            input component is configured with ``type="filepath"``).

	    Returns:
	        Path to the audio file containing the spoken LLM response.

	    Raises:
	        gr.Error: If no audio was supplied, if any step fails, or if no audio
	            file was produced. The message is raised instead of returned
	            because the return value feeds an audio output, where a sentence
	            would be treated as a file path.
	    """
	    # NOTE(review): Gradio is expected to pass None when nothing was recorded
	    # or uploaded; confirm against the Audio component documentation.
	    if not audio_input:
	        raise gr.Error("Please record or upload audio before submitting.")

	    try:
	        # Transcribe the input audio using the Hugging Face Whisper pipeline.
	        transcribed_text = pipe(audio_input)["text"]
	        print(f"Transcribed Text: {transcribed_text}")

	        # Get the LLM response.
	        response_text = get_llm_response(transcribed_text)
	        print(f"LLM Response: {response_text}")

	        # Convert the response text to speech.
	        response_audio = text_to_speech(response_text)
	    except Exception as e:
	        print(f"Error in voice chat process: {e}")
	        raise gr.Error("Sorry, there was an error processing your audio.") from e

	    # text_to_speech may signal failure through its return value rather than
	    # an exception, so only hand back something that is a real file.
	    if not response_audio or not os.path.isfile(response_audio):
	        raise gr.Error("Sorry, I couldn't convert the response to audio.")

	    return response_audio

	# Wire voice_chat into a Gradio UI: the audio input is handed to the function
	# as a file path, and the function's result feeds an audio output.
	iface = gr.Interface(voice_chat, gr.Audio(type="filepath"), "audio")

	# Start serving the interface.
	iface.launch()