import gradio as gr
import os
from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
from datasets import load_dataset
import torch
import soundfile as sf
from pdfminer.high_level import extract_text
from llama_cpp import Llama
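
# Note: the imports above assume these pip packages are installed; the package
# names below are the standard ones for each module, but pin versions to match
# your own environment:
#   pip install gradio transformers datasets torch soundfile pdfminer.six llama-cpp-python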
# Check if MPS is available and set the device
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using MPS device")
else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"MPS not available, using {device}")
def toText(audio):
    # Transcribe the recorded question with Whisper
    asr = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny.en",
        chunk_length_s=30,
        device=device,
    )
    question = asr(audio, batch_size=8)["text"]
    return question
# Global variable to store chat history
chat_history = []
def extract_answer(question, text):
    global chat_history

    # Path to the local GGUF model file
    model_path = "/Users/chandima/.cache/lm-studio/models/lmstudio-community/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct-Q3_K_L.gguf"

    # Load the LLaMA model with GPU acceleration
    llm = Llama(
        model_path=model_path,
        n_gpu_layers=-1,  # Use all available layers for GPU acceleration
        n_ctx=2048,       # Adjust context size as needed
        verbose=True,     # Optional: for debugging
        use_mlock=True,   # Optional: for better memory management
        n_threads=6,      # Adjust based on your CPU
        use_mmap=True,    # Optional: for faster loading
    )

    # Construct the conversation history
    conversation = "\n".join([f"Human: {q}\nAI: {a}" for q, a in chat_history])

    # Use LLaMA to answer the question from the resume
    prompt = f"""
You are an AI assistant answering questions based on a resume. Here's the conversation so far:
{conversation}
Human: {question}
Resume:
{text}
AI: """

    response = llm(prompt, max_tokens=800, stop=["Human:", "\n\n"])
    answer = response['choices'][0]['text'].strip()

    # Append the new question and answer to the chat history
    chat_history.append((question, answer))

    print(answer)
    return answer
def toAudio(text):
    # Synthesize speech with SpeechT5, conditioning on a pre-computed speaker embedding
    synthesiser = pipeline("text-to-speech", "microsoft/speecht5_tts", device=device)
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
    return speech
def clone(audio, file):
    if audio is None or file is None:
        return None
    question = toText(audio=audio)
    text = extract_text(file.name)
    res = extract_answer(question, text)
    print(res)
    speech = toAudio(res)
    sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
    return "./speech.wav"
def start_recording():
    # Returning None clears the microphone input so the next question can be recorded
    return None

def reset_conversation():
    global chat_history
    chat_history = []
    return None
with gr.Blocks() as iface:
    with gr.Row():
        audio_input = gr.Audio(sources="microphone", type="filepath", label='Question from Resume')
        file_input = gr.File(label="Resume")
    output = gr.Audio(label='Says', autoplay=True)

    inputs = [audio_input, file_input]

    btn = gr.Button("Submit")
    btn.click(fn=clone, inputs=inputs, outputs=output)
    audio_input.stop_recording(fn=clone, inputs=inputs, outputs=output)

    # When the answer starts playing, clear the microphone input so a new question can be recorded
    output.play(fn=start_recording, outputs=audio_input)

    # Add a button to reset the conversation
    reset_btn = gr.Button("Reset Conversation")
    reset_btn.click(fn=reset_conversation, inputs=None, outputs=None)

iface.launch()