# Praise Jesus
# Stable version working with Llama, but not yet satisfied with the quality of the output
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import docx2txt
import os
# Authenticate with Hugging Face using an environment variable token if set
hf_token = os.getenv('HF_TOKEN')
if hf_token:
    from huggingface_hub import login
    login(hf_token)
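# Note: meta-llama/Llama-3.2-3B-Instruct is a gated repo, so HF_TOKEN must be set
# (e.g., as a Space secret) on an account that has accepted the model license,
# or the model download below will fail with a 401/403 error.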
# Whisper model for audio transcription
whisper_model = pipeline("automatic-speech-recognition", model="openai/whisper-large")
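# openai/whisper-large is a ~1.5B-parameter checkpoint; if the Space hardware is
# limited, a smaller variant such as openai/whisper-small is a drop-in replacement
# for this pipeline call (at some cost in transcription accuracy).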
# LLaMA 3.2 model for text processing
llama_model_id = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(llama_model_id, token=hf_token)
llama_model = AutoModelForCausalLM.from_pretrained(llama_model_id, torch_dtype=torch.bfloat16, token=hf_token)
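# bfloat16 roughly halves memory versus float32; on CPU-only hardware where bf16
# kernels are slow or unavailable, torch.float32 may be the safer dtype choice.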
# Function to generate a response using the LLaMA 3.2 model
def get_llama_response(input_text):
    # Ensure input is detailed enough
    if len(input_text.split()) < 10:
        return "Please provide a more detailed user story to help generate relevant needs and wants."
    # Define the prompt for the LLaMA model
    prompt = f"""
    Based on the user story "{input_text}", extract any unarticulated needs and wants.
    Only provide essential needs and wants directly relevant to the given story.
    Do not speculate or over-extrapolate.
    """
    # Process the prompt with LLaMA 3.2
    inputs = tokenizer(prompt, return_tensors="pt")
    llama_output = llama_model.generate(**inputs, max_new_tokens=100)
    # Decode only the newly generated tokens; decoding llama_output[0] in full
    # would echo the prompt back into the response
    response_text = tokenizer.decode(llama_output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    return response_text
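# A likely cause of the poor output noted at the top of this file: Instruct-tuned
# Llama models are trained on a chat format, so a raw f-string prompt is out of
# distribution. A minimal sketch using the standard transformers chat-template API,
# which could replace the tokenizer call above (untested here):
#
#     messages = [{"role": "user", "content": prompt}]
#     input_ids = tokenizer.apply_chat_template(
#         messages, add_generation_prompt=True, return_tensors="pt"
#     )
#     llama_output = llama_model.generate(input_ids, max_new_tokens=100)
#     response_text = tokenizer.decode(
#         llama_output[0][input_ids.shape[-1]:], skip_special_tokens=True
#     )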
# Main processing function for the Gradio interface
def process_input(user_story=None, user_audio=None, user_file=None):
    # Process audio input if provided
    if user_audio:
        transcription = whisper_model(user_audio)["text"]
        user_story = transcription
    # Process file input if provided and the text box is empty
    if user_file and not user_story:
        user_story = docx2txt.process(user_file)
    # Ensure there's text to process
    if not user_story:
        return "Please provide a user story, an audio file, or upload a Word file."
    # Generate a response with LLaMA 3.2
    llama_response = get_llama_response(user_story)
    return f"LLaMA Output:\n{llama_response}"
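# Note: on Gradio 4.x, gr.File passes a filepath string, which docx2txt.process
# accepts directly; older Gradio 3.x passes a tempfile wrapper, in which case
# docx2txt.process(user_file.name) would be needed instead.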
# Gradio interface with text, audio, and file inputs
interface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="User Story (Text Input)", placeholder="Enter your user story here..."),
        gr.Audio(type="filepath", label="User Story (Audio Input)"),
        gr.File(label="Upload Word File (.docx)")  # Removed `optional=True` (no longer supported)
    ],
    outputs="text",
    title="Multimodal Needs & Wants Extractor",
    description="**Author:** VictorDaniel\n\nEnter a detailed user story or upload an audio/Word file to extract the unarticulated needs and wants.",
    examples=[
        # Each example must supply a value for every input component (None for audio/file)
        ["The user often speaks about wanting to improve their health but is hesitant to join a gym.", None, None]
    ]
)
# Launch the Gradio app
interface.launch()
#app2