# Praise Jesus
# Stable version working with Llama but not satisfied with poor output

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import docx2txt
import os

# Authenticate with Hugging Face using an environment variable token if set
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    from huggingface_hub import login
    login(token=hf_token)

# Whisper model for audio transcription
whisper_model = pipeline("automatic-speech-recognition", model="openai/whisper-large")

# LLaMA 3.2 model for text processing
llama_model_id = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(llama_model_id, token=hf_token)
llama_model = AutoModelForCausalLM.from_pretrained(
    llama_model_id, torch_dtype=torch.bfloat16, token=hf_token
)


# Function to generate a response using the LLaMA 3.2 model
def get_llama_response(input_text):
    # Ensure the input is detailed enough
    if len(input_text.split()) < 10:
        return "Please provide a more detailed user story to help generate relevant needs and wants."

    # Prompt for the LLaMA model
    prompt = f"""
    Based on the user story "{input_text}", extract any unarticulated needs and wants.
    Only provide essential needs and wants directly relevant to the given story.
    Do not speculate or over-extrapolate.
    """

    # Generate with LLaMA 3.2
    inputs = tokenizer(prompt, return_tensors="pt").to(llama_model.device)
    llama_output = llama_model.generate(**inputs, max_new_tokens=100)

    # Decode only the newly generated tokens so the prompt is not echoed back in the output
    new_tokens = llama_output[0][inputs["input_ids"].shape[-1]:]
    response_text = tokenizer.decode(new_tokens, skip_special_tokens=True)

    return response_text


# Main processing function for the Gradio interface
def process_input(user_story=None, user_audio=None, user_file=None):
    # Transcribe audio input if provided
    if user_audio:
        transcription = whisper_model(user_audio)["text"]
        user_story = transcription

    # Fall back to the Word file if no text or transcription is available
    if user_file and not user_story:
        user_story = docx2txt.process(user_file)

    # Ensure there is text to process
    if not user_story:
        return "Please provide a user story, an audio file, or upload a Word file."

    # Generate the response with LLaMA 3.2
    llama_response = get_llama_response(user_story)
    return f"LLaMA Output:\n{llama_response}"


# Gradio interface with text, audio, and file inputs
interface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="User Story (Text Input)", placeholder="Enter your user story here..."),
        gr.Audio(type="filepath", label="User Story (Audio Input)"),
        gr.File(label="Upload Word File (.docx)"),  # Removed `optional=True`
    ],
    outputs="text",
    title="Multimodal Needs & Wants Extractor",
    description=(
        "**Author:** VictorDaniel\n\n"
        "Enter a detailed user story or upload an audio/Word file to extract the "
        "unarticulated needs and wants."
    ),
    examples=[
        # Each example supplies a value for every input component (text, audio, file)
        ["The user often speaks about wanting to improve their health but is hesitant to join a gym.", None, None]
    ],
)

# Launch the Gradio app
interface.launch()

# app2
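
# --- Optional sketch: chat-template prompting (not wired into the app above) ---
# The header note mentions poor output quality. One likely cause is that
# Llama-3.2-3B-Instruct is tuned for its chat template rather than a raw prompt
# string. The function below is a minimal, assumption-labelled alternative to
# get_llama_response() using tokenizer.apply_chat_template(); the name
# get_llama_chat_response is hypothetical and it is not called anywhere in this
# app — it would need to be swapped in for get_llama_response() inside
# process_input() to try it.
def get_llama_chat_response(input_text):
    messages = [
        {
            "role": "system",
            "content": (
                "Extract only the unarticulated needs and wants that are directly "
                "relevant to the user story. Do not speculate or over-extrapolate."
            ),
        },
        {"role": "user", "content": input_text},
    ]
    # Wrap the messages in the model's expected chat format and tokenize
    chat_input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(llama_model.device)
    output_ids = llama_model.generate(chat_input_ids, max_new_tokens=200)
    # Decode only the newly generated tokens, not the prompt
    return tokenizer.decode(output_ids[0][chat_input_ids.shape[-1]:], skip_special_tokens=True)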