# Praise Jesus
# Stable version working with Llama but not satisfied with poor output

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import docx2txt
import os

# Authenticate with Hugging Face using an environment variable token if set
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    from huggingface_hub import login
    login(token=hf_token)

# Whisper model for audio transcription
whisper_model = pipeline("automatic-speech-recognition", model="openai/whisper-large")

# LLaMA 3.2 model for text processing
llama_model_id = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(llama_model_id, token=hf_token)
llama_model = AutoModelForCausalLM.from_pretrained(
    llama_model_id, torch_dtype=torch.bfloat16, token=hf_token
)


# Function to generate a response using the LLaMA 3.2 model
def get_llama_response(input_text):
    # Ensure the input is detailed enough
    if len(input_text.split()) < 10:
        return "Please provide a more detailed user story to help generate relevant needs and wants."

    # Prompt for the LLaMA model
    prompt = f"""
    Based on the user story "{input_text}", extract any unarticulated needs and wants.
    Only provide essential needs and wants directly relevant to the given story.
    Do not speculate or over-extrapolate.
    """

    # Generate with LLaMA 3.2
    inputs = tokenizer(prompt, return_tensors="pt").to(llama_model.device)
    llama_output = llama_model.generate(**inputs, max_new_tokens=100)

    # Decode only the newly generated tokens so the prompt is not echoed back in the output
    new_tokens = llama_output[0][inputs["input_ids"].shape[-1]:]
    response_text = tokenizer.decode(new_tokens, skip_special_tokens=True)

    return response_text


# Main processing function for the Gradio interface
def process_input(user_story=None, user_audio=None, user_file=None):
    # Transcribe audio input if provided
    if user_audio:
        transcription = whisper_model(user_audio)["text"]
        user_story = transcription

    # Fall back to the Word file if no text or transcription is available
    if user_file and not user_story:
        user_story = docx2txt.process(user_file)

    # Ensure there is text to process
    if not user_story:
        return "Please provide a user story, an audio file, or upload a Word file."

    # Generate the response with LLaMA 3.2
    llama_response = get_llama_response(user_story)
    return f"LLaMA Output:\n{llama_response}"


# Gradio interface with text, audio, and file inputs
interface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="User Story (Text Input)", placeholder="Enter your user story here..."),
        gr.Audio(type="filepath", label="User Story (Audio Input)"),
        gr.File(label="Upload Word File (.docx)"),  # Removed `optional=True`
    ],
    outputs="text",
    title="Multimodal Needs & Wants Extractor",
    description=(
        "**Author:** VictorDaniel\n\n"
        "Enter a detailed user story or upload an audio/Word file to extract the "
        "unarticulated needs and wants."
    ),
    examples=[
        # Each example supplies a value for every input component (text, audio, file)
        ["The user often speaks about wanting to improve their health but is hesitant to join a gym.", None, None]
    ],
)

# Launch the Gradio app
interface.launch()

# app2
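
# --- Optional sketch: chat-template prompting (not wired into the app above) ---
# The header note mentions poor output quality. One likely cause is that
# Llama-3.2-3B-Instruct is tuned for its chat template rather than a raw prompt
# string. The function below is a minimal, assumption-labelled alternative to
# get_llama_response() using tokenizer.apply_chat_template(); the name
# get_llama_chat_response is hypothetical and it is not called anywhere in this
# app — it would need to be swapped in for get_llama_response() inside
# process_input() to try it.
def get_llama_chat_response(input_text):
    messages = [
        {
            "role": "system",
            "content": (
                "Extract only the unarticulated needs and wants that are directly "
                "relevant to the user story. Do not speculate or over-extrapolate."
            ),
        },
        {"role": "user", "content": input_text},
    ]
    # Wrap the messages in the model's expected chat format and tokenize
    chat_input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(llama_model.device)
    output_ids = llama_model.generate(chat_input_ids, max_new_tokens=200)
    # Decode only the newly generated tokens, not the prompt
    return tokenizer.decode(output_ids[0][chat_input_ids.shape[-1]:], skip_special_tokens=True)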