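# Jarvis - multimodal Gradio assistant: chat, image description, text-to-image,
# speech input/output, PDF summarization, and document Q&A.
#
# Assumed runtime requirements (not declared in this file): a reachable Ollama
# server with the `llava` and `gemma2` models already pulled, network access
# for the Google Speech API and gTTS, and ideally a CUDA GPU for Stable Diffusion.
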
import os
import re

import gradio as gr
import PyPDF2
import torch
from tqdm import tqdm
from gtts import gTTS
from transformers import pipeline
from diffusers import StableDiffusionPipeline
import speech_recognition as sr
import ollama

# Disable Gradio analytics
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"

history = []
recognizer = sr.Recognizer()

# Load the text-to-image pipeline; move it to the GPU only when one is available
text_to_image = StableDiffusionPipeline.from_pretrained('CompVis/stable-diffusion-v1-4')
if torch.cuda.is_available():
    text_to_image.to("cuda")

# Initialize the summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Initialize the question-answering pipeline
qa_model = pipeline("question-answering")

# Variable to store extracted text from the most recently uploaded PDF
extracted_text = ""

def clean_text(text):
    # Strip everything except word characters and whitespace
    # (punctuation, emojis, and other symbols are removed)
    return re.sub(r'[^\w\s]', '', text)
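
# Example: clean_text("Hello, world!") -> "Hello world"; this keeps the text
# fed to gTTS below free of punctuation and emoji.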

def generate_response(prompt, image_path=None, audio=None, text_to_image_prompt=None):
    if audio:
        with tqdm(total=100, desc="Processing Audio") as pbar:
            with sr.AudioFile(audio) as source:
                audio_data = recognizer.record(source)
                pbar.update(50)
            try:
                prompt = recognizer.recognize_google(audio_data)
                pbar.update(50)
            except sr.UnknownValueError:
                pbar.update(50)
                return "Sorry, I could not understand the audio.", None, None
            except sr.RequestError as e:
                pbar.update(50)
                return f"Speech recognition service error: {str(e)}", None, None
    if image_path:
        try:
            with tqdm(total=100, desc="Describing Image") as pbar:
                res = ollama.chat(
                    model="llava",
                    messages=[
                        {
                            'role': 'user',
                            'content': 'Describe this image:',
                            'images': [image_path]
                        }
                    ]
                )
                pbar.update(100)
            response_text = res['message']['content']
        except Exception as e:
            response_text = f"Error describing image: {str(e)}"
    elif text_to_image_prompt:
        try:
            with tqdm(total=50, desc="Generating Image") as pbar:
                # Diffusion runs in a single blocking call, so the bar only fills afterwards
                images = text_to_image(text_to_image_prompt, num_inference_steps=50).images
                for _ in range(50):
                    pbar.update(1)
            image_path = "generated_image.png"
            images[0].save(image_path)
            response_text = f"Generated an image for the prompt: {text_to_image_prompt}"
        except Exception as e:
            response_text = f"Error generating image: {str(e)}"
    else:
        history.append(prompt)
        final_prompt = "\n".join(history)
        try:
            with tqdm(total=100, desc="Generating Text") as pbar:
                res = ollama.chat(
                    model="gemma2",
                    messages=[
                        {
                            'role': 'user',
                            'content': final_prompt
                        }
                    ]
                )
                pbar.update(100)
            response_text = res['message']['content']
        except Exception as e:
            response_text = f"Error generating text: {str(e)}"
    # Clean the response text for voice output
    cleaned_response_text = clean_text(response_text)
    with tqdm(total=100, desc="Generating Voice Output") as pbar:
        tts = gTTS(cleaned_response_text)
        tts.save("response.mp3")
        pbar.update(100)
    return response_text, "response.mp3", image_path if text_to_image_prompt else None

# Function to handle document summarization
def summarize_document(document):
    global extracted_text  # Use the global variable to store extracted text
    try:
        reader = PyPDF2.PdfReader(document.name)
        full_text = ""
        for page in reader.pages:
            # extract_text() may return None or "" for image-only pages
            full_text += page.extract_text() or ""
        extracted_text = full_text  # Store the extracted text for Q&A

        # Split the text into manageable chunks (characters, not tokens;
        # 1000 characters stays comfortably under BART's input limit)
        chunk_size = 1000  # Adjust this size based on your needs
        chunks = [full_text[i:i + chunk_size] for i in range(0, len(full_text), chunk_size)]

        # Summarize each chunk with a progress bar
        pbar = tqdm(total=len(chunks), desc="Summarizing Document")
        summaries = []
        for chunk in chunks:
            summary = summarizer(chunk, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
            summaries.append(summary)
            pbar.update(1)
        pbar.close()

        # Combine the summaries
        combined_summary = " ".join(summaries)
        return combined_summary
    except Exception as e:
        return f"Error summarizing document: {str(e)}"

# Function to handle question answering
def answer_question(question):
    try:
        if not extracted_text:
            return "Please upload a document first."
        response = qa_model(question=question, context=extracted_text)
        answer = response['answer']
        # If the extractive answer is very short, ask the LLM for a fuller explanation
        if len(answer.split()) < 20:  # Adjust the threshold as needed
            explanation_res = ollama.chat(
                model="gemma2",
                messages=[
                    {
                        'role': 'user',
                        'content': f"Explain the answer to this question in more detail: {question}"
                    }
                ]
            )
            explanation = explanation_res['message']['content']
            return f"Answer: {answer}\nExplanation: {explanation}"
        else:
            return f"Answer: {answer}"
    except Exception as e:
        return f"Error answering question: {str(e)}"

# Define Gradio interface for chat functionality
chat_interface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(lines=4, placeholder="Enter your Prompt", label="Text Input"),
        gr.Image(type="filepath", label="Upload an Image"),
        gr.Audio(type="filepath", label="Voice Input"),
        gr.Textbox(lines=2, placeholder="Enter text to generate an image", label="Text to Image Input")
    ],
    outputs=[
        "text",
        gr.Audio(type="filepath", label="Voice Output"),
        gr.Image(type="filepath", label="Generated Image Output")
    ],
    title="Jarvis",
    description="Enter a text prompt, upload an image to describe it, use your voice, or generate an image from text."
)

# Define a separate interface for document summarization
document_interface = gr.Interface(
    fn=summarize_document,
    inputs=gr.File(label="Upload a Document"),
    outputs="text",
    title="Document Summarizer",
    description="Upload a PDF and get a summarized version of its content."
)

# Define a separate interface for question answering
qa_interface = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question", label="Question"),
    outputs="text",
    title="Document Question Answering",
    description="Ask questions based on the uploaded document. If the answer is brief, an explanation will be provided."
)

# Combine all interfaces into one tabbed app
combined_interface = gr.TabbedInterface(
    [chat_interface, document_interface, qa_interface],
    ["Chat Interface", "Document Summarizer", "Document Q&A"],
    theme="light",  # Optional; accepted theme names vary across Gradio versions
    title="Jarvis - AI Assistant"
)

# Launch the interface
combined_interface.launch(share=True)
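
# Note: share=True serves the app locally and also creates a temporary public
# gradio.live link; set share=False (or omit it) for local-only access.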