import os
import re
import gradio as gr
import PyPDF2
from tqdm import tqdm
from gtts import gTTS
import torch
from transformers import pipeline
from diffusers import StableDiffusionPipeline
import speech_recognition as sr
import ollama
# Disable Gradio analytics
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
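# Module-level state: the running chat history and a shared speech recognizer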
history = []
recognizer = sr.Recognizer()
# Load the text-to-image pipeline (Stable Diffusion v1.4)
text_to_image = StableDiffusionPipeline.from_pretrained('CompVis/stable-diffusion-v1-4')
if torch.cuda.is_available():
    text_to_image.to("cuda")  # Move the pipeline to the GPU only when one is available
# Initialize the summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
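# Note: bart-large-cnn accepts roughly 1,024 tokens per call, which is why
# summarize_document below splits long documents into chunks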
# Initialize the question-answering pipeline (explicitly pinned to the
# pipeline's default extractive QA model for reproducibility)
qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
# Variable to store extracted text from PDF
extracted_text = ""
def clean_text(text):
    # Strip punctuation, emojis, and other non-word characters so the
    # text-to-speech engine does not read symbols aloud
    return re.sub(r'[^\w\s]', '', text)
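# Example: clean_text("Hello, world! 🚀") returns "Hello world " (punctuation and emoji removed)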
def generate_response(prompt, image_path=None, audio=None, text_to_image_prompt=None):
if audio:
with tqdm(total=100, desc="Processing Audio") as pbar:
with sr.AudioFile(audio) as source:
audio_data = recognizer.record(source)
pbar.update(50)
            try:
                prompt = recognizer.recognize_google(audio_data)
                pbar.update(50)
            except sr.UnknownValueError:
                pbar.update(50)
                return "Sorry, I could not understand the audio.", None, None
            except sr.RequestError as e:
                pbar.update(50)
                return f"Speech recognition service error: {e}", None, None
if image_path:
try:
with tqdm(total=100, desc="Describing Image") as pbar:
res = ollama.chat(
model="llava",
messages=[
{
'role': 'user',
'content': 'Describe this image:',
'images': [image_path]
}
]
)
pbar.update(100)
response_text = res['message']['content']
except Exception as e:
response_text = f"Error describing image: {str(e)}"
elif text_to_image_prompt:
try:
            with tqdm(total=50, desc="Generating Image") as pbar:
                # The pipeline call blocks until all inference steps complete,
                # so the bar advances in one burst rather than per step
                images = text_to_image(text_to_image_prompt, num_inference_steps=50).images
                pbar.update(50)
                image_path = "generated_image.png"
                images[0].save(image_path)
response_text = f"Generated an image for the prompt: {text_to_image_prompt}"
except Exception as e:
response_text = f"Error generating image: {str(e)}"
else:
history.append(prompt)
final_prompt = "\n".join(history)
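        # This joins the whole session history into one flat prompt; ollama.chat
        # also accepts a list of role-tagged messages if turn structure matters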
try:
with tqdm(total=100, desc="Generating Text") as pbar:
res = ollama.chat(
model="gemma2",
messages=[
{
'role': 'user',
'content': final_prompt
}
]
)
pbar.update(100)
response_text = res['message']['content']
except Exception as e:
response_text = f"Error generating text: {str(e)}"
    # Clean the response text for voice output
    cleaned_response_text = clean_text(response_text)
    audio_path = None
    with tqdm(total=100, desc="Generating Voice Output") as pbar:
        if cleaned_response_text.strip():  # gTTS raises an error on empty input
            tts = gTTS(cleaned_response_text)
            tts.save("response.mp3")
            audio_path = "response.mp3"
        pbar.update(100)
    return response_text, audio_path, image_path if text_to_image_prompt else None
# Function to handle document summarization
def summarize_document(document):
global extracted_text # Use the global variable to store extracted text
try:
reader = PyPDF2.PdfReader(document.name)
full_text = ""
        for page in reader.pages:
            full_text += page.extract_text() or ""  # extract_text() can return None for image-only pages
extracted_text = full_text # Store the extracted text
# Split the text into manageable chunks
chunk_size = 1000 # You can adjust this size based on your needs
chunks = [full_text[i:i + chunk_size] for i in range(0, len(full_text), chunk_size)]
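        # e.g. a 2,500-character document yields chunks of 1,000, 1,000, and 500 characters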
# Initialize progress bar
pbar = tqdm(total=len(chunks), desc="Summarizing Document")
# Summarize each chunk
summaries = []
for chunk in chunks:
summary = summarizer(chunk, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
summaries.append(summary)
pbar.update(1)
# Combine the summaries
combined_summary = " ".join(summaries)
pbar.close()
return combined_summary
except Exception as e:
return f"Error summarizing document: {str(e)}"
# Function to handle question answering
def answer_question(question):
try:
if not extracted_text:
return "Please upload a document first."
response = qa_model(question=question, context=extracted_text)
answer = response['answer']
        # If the extractive answer is brief, ask the chat model for an explanation
        if len(answer.split()) < 20:  # Adjust the threshold as needed
            # Pass both the question and the short answer so the explanation
            # stays grounded in what the QA model actually extracted
            explanation_res = ollama.chat(
                model="gemma2",
                messages=[
                    {
                        'role': 'user',
                        'content': f"Question: {question}\nShort answer: {answer}\nExplain this answer in more detail."
                    }
                ]
            )
            explanation = explanation_res['message']['content']
            return f"Answer: {answer}\nExplanation: {explanation}"
        else:
            return f"Answer: {answer}"
except Exception as e:
return f"Error answering question: {str(e)}"
# Define Gradio interface for chat functionality
chat_interface = gr.Interface(
fn=generate_response,
inputs=[
gr.Textbox(lines=4, placeholder="Enter your Prompt", label="Text Input"),
gr.Image(type="filepath", label="Upload an Image"),
gr.Audio(type="filepath", label="Voice Input"),
gr.Textbox(lines=2, placeholder="Enter text to generate an image", label="Text to Image Input")
],
outputs=[
"text",
gr.Audio(type="filepath", label="Voice Output"),
gr.Image(type="filepath", label="Generated Image Output")
],
title="Jarvis",
description="Enter a text prompt, upload an image to describe it, use your voice, or generate an image from text."
)
# Define a separate interface for document summarization
document_interface = gr.Interface(
fn=summarize_document,
inputs=gr.File(label="Upload a Document"),
outputs="text",
title="Document Summarizer",
description="Upload a document and get a summarized version of its content."
)
# Define a separate interface for question answering
qa_interface = gr.Interface(
fn=answer_question,
inputs=gr.Textbox(lines=2, placeholder="Enter your question", label="Question"),
outputs="text",
title="Document Question Answering",
description="Ask questions based on the uploaded document. If the answer is brief, an explanation will be provided."
)
# Combine all interfaces
combined_interface = gr.TabbedInterface(
[chat_interface, document_interface, qa_interface],
["Chat Interface", "Document Summarizer", "Document Q&A"],
theme="light", # Optional: Set default theme to light mode
title="Jarvis - AI Assistant"
)
# Launch the interface
combined_interface.launch(share=True)
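# share=True requests a temporary public gradio.live URL; set share=False to serve locally only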