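# Jarvis - multimodal Gradio assistant: chat, image description, text-to-image,
# speech input/output, PDF summarization, and document Q&A.
#
# Assumed runtime requirements (not declared in this file): a reachable Ollama
# server with the `llava` and `gemma2` models already pulled, network access
# for the Google Speech API and gTTS, and ideally a CUDA GPU for Stable Diffusion.
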
import os
import re

import gradio as gr
import PyPDF2
import torch
from tqdm import tqdm
from gtts import gTTS
from transformers import pipeline
from diffusers import StableDiffusionPipeline
import speech_recognition as sr
import ollama

# Disable Gradio analytics
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"

history = []
recognizer = sr.Recognizer()

# Load the text-to-image pipeline; move it to the GPU only when one is available
text_to_image = StableDiffusionPipeline.from_pretrained('CompVis/stable-diffusion-v1-4')
if torch.cuda.is_available():
    text_to_image.to("cuda")

# Initialize the summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Initialize the question-answering pipeline
qa_model = pipeline("question-answering")

# Variable to store extracted text from the most recently uploaded PDF
extracted_text = ""

def clean_text(text):
    # Strip everything except word characters and whitespace
    # (punctuation, emojis, and other symbols are removed)
    return re.sub(r'[^\w\s]', '', text)
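
# Example: clean_text("Hello, world!") -> "Hello world"; this keeps the text
# fed to gTTS below free of punctuation and emoji.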

def generate_response(prompt, image_path=None, audio=None, text_to_image_prompt=None):
    if audio:
        with tqdm(total=100, desc="Processing Audio") as pbar:
            with sr.AudioFile(audio) as source:
                audio_data = recognizer.record(source)
                pbar.update(50)
            try:
                prompt = recognizer.recognize_google(audio_data)
                pbar.update(50)
            except sr.UnknownValueError:
                pbar.update(50)
                return "Sorry, I could not understand the audio.", None, None
            except sr.RequestError as e:
                pbar.update(50)
                return f"Speech recognition service error: {str(e)}", None, None
    if image_path:
        try:
            with tqdm(total=100, desc="Describing Image") as pbar:
                res = ollama.chat(
                    model="llava",
                    messages=[
                        {
                            'role': 'user',
                            'content': 'Describe this image:',
                            'images': [image_path]
                        }
                    ]
                )
                pbar.update(100)
            response_text = res['message']['content']
        except Exception as e:
            response_text = f"Error describing image: {str(e)}"
    elif text_to_image_prompt:
        try:
            with tqdm(total=50, desc="Generating Image") as pbar:
                # Diffusion runs in a single blocking call, so the bar only fills afterwards
                images = text_to_image(text_to_image_prompt, num_inference_steps=50).images
                for _ in range(50):
                    pbar.update(1)
            image_path = "generated_image.png"
            images[0].save(image_path)
            response_text = f"Generated an image for the prompt: {text_to_image_prompt}"
        except Exception as e:
            response_text = f"Error generating image: {str(e)}"
    else:
        history.append(prompt)
        final_prompt = "\n".join(history)
        try:
            with tqdm(total=100, desc="Generating Text") as pbar:
                res = ollama.chat(
                    model="gemma2",
                    messages=[
                        {
                            'role': 'user',
                            'content': final_prompt
                        }
                    ]
                )
                pbar.update(100)
            response_text = res['message']['content']
        except Exception as e:
            response_text = f"Error generating text: {str(e)}"
    # Clean the response text for voice output
    cleaned_response_text = clean_text(response_text)
    with tqdm(total=100, desc="Generating Voice Output") as pbar:
        tts = gTTS(cleaned_response_text)
        tts.save("response.mp3")
        pbar.update(100)
    return response_text, "response.mp3", image_path if text_to_image_prompt else None

# Function to handle document summarization
def summarize_document(document):
    global extracted_text  # Use the global variable to store extracted text
    try:
        reader = PyPDF2.PdfReader(document.name)
        full_text = ""
        for page in reader.pages:
            # extract_text() may return None or "" for image-only pages
            full_text += page.extract_text() or ""
        extracted_text = full_text  # Store the extracted text for Q&A

        # Split the text into manageable chunks (characters, not tokens;
        # 1000 characters stays comfortably under BART's input limit)
        chunk_size = 1000  # Adjust this size based on your needs
        chunks = [full_text[i:i + chunk_size] for i in range(0, len(full_text), chunk_size)]

        # Summarize each chunk with a progress bar
        pbar = tqdm(total=len(chunks), desc="Summarizing Document")
        summaries = []
        for chunk in chunks:
            summary = summarizer(chunk, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
            summaries.append(summary)
            pbar.update(1)
        pbar.close()

        # Combine the summaries
        combined_summary = " ".join(summaries)
        return combined_summary
    except Exception as e:
        return f"Error summarizing document: {str(e)}"

# Function to handle question answering
def answer_question(question):
    try:
        if not extracted_text:
            return "Please upload a document first."
        response = qa_model(question=question, context=extracted_text)
        answer = response['answer']
        # If the extractive answer is very short, ask the LLM for a fuller explanation
        if len(answer.split()) < 20:  # Adjust the threshold as needed
            explanation_res = ollama.chat(
                model="gemma2",
                messages=[
                    {
                        'role': 'user',
                        'content': f"Explain the answer to this question in more detail: {question}"
                    }
                ]
            )
            explanation = explanation_res['message']['content']
            return f"Answer: {answer}\nExplanation: {explanation}"
        else:
            return f"Answer: {answer}"
    except Exception as e:
        return f"Error answering question: {str(e)}"

# Define Gradio interface for chat functionality
chat_interface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(lines=4, placeholder="Enter your Prompt", label="Text Input"),
        gr.Image(type="filepath", label="Upload an Image"),
        gr.Audio(type="filepath", label="Voice Input"),
        gr.Textbox(lines=2, placeholder="Enter text to generate an image", label="Text to Image Input")
    ],
    outputs=[
        "text",
        gr.Audio(type="filepath", label="Voice Output"),
        gr.Image(type="filepath", label="Generated Image Output")
    ],
    title="Jarvis",
    description="Enter a text prompt, upload an image to describe it, use your voice, or generate an image from text."
)

# Define a separate interface for document summarization
document_interface = gr.Interface(
    fn=summarize_document,
    inputs=gr.File(label="Upload a Document"),
    outputs="text",
    title="Document Summarizer",
    description="Upload a PDF and get a summarized version of its content."
)

# Define a separate interface for question answering
qa_interface = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question", label="Question"),
    outputs="text",
    title="Document Question Answering",
    description="Ask questions based on the uploaded document. If the answer is brief, an explanation will be provided."
)

# Combine all interfaces into one tabbed app
combined_interface = gr.TabbedInterface(
    [chat_interface, document_interface, qa_interface],
    ["Chat Interface", "Document Summarizer", "Document Q&A"],
    theme="light",  # Optional; accepted theme names vary across Gradio versions
    title="Jarvis - AI Assistant"
)

# Launch the interface
combined_interface.launch(share=True)
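
# Note: share=True serves the app locally and also creates a temporary public
# gradio.live link; set share=False (or omit it) for local-only access.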