Spaces:

l-tran
/

demo_app

Sleeping

App Files Files Community

demo_app / app.py

l-tran

Update app.py

96bca4f verified 13 days ago

raw

history blame contribute delete

5.53 kB

	import os
	import streamlit as st
	from sentence_transformers import SentenceTransformer, util
	from groq import Groq
	from PyPDF2 import PdfReader
	from docx import Document
	from pptx import Presentation

	# Page-wide stylesheet: dark background plus custom classes (e.g. .title,
	# used by the page header). Kept in a named constant so the markup is
	# separate from the call that injects it.
	_PAGE_CSS = """
	<style>
	body {
	background-color: #121212;
	color: #ffffff;
	font-family: Arial, sans-serif;
	}
	.title {
	font-size: 36px;
	font-weight: bold;
	color: #e67e22;
	text-align: center;
	margin-bottom: 20px;
	}
	.subheader {
	font-size: 24px;
	color: #f39c12;
	margin-top: 10px;
	text-align: center;
	}
	.input-area {
	color: #ecf0f1;
	font-size: 16px;
	}
	.about-app {
	margin-top: 20px;
	padding: 15px;
	background-color: #1e1e1e;
	border-radius: 8px;
	color: #bdc3c7;
	}
	.footer {
	background-color: #1c1c1c;
	color: #bdc3c7;
	font-size: 14px;
	text-align: center;
	padding: 10px;
	position: fixed;
	bottom: 0;
	left: 0;
	width: 100%;
	z-index: 1000;
	}
	.stTextInput > div > div > input {
	background-color: #2c3e50;
	color: #ecf0f1;
	font-size: 16px;
	border-radius: 5px;
	padding: 10px;
	}
	</style>
	"""

	# The stylesheet is raw HTML, so Streamlit must be told not to escape it.
	st.markdown(_PAGE_CSS, unsafe_allow_html=True)

	# Initialize retriever and Groq client
	# The same embedding model encodes both the knowledge base and each query.
	retriever = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

	# generate_response() reads the module-level `client`. It was previously
	# commented out, so every question failed with NameError. The key is read
	# from the environment; without it the app cannot answer anything, so we
	# report the problem in the UI and halt this script run instead of crashing
	# later inside the request path.
	api_key = os.getenv("GROQ_API_KEY")
	if not api_key:
	    st.error("GROQ_API_KEY is not set. Add it to the environment to enable answers.")
	    st.stop()
	client = Groq(api_key=api_key)

	# Knowledge base (documents) and embeddings
	documents = [
	    "Retrieval-Augmented Generation (RAG) is an AI framework that combines the strengths of retrieval-based and generative models.",
	    "The main components of a RAG system are the retriever and the generator.",
	    "A key benefit of Retrieval-Augmented Generation is that it can produce more accurate responses compared to standalone generative models.",
	    "The retrieval process in a RAG system often relies on embedding-based models, like Sentence-BERT or DPR.",
	    "Common use cases of RAG include chatbots, customer support systems, and knowledge retrieval for business intelligence."
	]
	# One embedding per document, in the same order as `documents`, so a hit's
	# corpus_id can be used directly as an index into the list.
	document_embeddings = retriever.encode(documents, convert_to_tensor=True)

	def retrieve(query, top_k=1):
	    """Return the first knowledge-base document matched for *query*.

	    The query is embedded with the module-level ``retriever`` and searched
	    against ``document_embeddings``; ``top_k`` is forwarded to
	    ``util.semantic_search``. Only the first hit is returned, or ``None``
	    when the search yields no hits.
	    """
	    encoded_query = retriever.encode(query, convert_to_tensor=True)
	    # semantic_search returns one hit list per query; a single query was sent.
	    ranked = util.semantic_search(encoded_query, document_embeddings, top_k=top_k)[0]
	    if not ranked:
	        return None
	    return documents[ranked[0]['corpus_id']]

	def generate_response(query, context):
	    """Ask the chat model to answer *query* using *context*.

	    Sends a single user message containing the context and the question to
	    the module-level ``client`` and returns the text of the first choice.
	    """
	    prompt = f"Context: {context} Question: {query} Answer:"
	    user_turn = {"role": "user", "content": prompt}
	    completion = client.chat.completions.create(
	        messages=[user_turn],
	        model="gemma2-9b-it",
	    )
	    first_choice = completion.choices[0]
	    return first_choice.message.content

	# Streamlit app layout
	# Page heading, rendered as raw HTML so it can carry the "title" CSS class.
	_TITLE_HTML = '<div class="title">DocumentsReader</div>'
	st.markdown(_TITLE_HTML, unsafe_allow_html=True)

	# About the App section: static descriptive text shown inside a collapsible
	# expander labelled "About App".
	_ABOUT_TEXT = """
	### About the App: Document-Based RAG Question Answering
	This application, developed by Hamaad Ayub Khan, combines state-of-the-art Retrieval-Augmented Generation (RAG) technology with powerful AI models to answer questions based on the content of uploaded documents.
	Key Features:
	- Advanced Retrieval System
	- Generative Answering Capability
	- Multi-format Document Support
	- Seamless Knowledge Base Update
	- Contextually Rich Answers
	Developer Information: Hamaad Ayub Khan created this application with a commitment to making information retrieval simple, accurate, and accessible.
	Social Links:
	- [GitHub](https://github.com/hakgs1234)
	- [LinkedIn](https://linkedin.com/in/hamaadayubkhan)
	"""
	with st.expander("About App"):
	    st.write(_ABOUT_TEXT)

	# Document upload and knowledge base update
	# Extract plain text from the uploaded file (dispatching on its MIME type),
	# append it to the in-memory knowledge base and re-encode all documents.
	uploaded_file = st.file_uploader("Upload a document", type=["pdf", "docx", "pptx", "txt"])
	if uploaded_file:
	    # Stays None when the MIME type is not one we know how to parse, so the
	    # code below never touches an unbound name.
	    file_text = None
	    if uploaded_file.type == "application/pdf":
	        # PdfReader has no extract_text(); text is extracted page by page.
	        # extract_text() can yield nothing for image-only pages, hence `or ""`.
	        file_text = "\n".join(
	            (page.extract_text() or "") for page in PdfReader(uploaded_file).pages
	        )
	    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
	        file_text = "\n".join(para.text for para in Document(uploaded_file).paragraphs)
	    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
	        # Not every shape on a slide carries text (pictures, connectors, ...).
	        file_text = "\n".join(
	            shape.text
	            for slide in Presentation(uploaded_file).slides
	            for shape in slide.shapes
	            if hasattr(shape, "text")
	        )
	    elif uploaded_file.type == "text/plain":
	        # Tolerate files that are not valid UTF-8 instead of crashing the app.
	        file_text = uploaded_file.read().decode("utf-8", errors="replace")

	    if file_text is None:
	        st.error(f"Unsupported file type: {uploaded_file.type}")
	    elif not file_text.strip():
	        # An empty entry would add nothing useful to retrieval.
	        st.warning("No text could be extracted from the uploaded document.")
	    else:
	        documents.append(file_text)
	        # Embeddings must stay index-aligned with `documents`, so re-encode all.
	        document_embeddings = retriever.encode(documents, convert_to_tensor=True)
	        st.success("Document content successfully added to the knowledge base.")

	# Question input and output handling
	question = st.text_input("Enter your question:")

	# Nothing is computed until the user has typed a question. Once they have,
	# look up a context document and ask the model for an answer; if retrieval
	# returns nothing, show a fixed fallback message instead.
	if question:
	    context = retrieve(question)
	    if context:
	        answer = generate_response(question, context)
	    else:
	        answer = "I'm unable to find relevant information in the knowledge base."

	    # Render the answer under a heading.
	    st.markdown("### Answer:")
	    st.write(answer)