|
import os |
|
import streamlit as st |
|
from sentence_transformers import SentenceTransformer, util |
|
from groq import Groq |
|
from PyPDF2 import PdfReader |
|
from docx import Document |
|
from pptx import Presentation |
|
|
|
|
|
# Inject the app-wide dark theme. The stylesheet is passed as raw HTML, so it
# must contain nothing but valid CSS: any stray characters between
# declarations make the browser drop the rules that follow them.
st.markdown("""
<style>
body {
    background-color: #121212;
    color: #ffffff;
    font-family: Arial, sans-serif;
}
.title {
    font-size: 36px;
    font-weight: bold;
    color: #e67e22;
    text-align: center;
    margin-bottom: 20px;
}
.subheader {
    font-size: 24px;
    color: #f39c12;
    margin-top: 10px;
    text-align: center;
}
.input-area {
    color: #ecf0f1;
    font-size: 16px;
}
.about-app {
    margin-top: 20px;
    padding: 15px;
    background-color: #1e1e1e;
    border-radius: 8px;
    color: #bdc3c7;
}
.footer {
    background-color: #1c1c1c;
    color: #bdc3c7;
    font-size: 14px;
    text-align: center;
    padding: 10px;
    position: fixed;
    bottom: 0;
    left: 0;
    width: 100%;
    z-index: 1000;
}
.stTextInput > div > div > input {
    background-color: #2c3e50;
    color: #ecf0f1;
    font-size: 16px;
    border-radius: 5px;
    padding: 10px;
}
</style>
""", unsafe_allow_html=True)
|
|
|
|
|
@st.cache_resource
def _load_retriever():
    """Build the sentence-embedding model used for retrieval.

    Streamlit re-executes this script on every user interaction; wrapping
    the constructor in ``st.cache_resource`` lets all reruns share a single
    model instance instead of constructing a new one each time.

    Returns:
        The ``SentenceTransformer`` for 'sentence-transformers/all-MiniLM-L6-v2'.
    """
    return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


# Module-level handle used by the retrieval code below.
retriever = _load_retriever()
|
|
|
|
|
|
|
|
|
|
|
# Built-in passages that seed the knowledge base before any upload.
_SEED_PASSAGES = (
    "Retrieval-Augmented Generation (RAG) is an AI framework that combines the strengths of retrieval-based and generative models.",
    "The main components of a RAG system are the retriever and the generator.",
    "A key benefit of Retrieval-Augmented Generation is that it can produce more accurate responses compared to standalone generative models.",
    "The retrieval process in a RAG system often relies on embedding-based models, like Sentence-BERT or DPR.",
    "Common use cases of RAG include chatbots, customer support systems, and knowledge retrieval for business intelligence.",
)

# `documents` has to be a mutable list: uploaded file text is appended later.
documents = list(_SEED_PASSAGES)

# One embedding per entry of `documents`, in the same order.
document_embeddings = retriever.encode(documents, convert_to_tensor=True)
|
|
|
def retrieve(query, top_k=1):
    """Return the knowledge-base passage that best matches *query*.

    Args:
        query: The user's question text.
        top_k: Number of candidates requested from the semantic search.
            Only the first (best-ranked) candidate is returned.

    Returns:
        The best-matching entry of ``documents``, or ``None`` when the
        search produced no hits.
    """
    encoded_query = retriever.encode(query, convert_to_tensor=True)
    # semantic_search returns one ranked hit list per query; there is one query.
    ranked = util.semantic_search(encoded_query, document_embeddings, top_k=top_k)[0]
    if not ranked:
        return None
    best_hit = ranked[0]
    return documents[best_hit['corpus_id']]
|
|
|
def _groq_client():
    """Return the Groq client to use for chat completions.

    A module-level ``client`` is reused when one has been defined; otherwise
    a client is built from the ``GROQ_API_KEY`` environment variable so the
    function no longer fails with ``NameError`` when no global exists.
    """
    existing = globals().get("client")
    if existing is not None:
        return existing
    return Groq(api_key=os.environ.get("GROQ_API_KEY"))


def generate_response(query, context):
    """Ask the chat model to answer *query* using *context*.

    Args:
        query: The user's question.
        context: Retrieved passage placed in front of the question.

    Returns:
        The text content of the first completion choice.
    """
    prompt = f"Context: {context} Question: {query} Answer:"
    completion = _groq_client().chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="gemma2-9b-it",
    )
    return completion.choices[0].message.content
|
|
|
|
|
# Page title, styled by the `.title` class.
st.markdown('<div class="title">DocumentsReader</div>', unsafe_allow_html=True)

# Kept flush-left at module level so the heading and list markers start at
# column 0 regardless of how the renderer treats leading whitespace.
_ABOUT_MARKDOWN = """
### About the App: Document-Based RAG Question Answering

This application, developed by **Hamaad Ayub Khan**, combines state-of-the-art **Retrieval-Augmented Generation (RAG)** technology with powerful AI models to answer questions based on the content of uploaded documents.

**Key Features:**

- Advanced Retrieval System
- Generative Answering Capability
- Multi-format Document Support
- Seamless Knowledge Base Update
- Contextually Rich Answers

**Developer Information:** Hamaad Ayub Khan created this application with a commitment to making information retrieval simple, accurate, and accessible.

**Social Links:**

- [GitHub](https://github.com/hakgs1234)
- [LinkedIn](https://linkedin.com/in/hamaadayubkhan)
"""

# Collapsible description of the app and its author.
with st.expander("About App"):
    st.write(_ABOUT_MARKDOWN)
|
|
|
|
|
def _extract_uploaded_text(uploaded):
    """Extract plain text from an uploaded PDF, DOCX, PPTX or TXT file.

    Args:
        uploaded: The object returned by ``st.file_uploader``; its ``type``
            attribute (MIME type) selects the parser.

    Returns:
        The extracted text, or ``None`` when the MIME type is not one of the
        four supported formats.
    """
    mime = uploaded.type
    if mime == "application/pdf":
        # PdfReader has no document-level extract_text(); text is extracted
        # page by page. `or ""` guards pages that yield no text.
        return "\n".join(
            page.extract_text() or "" for page in PdfReader(uploaded).pages
        )
    if mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return "\n".join(para.text for para in Document(uploaded).paragraphs)
    if mime == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
        return "\n".join(
            shape.text
            for slide in Presentation(uploaded).slides
            for shape in slide.shapes
            if hasattr(shape, "text")  # skip shapes that expose no text
        )
    if mime == "text/plain":
        return uploaded.read().decode("utf-8")
    return None


uploaded_file = st.file_uploader("Upload a document", type=["pdf", "docx", "pptx", "txt"])

if uploaded_file:
    file_text = _extract_uploaded_text(uploaded_file)
    if file_text is None:
        # Previously this path left `file_text` unbound and crashed below.
        st.error(f"Unsupported file type: {uploaded_file.type}")
    elif not file_text.strip():
        # An empty passage would add nothing useful to the knowledge base.
        st.warning("No readable text was found in the uploaded document.")
    else:
        documents.append(file_text)
        # Re-encode so the embeddings stay aligned with `documents`.
        document_embeddings = retriever.encode(documents, convert_to_tensor=True)
        st.success("Document content successfully added to the knowledge base.")
|
|
|
|
|
# Free-text question box; the value is empty until the user submits something.
question = st.text_input("Enter your question:")

if question:
    retrieved_context = retrieve(question)

    # Only call the generator when retrieval produced usable context.
    if retrieved_context:
        answer = generate_response(question, retrieved_context)
    else:
        answer = "I'm unable to find relevant information in the knowledge base."

    # Rendered inside the `if` so `answer` is always bound when displayed.
    st.markdown("### Answer:")
    st.write(answer)