Spaces:
Sleeping
Sleeping
File size: 2,781 Bytes
180ff94 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# Import required libraries
import PyPDF2
from getpass import getpass
from haystack.nodes import PreProcessor, PromptModel, PromptTemplate, PromptNode, AnswerParser
from haystack.document_stores import InMemoryDocumentStore
from haystack import Document, Pipeline
from haystack.nodes import BM25Retriever
from pprint import pprint
import streamlit as st
import logging
from dotenv import load_dotenv
load_dotenv()
import os
import logging
logging.basicConfig(level=logging.DEBUG)
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in the PDF at *pdf_path*.

    Args:
        pdf_path: Path of the PDF file; it is opened in binary mode.

    Returns:
        A single string with the text of all pages joined in page order.
        A page for which ``extract_text()`` returns a falsy value (``None``
        or an empty string) contributes an empty string, so the result is
        ``""`` when no page yields any text.

    Raises:
        OSError: If the file cannot be opened (propagated from ``open``).
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Pages are read while the file is still open; join once instead of
        # growing the string with ``+=`` on every page.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Extract text from the PDF file
pdf_file_path = "Data/MR. MPROFY.pdf"
pdf_text = extract_text_from_pdf(pdf_file_path)

# Abort start-up when nothing usable was extracted. Whitespace-only output is
# treated the same as an empty string: it gives the retriever and the prompt
# no content to work with.
if not pdf_text or not pdf_text.strip():
    raise ValueError("No text extracted from PDF.")
# Wrap the extracted PDF text in one Haystack Document.
# NOTE(review): the whole PDF is stored as a single Document here, so the
# retriever's top_k=2 can never return more than this one document unless
# more documents are written elsewhere -- confirm this is intended.
doc = Document(
    content=pdf_text,
    meta={"name": "MR. MPROFY"},
)

# In-memory document store with BM25 enabled, holding that document.
document_store = InMemoryDocumentStore(use_bm25=True)
document_store.write_documents([doc])

# BM25 retriever over the store, limited to two results per query.
retriever = BM25Retriever(
    document_store=document_store,
    top_k=2,
)
# Define QA Template
# This template is passed to the PromptNode as its default prompt template.
# The text between the triple quotes is sent to the model as-is, so any edit
# to its wording or whitespace changes runtime behavior.
# `{join(documents)}` and `{query}` are template placeholders -- presumably
# expanded by Haystack with the retrieved documents and the user's question;
# confirm against the PromptTemplate documentation.
# NOTE(review): the UI code reads `response["answers"][0].answer`, which
# presumably relies on the AnswerParser configured below -- confirm.
qa_template = PromptTemplate(
prompt="""
Hi, I'm Mprofier, your friendly AI assistant. I'm here to provide direct and concise answers to your specific questions.
I won’t ask any follow-up questions myself.
If I can't find the answer in the provided context, I'll simply state that I don't have enough information to answer.
Context: {join(documents)};
Question: {query}
Answer:
""",
output_parser=AnswerParser()
)
# Get Huggingface token (read from the HF_KEY environment variable, which may
# have been populated from a .env file by load_dotenv() above)
HF_TOKEN = os.getenv('HF_KEY')
if not HF_TOKEN:
    # Fail fast with an actionable message. Otherwise api_key=None is passed
    # to PromptNode and the problem only surfaces later, when the model is
    # loaded or queried.
    raise ValueError(
        "Hugging Face API token not found: set the HF_KEY environment variable."
    )

# Initialize Prompt Node
# The node uses the Mixtral instruct model with the QA template as its default
# prompt. NOTE(review): max_length presumably caps the generated answer and
# model_max_length the prompt size, both in tokens -- confirm against the
# Haystack PromptNode documentation.
prompt_node = PromptNode(
    model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
    api_key=HF_TOKEN,
    default_prompt_template=qa_template,
    max_length=500,
    model_kwargs={"model_max_length": 5000},
)
# Assemble the retrieval-augmented pipeline: Query -> retriever -> prompt_node
rag_pipeline = Pipeline()

# Stage 1: the retriever is fed directly from the pipeline's "Query" input.
rag_pipeline.add_node(
    component=retriever,
    name="retriever",
    inputs=["Query"],
)

# Stage 2: the prompt node consumes the retriever's output.
rag_pipeline.add_node(
    component=prompt_node,
    name="prompt_node",
    inputs=["retriever"],
)
# Streamlit Function for Handling Input and Displaying Output
def run_streamlit_app():
    """Render the question form and display the pipeline's answer.

    Draws the page title, a text input and a "Get Answer" button. When the
    button is pressed with a non-empty question, the question is run through
    ``rag_pipeline`` and the first answer is written to the page, or
    "No answer found." when the pipeline returns no answers. Pressing the
    button with an empty or whitespace-only question shows a warning instead
    of querying the pipeline.

    Returns:
        None. All output goes to the Streamlit page.
    """
    st.title("Mprofier - AI Assistant")
    query_text = st.text_input("Enter your question:")

    if not st.button("Get Answer"):
        return

    # Do not spend a retrieval + LLM call on an empty question.
    question = (query_text or "").strip()
    if not question:
        st.warning("Please enter a question first.")
        return

    response = rag_pipeline.run(query=question)
    # Tolerate a result without an "answers" entry instead of raising KeyError.
    answers = (response or {}).get("answers") or []
    st.write(answers[0].answer if answers else "No answer found.")


# Start the Streamlit application only when this file is executed as the main
# script, not when it is imported as a module.
if __name__ == "__main__":
    run_streamlit_app()
|