"""Streamlit front end for "Mprofier", a retrieval-augmented Q&A assistant.

The script extracts the text of a single PDF, indexes it in an in-memory
BM25 document store, and answers user questions by retrieving the best
matching documents and passing them to a hosted Mixtral model through a
Haystack ``PromptNode``.
"""

# Import required libraries
import logging
import os
from getpass import getpass
from pprint import pprint

import PyPDF2
import streamlit as st
from dotenv import load_dotenv
from haystack import Document, Pipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import (
    AnswerParser,
    BM25Retriever,
    PreProcessor,
    PromptModel,
    PromptNode,
    PromptTemplate,
)

# Load variables from a local .env file (HF_KEY is read below).
load_dotenv()

logging.basicConfig(level=logging.DEBUG)


# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in the PDF at *pdf_path*.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The page texts joined in page order. Pages for which
        ``extract_text()`` returns nothing contribute an empty string, so
        the result is ``""`` when no page yields text.
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Extraction must happen while the file is still open.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)


# PDF that serves as the knowledge base
pdf_file_path = "Data/MR. MPROFY.pdf"

# Define QA Template
# The prompt text is assembled from adjacent literals so that the value
# handed to PromptTemplate is exactly the original single string.
qa_template = PromptTemplate(
    prompt=(
        " Hi, I'm Mprofier, your friendly AI assistant. "
        "I'm here to provide direct and concise answers to your specific questions. "
        "I won’t ask any follow-up questions myself. "
        "If I can't find the answer in the provided context, "
        "I'll simply state that I don't have enough information to answer. \n"
        "Context: {join(documents)}; Question: {query} Answer: "
    ),
    output_parser=AnswerParser(),
)

# Get Huggingface token
HF_TOKEN = os.getenv('HF_KEY')


@st.cache_resource
def build_rag_pipeline():
    """Build the retriever -> prompt-node pipeline over the PDF.

    Streamlit re-executes the script on every widget interaction; caching
    this function keeps the PDF parsing, indexing and PromptNode
    construction from being repeated on each rerun.

    Returns:
        A Haystack ``Pipeline`` with a ``retriever`` node fed by ``Query``
        and a ``prompt_node`` node fed by ``retriever``.

    Raises:
        RuntimeError: If the ``HF_KEY`` environment variable is not set.
        ValueError: If no text could be extracted from the PDF.
    """
    # Fail fast with a clear message instead of handing api_key=None to
    # PromptNode.
    if not HF_TOKEN:
        raise RuntimeError(
            "HF_KEY is not set; add it to the environment or a .env file."
        )

    # Extract text from the PDF file
    pdf_text = extract_text_from_pdf(pdf_file_path)
    if not pdf_text:
        raise ValueError("No text extracted from PDF.")

    # Create a Haystack document
    doc = Document(content=pdf_text, meta={"name": "MR. MPROFY"})

    # Initialize Document Store
    document_store = InMemoryDocumentStore(use_bm25=True)
    document_store.write_documents([doc])

    # Initialize Retriever
    retriever = BM25Retriever(document_store=document_store, top_k=2)

    # Initialize Prompt Node
    prompt_node = PromptNode(
        model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_key=HF_TOKEN,
        default_prompt_template=qa_template,
        max_length=500,
        model_kwargs={"model_max_length": 5000},
    )

    # Build Pipeline
    pipeline = Pipeline()
    pipeline.add_node(component=retriever, name="retriever", inputs=["Query"])
    pipeline.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])
    return pipeline


rag_pipeline = build_rag_pipeline()


# Streamlit Function for Handling Input and Displaying Output
def run_streamlit_app():
    """Render the question box and show the pipeline's answer on request."""
    st.title("Mprofier - AI Assistant")
    query_text = st.text_input("Enter your question:")

    if not st.button("Get Answer"):
        return

    # Do not run retrieval and the model on an empty question.
    if not query_text.strip():
        st.warning("Please enter a question first.")
        return

    response = rag_pipeline.run(query=query_text)
    answers = response["answers"]
    answer = answers[0].answer if answers else "No answer found."
    st.write(answer)


# Start the Streamlit application
if __name__ == "__main__":
    run_streamlit_app()