Maxx0 committed on
Commit
fcecc27
1 Parent(s): d1be27a
Files changed (1)
  1. app.py +82 -0
app.py ADDED
@@ -0,0 +1,82 @@
+ # Import required libraries
+ import os
+ import logging
+ import PyPDF2
+ from getpass import getpass
+ from haystack.nodes import PreProcessor, PromptModel, PromptTemplate, PromptNode, AnswerParser
+ from haystack.document_stores import InMemoryDocumentStore
+ from haystack import Document, Pipeline
+ from haystack.nodes import BM25Retriever
+ from pprint import pprint
+ import streamlit as st
+ from dotenv import load_dotenv
+
+ # Load environment variables; the Hugging Face API token is read from HF_TOKEN
+ load_dotenv()
+ HF_TOKEN = os.getenv("HF_TOKEN")
+
+ logging.basicConfig(level=logging.DEBUG)
+
+ # Function to extract text from a PDF
+ def extract_text_from_pdf(pdf_path):
+     text = ""
+     with open(pdf_path, "rb") as pdf_file:
+         pdf_reader = PyPDF2.PdfReader(pdf_file)
+         for page_num in range(len(pdf_reader.pages)):
+             page = pdf_reader.pages[page_num]
+             text += page.extract_text() or ""
+     return text
+
+ # Extract text from the PDF file
+ pdf_file_path = "Data/MR. MPROFY.pdf"
+ pdf_text = extract_text_from_pdf(pdf_file_path)
+ if not pdf_text:
+     raise ValueError("No text extracted from PDF.")
+
+ # Create a Haystack document
+ doc = Document(content=pdf_text, meta={"name": "MR. MPROFY"})
+
+ # Initialize Document Store
+ document_store = InMemoryDocumentStore(use_bm25=True)
+ document_store.write_documents([doc])
+
+ # Initialize Retriever
+ retriever = BM25Retriever(document_store=document_store, top_k=2)
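+ # Note: the store holds a single document, so top_k=2 can return at most that one result.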
+
+ # Define QA Template
+ qa_template = PromptTemplate(
+     prompt="""
+     Hi, I'm Mprofier, your friendly AI assistant. I'm here to provide direct and concise answers to your specific questions.
+     I won't ask any follow-up questions myself.
+     If I can't find the answer in the provided context, I'll simply state that I don't have enough information to answer.
+     Context: {join(documents)};
+     Question: {query}
+     Answer:
+     """,
+     output_parser=AnswerParser()
+ )
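+ # At query time, {join(documents)} is filled with the concatenated retrieved documents and {query} with the user's question.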
+
+ # Initialize Prompt Node
+ prompt_node = PromptNode(
+     model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
+     api_key=HF_TOKEN,
+     default_prompt_template=qa_template,
+     max_length=500,
+     model_kwargs={"model_max_length": 5000}
+ )
+
+ # Build Pipeline
+ rag_pipeline = Pipeline()
+ rag_pipeline.add_node(component=retriever, name="retriever", inputs=["Query"])
+ rag_pipeline.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])
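+ # Pipeline.run() returns a dict; AnswerParser exposes the model output as Answer objects under the "answers" key.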
+
+ # Streamlit Function for Handling Input and Displaying Output
+ def run_streamlit_app():
+     st.title("Mprofier - AI Assistant")
+     query_text = st.text_input("Enter your question:")
+
+     if st.button("Get Answer"):
+         response = rag_pipeline.run(query=query_text)
+         answer = response["answers"][0].answer if response["answers"] else "No answer found."
+         st.write(answer)
+
+ # Start the Streamlit application
+ if __name__ == "__main__":
+     run_streamlit_app()
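For a quick sanity check of the pipeline outside the Streamlit UI, a minimal sketch (the question text is only an illustration; it assumes HF_TOKEN is set in the environment or a local .env file and that the PDF exists at Data/MR. MPROFY.pdf):

result = rag_pipeline.run(query="What does Mprofy offer?")
print(result["answers"][0].answer if result["answers"] else "No answer found.")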