Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files- app.py +110 -0
- app1.py +58 -0
- requirements.txt +8 -0
app.py
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from PyPDF2 import PdfReader
|
3 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
4 |
+
import os
|
5 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
6 |
+
import google.generativeai as genai
|
7 |
+
from langchain.vectorstores import FAISS
|
8 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
9 |
+
from langchain.chains.question_answering import load_qa_chain
|
10 |
+
from langchain.prompts import PromptTemplate
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
|
13 |
+
# Load environment variables from a local .env file and hand the Google
# API key to the Generative AI client.  The original also evaluated
# os.getenv("GOOGLE_API_KEY") on its own line and discarded the result —
# that dead expression is removed here.
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
|
21 |
+
## extract text from pdf files and put it on one variable
|
22 |
+
def get_pdf_text(pdf_docs):
    """Concatenate the text of every page of every uploaded PDF.

    Args:
        pdf_docs: iterable of file-like objects (Streamlit uploads).

    Returns:
        One string containing all extracted text.
    """
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() may return None for pages without a text
            # layer (e.g. scanned images); `or ""` avoids a TypeError
            # on the concatenation.
            text += page.extract_text() or ""
    return text
|
30 |
+
|
31 |
+
|
32 |
+
#break text to smaller chunks (list of chunks)
|
33 |
+
def get_text_chunks(text):
    """Split *text* into overlapping chunks sized for embedding calls."""
    # 1000-char overlap keeps context continuous across chunk borders.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    )
    return splitter.split_text(text)
|
37 |
+
|
38 |
+
#create vectore store
|
39 |
+
def get_vector_store(text_chunks):
    """Embed *text_chunks* and persist a FAISS index to "faiss_index".

    The on-disk index is what user_input() reloads to answer questions.

    Args:
        text_chunks: list of text strings to embed.

    Returns:
        The in-memory FAISS store (new; previously nothing was returned),
        so callers may also query it immediately without a disk round-trip.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local("faiss_index")
    return vector_store
|
43 |
+
|
44 |
+
|
45 |
+
def get_conversational_chain():
    """Build a "stuff" question-answering chain over retrieved documents.

    Returns:
        A QA chain that fills {context} and {question} into the prompt
        and queries the Gemini chat model.
    """
    prompt_template = """
    Answer the question using all the details you have. If the information isn't there just say, "answer is not available in the context", don't provide the wrong answer\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """

    qa_prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )
    # Low temperature keeps answers close to the supplied context.
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
|
62 |
+
|
63 |
+
|
64 |
+
|
65 |
+
def user_input(user_question):
    """Answer *user_question* from the saved FAISS index and display the reply.

    Reloads the "faiss_index" written by get_vector_store(), retrieves the
    most similar chunks, runs them through the QA chain and writes the
    answer to the Streamlit page.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    # FAISS persistence uses pickle; recent langchain versions (and
    # requirements.txt pins nothing) refuse to load it without this
    # explicit opt-in.  The index is created locally by this app, so it
    # is trusted.
    new_db = FAISS.load_local(
        "faiss_index", embeddings, allow_dangerous_deserialization=True
    )
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )

    # Leftover debug print(response) removed.
    st.write("Reply: ", response["output_text"])
|
80 |
+
|
81 |
+
##example of response
|
82 |
+
#{'output_text': "Qui utilise des codes mnémotechniques. Pour les codes opérations, on préfère des code s plus suggestif. On crée une table de cor respondance entre code \nopération de l'ordinateur, et des symboles lisibles. L'écriture d'un programme en langage assembleur exige une connaissance par faite du fonctionnement \ninterne de l'ordinateur (quelles sont les mémoires disponibles , etc.). Le langage assembleur permet à l'utilisateur de maîtrise r complètement le \nfonctionnement de son programme dans une machine déterminée. Mais : le programme est difficile à lire , et à corri ger. Le résultat est s pécifique à une pg pg , g pq\nmachine déterminée et donc non utilisable sur une machine différente."}
|
83 |
+
|
84 |
+
|
85 |
+
|
86 |
+
|
87 |
+
|
88 |
+
def main():
    """Streamlit entry point: question box plus a sidebar that ingests PDFs."""
    st.set_page_config("Chat PDF")
    st.header("Chat with PDF")

    user_question = st.text_input("Ask a Question from the PDF Files")

    if user_question:
        user_input(user_question)

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader("Upload PDF Files", accept_multiple_files=True)
        if st.button("Submit & Process"):
            # Guard: with no uploads, get_pdf_text([]) yields "" and
            # FAISS.from_texts([]) raises — refuse instead of crashing.
            if not pdf_docs:
                st.warning("Please upload at least one PDF file first.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    get_vector_store(text_chunks)
                    st.success("Done")
|
106 |
+
|
107 |
+
|
108 |
+
|
109 |
+
# Run the app when executed directly (e.g. `streamlit run app.py`).
if __name__ == "__main__":
    main()
|
app1.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import streamlit as st
|
3 |
+
from streamlit_chat import message
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
from langchain.chat_models import ChatOpenAI
|
6 |
+
from langchain.schema import SystemMessage, HumanMessage,AIMessage
|
7 |
+
|
8 |
+
##################################################################################
|
9 |
+
##variable d'environement
|
10 |
+
load_dotenv()
|
11 |
+
|
12 |
+
##################################################################################
|
13 |
+
##streamlit page
|
14 |
+
def start():
    """Configure the Streamlit page and render the chatbot header."""
    # Browser-tab title; fixes the "heeloo" typo.
    st.set_page_config(page_title="hello")
    st.header('chatbot')
    st.divider()


start()
|
19 |
+
|
20 |
+
#message('how are you')
|
21 |
+
#message('how are you', is_user=True)
|
22 |
+
|
23 |
+
|
24 |
+
###################################################################################
|
25 |
+
def main():
    """Chat loop: collect a question, query the model, render the history."""
    # temperature must be a float; the original passed the string '0.5'.
    chat = ChatOpenAI(temperature=0.5)

    # Seed the conversation once per browser session.
    if "messages" not in st.session_state:
        st.session_state.messages = [
            SystemMessage(content="You are an assistant")
        ]

    input_user = st.text_input("Ask Question ", placeholder="Ask Question")
    button = st.button('Ask', use_container_width=True)

    # Ignore clicks with an empty question box.
    if button and input_user:
        # Record the question, ask the model, record the answer.  The
        # original also called message(resposne, ...) here, which passed
        # the AIMessage object (not its .content) and duplicated the
        # rendering done by the history loop below — removed.
        st.session_state.messages.append(HumanMessage(content=input_user))
        response = chat(st.session_state.messages)
        st.session_state.messages.append(AIMessage(content=response.content))

    # Re-render the whole history each run.  Skip the SystemMessage at
    # index 0 (the original rendered it as a user bubble) so the
    # human/AI alternation starts at the first real question.
    messages = st.session_state.get('messages', [])
    for i, msg in enumerate(messages[1:]):
        if i % 2 == 0:
            message(msg.content, is_user=True, key=str(i) + '_user')
        else:
            message(msg.content, is_user=False, key=str(i) + '_ai')


main()
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain
|
2 |
+
streamlit
|
3 |
+
openai
|
4 |
+
python-dotenv
|
5 |
+
PyPDF2
|
6 |
+
faiss-cpu
|
7 |
+
google-generativeai
|
8 |
+
langchain_google_genai
streamlit-chat
|