AbdallaouiMed committed on
Commit
c0468f1
1 Parent(s): 3a416cd

Delete ask-multiple-pdfs-main

Browse files
ask-multiple-pdfs-main/.DS_Store DELETED
Binary file (6.15 kB)
 
ask-multiple-pdfs-main/.gitignore DELETED
@@ -1,160 +0,0 @@
1
- # Byte-compiled / optimized / DLL files
2
- __pycache__/
3
- *.py[cod]
4
- *$py.class
5
-
6
- # C extensions
7
- *.so
8
-
9
- # Distribution / packaging
10
- .Python
11
- build/
12
- develop-eggs/
13
- dist/
14
- downloads/
15
- eggs/
16
- .eggs/
17
- lib/
18
- lib64/
19
- parts/
20
- sdist/
21
- var/
22
- wheels/
23
- share/python-wheels/
24
- *.egg-info/
25
- .installed.cfg
26
- *.egg
27
- MANIFEST
28
-
29
- # PyInstaller
30
- # Usually these files are written by a python script from a template
31
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
- *.manifest
33
- *.spec
34
-
35
- # Installer logs
36
- pip-log.txt
37
- pip-delete-this-directory.txt
38
-
39
- # Unit test / coverage reports
40
- htmlcov/
41
- .tox/
42
- .nox/
43
- .coverage
44
- .coverage.*
45
- .cache
46
- nosetests.xml
47
- coverage.xml
48
- *.cover
49
- *.py,cover
50
- .hypothesis/
51
- .pytest_cache/
52
- cover/
53
-
54
- # Translations
55
- *.mo
56
- *.pot
57
-
58
- # Django stuff:
59
- *.log
60
- local_settings.py
61
- db.sqlite3
62
- db.sqlite3-journal
63
-
64
- # Flask stuff:
65
- instance/
66
- .webassets-cache
67
-
68
- # Scrapy stuff:
69
- .scrapy
70
-
71
- # Sphinx documentation
72
- docs/_build/
73
-
74
- # PyBuilder
75
- .pybuilder/
76
- target/
77
-
78
- # Jupyter Notebook
79
- .ipynb_checkpoints
80
-
81
- # IPython
82
- profile_default/
83
- ipython_config.py
84
-
85
- # pyenv
86
- # For a library or package, you might want to ignore these files since the code is
87
- # intended to run in multiple environments; otherwise, check them in:
88
- # .python-version
89
-
90
- # pipenv
91
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
- # install all needed dependencies.
95
- #Pipfile.lock
96
-
97
- # poetry
98
- # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
- # This is especially recommended for binary packages to ensure reproducibility, and is more
100
- # commonly ignored for libraries.
101
- # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
- #poetry.lock
103
-
104
- # pdm
105
- # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
- #pdm.lock
107
- # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
- # in version control.
109
- # https://pdm.fming.dev/#use-with-ide
110
- .pdm.toml
111
-
112
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
- __pypackages__/
114
-
115
- # Celery stuff
116
- celerybeat-schedule
117
- celerybeat.pid
118
-
119
- # SageMath parsed files
120
- *.sage.py
121
-
122
- # Environments
123
- .env
124
- .venv
125
- env/
126
- venv/
127
- ENV/
128
- env.bak/
129
- venv.bak/
130
-
131
- # Spyder project settings
132
- .spyderproject
133
- .spyproject
134
-
135
- # Rope project settings
136
- .ropeproject
137
-
138
- # mkdocs documentation
139
- /site
140
-
141
- # mypy
142
- .mypy_cache/
143
- .dmypy.json
144
- dmypy.json
145
-
146
- # Pyre type checker
147
- .pyre/
148
-
149
- # pytype static type analyzer
150
- .pytype/
151
-
152
- # Cython debug symbols
153
- cython_debug/
154
-
155
- # PyCharm
156
- # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
- # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
- # and can be added to the global gitignore or merged into this file. For a more nuclear
159
- # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
- .idea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ask-multiple-pdfs-main/Pipfile DELETED
@@ -1,19 +0,0 @@
1
- [[source]]
2
- url = "https://pypi.org/simple"
3
- verify_ssl = true
4
- name = "pypi"
5
-
6
- [packages]
7
- langchain = "*"
8
- pypdf2 = "==3.0.1"
9
- python-dotenv = "==1.0.0"
10
- streamlit = "==1.18.1"
11
- openai = "==0.27.6"
12
- faiss-cpu = "==1.7.4"
13
- altair = "==4"
14
- tiktoken = "==0.4.0"
15
-
16
- [dev-packages]
17
-
18
- [requires]
19
- python_version = "3.10"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ask-multiple-pdfs-main/Pipfile.lock DELETED
The diff for this file is too large to render. See raw diff
 
ask-multiple-pdfs-main/__pycache__/htmlTemplates.cpython-39.pyc DELETED
Binary file (1.03 kB)
 
ask-multiple-pdfs-main/app.py DELETED
@@ -1,105 +0,0 @@
1
- import streamlit as st
2
- from dotenv import load_dotenv
3
- from PyPDF2 import PdfReader
4
- from langchain.text_splitter import CharacterTextSplitter
5
- from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
6
- from langchain.vectorstores import FAISS
7
- from langchain.chat_models import ChatOpenAI
8
- from langchain.memory import ConversationBufferMemory
9
- from langchain.chains import ConversationalRetrievalChain
10
- from htmlTemplates import css, bot_template, user_template
11
- from langchain.llms import HuggingFaceHub
12
-
13
- def get_pdf_text(pdf_docs):
14
- text = ""
15
- for pdf in pdf_docs:
16
- pdf_reader = PdfReader(pdf)
17
- for page in pdf_reader.pages:
18
- text += page.extract_text()
19
- return text
20
-
21
-
22
- def get_text_chunks(text):
23
- text_splitter = CharacterTextSplitter(
24
- separator="\n",
25
- chunk_size=1000,
26
- chunk_overlap=200,
27
- length_function=len
28
- )
29
- chunks = text_splitter.split_text(text)
30
- return chunks
31
-
32
-
33
- def get_vectorstore(text_chunks):
34
- # embeddings = OpenAIEmbeddings()
35
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
36
- vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
37
- return vectorstore
38
-
39
-
40
- def get_conversation_chain(vectorstore):
41
- # llm = ChatOpenAI()
42
- llm = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.5, "max_length":512})
43
-
44
- memory = ConversationBufferMemory(
45
- memory_key='chat_history', return_messages=True)
46
- conversation_chain = ConversationalRetrievalChain.from_llm(
47
- llm=llm,
48
- retriever=vectorstore.as_retriever(),
49
- memory=memory
50
- )
51
- return conversation_chain
52
-
53
-
54
- def handle_userinput(user_question):
55
- response = st.session_state.conversation({'question': user_question})
56
- # st.session_state.chat_history = response['chat_history']
57
-
58
- for i, message in enumerate(st.session_state.chat_history):
59
- if i % 2 == 0:
60
- st.write(user_template.replace(
61
- "{{MSG}}", message.content), unsafe_allow_html=True)
62
- else:
63
- st.write(bot_template.replace(
64
- "{{MSG}}", message.content), unsafe_allow_html=True)
65
-
66
-
67
- def main():
68
- load_dotenv()
69
- st.set_page_config(page_title="Chat with multiple PDFs",
70
- page_icon=":books:")
71
- st.write(css, unsafe_allow_html=True)
72
-
73
- if "conversation" not in st.session_state:
74
- st.session_state.conversation = None
75
- if "chat_history" not in st.session_state:
76
- st.session_state.chat_history = None
77
-
78
- st.header("Chat with multiple PDFs :books:")
79
- user_question = st.text_input("Ask a question about your documents:")
80
- if user_question:
81
- handle_userinput(user_question)
82
-
83
- with st.sidebar:
84
- st.subheader("Your documents")
85
- pdf_docs = st.file_uploader(
86
- "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
87
- if st.button("Process"):
88
- with st.spinner("Processing"):
89
- # get pdf text
90
- raw_text = get_pdf_text(pdf_docs)
91
-
92
- # get the text chunks
93
- text_chunks = get_text_chunks(raw_text)
94
-
95
- # create vector store
96
- vectorstore = get_vectorstore(text_chunks)
97
-
98
- # create conversation chain
99
- st.session_state.conversation = get_conversation_chain(
100
- vectorstore)
101
- st.write("Processed Successfuly")
102
-
103
-
104
- if __name__ == '__main__':
105
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ask-multiple-pdfs-main/htmlTemplates.py DELETED
@@ -1,44 +0,0 @@
1
- css = '''
2
- <style>
3
- .chat-message {
4
- padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
5
- }
6
- .chat-message.user {
7
- background-color: #2b313e
8
- }
9
- .chat-message.bot {
10
- background-color: #475063
11
- }
12
- .chat-message .avatar {
13
- width: 20%;
14
- }
15
- .chat-message .avatar img {
16
- max-width: 78px;
17
- max-height: 78px;
18
- border-radius: 50%;
19
- object-fit: cover;
20
- }
21
- .chat-message .message {
22
- width: 80%;
23
- padding: 0 1.5rem;
24
- color: #fff;
25
- }
26
- '''
27
-
28
- bot_template = '''
29
- <div class="chat-message bot">
30
- <div class="avatar">
31
- <img src="/Users/mohamedabdallaoui/Desktop/ayoub_bot.png">
32
- </div>
33
- <div class="message">{{MSG}}</div>
34
- </div>
35
- '''
36
-
37
- user_template = '''
38
- <div class="chat-message user">
39
- <div class="avatar">
40
- <img src="/Users/mohamedabdallaoui/Desktop/mohamed_bot.png">
41
- </div>
42
- <div class="message">{{MSG}}</div>
43
- </div>
44
- '''