farhananis005 commited on
Commit
74f3f5d
1 Parent(s): 52b40c8

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +208 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-
"""Lawyer GPT

Automatically generated by Colab.

Original file is located at
https://colab.research.google.com/drive/1RPc_qH7s0_hsOswGpWRFaXbLT3eBIShJ
"""

# NOTE: the original notebook ran `!pip install ...` shell magics here.
# Those are IPython-only and a SyntaxError in a plain .py module; the
# dependencies are declared in requirements.txt instead.

import os

import openai

# Silence HuggingFace tokenizers' fork-related parallelism warning.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# SECURITY: the original file hardcoded (and thereby leaked) a live OpenAI
# API key. Read the key from the deployment environment instead; the leaked
# key must be revoked. langchain-openai picks up OPENAI_API_KEY from the
# environment on its own, so no os.environ assignment is needed here.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
28
def save_docs(docs, output_dir="/content/docs/"):
    """Copy uploaded files into a clean staging directory.

    Args:
        docs: iterable of uploaded file objects; each must expose a ``.name``
            attribute holding its on-disk path (gradio upload objects do).
        output_dir: destination directory. It is wiped and recreated on every
            call so stale uploads from a previous session never linger.
            Defaults to the original hard-coded Colab path.

    Returns:
        A status string for display in the UI.
    """
    import os
    import shutil

    # Start from an empty directory each time. (After rmtree the directory
    # cannot exist, so a single unconditional makedirs suffices.)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    for doc in docs:
        shutil.copy(doc.name, output_dir)

    return "Successful!"
45
+
46
def process_docs():
    """Load every supported document from /content/docs/, split it into
    overlapping chunks, embed the chunks with OpenAI, and persist a FAISS
    index to /content/docs_db/ for later similarity search.

    Returns:
        A status string for display in the UI.
    """
    from langchain.document_loaders import PyPDFLoader
    from langchain.document_loaders import DirectoryLoader
    from langchain.document_loaders import TextLoader
    from langchain.document_loaders import Docx2txtLoader
    from langchain.document_loaders.csv_loader import CSVLoader
    from langchain.document_loaders import UnstructuredExcelLoader
    from langchain.vectorstores import FAISS
    from langchain_openai import OpenAIEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    # One (glob, loader) pair per supported file type. Order matters: it
    # reproduces the pdf/txt/docx/csv/xlsx concatenation of the original
    # copy-pasted implementation.
    loader_specs = [
        ("./*.pdf", PyPDFLoader),
        ("./*.txt", TextLoader),
        ("./*.docx", Docx2txtLoader),
        ("./*.csv", CSVLoader),
        ("./*.xlsx", UnstructuredExcelLoader),
    ]

    documents = []
    for pattern, loader_cls in loader_specs:
        loader = DirectoryLoader("/content/docs/", glob=pattern, loader_cls=loader_cls)
        documents.extend(loader.load())

    # ~1000-char chunks with 200-char overlap keep retrieval context coherent
    # across chunk boundaries.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )

    docs = text_splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings()

    docs_db = FAISS.from_documents(docs, embeddings)
    docs_db.save_local("/content/docs_db/")

    return "Successful!"
91
+
92
# Module-level handle to the conversation chain; populated by create_agent()
# and read by search_docs(). (A bare `global` statement at module scope is a
# no-op and leaves the name undefined, so initialise it explicitly.)
agent = None


def create_agent():
    """Build the conversational agent and publish it in the module-global
    ``agent`` so the search handler can use it.

    Returns:
        A status string for display in the UI.
    """
    from langchain_openai import ChatOpenAI
    from langchain.chains.conversation.memory import ConversationSummaryBufferMemory
    from langchain.chains import ConversationChain
    global agent

    llm = ChatOpenAI(model_name='gpt-3.5-turbo-16k')
    # Summarise older turns once the running buffer exceeds ~1000 tokens.
    memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=1000)
    agent = ConversationChain(llm=llm, memory=memory, verbose=True)

    return "Successful!"
106
+
107
def formatted_response(docs, question, response, state):
    """Append the model's answer plus a source listing to the chat state.

    Each retrieved document contributes one source line (file name, plus the
    page number when its metadata carries one). The state is returned twice
    so gradio can feed it to both the chatbot widget and the session-state
    component.
    """
    lines = [response + "\n\nSources"]

    for doc in docs:
        source = doc.metadata.get('source', 'Unknown source')
        page = doc.metadata.get('page', None)
        file_name = source.split('/')[-1].strip()

        if page is not None:
            lines.append(f"{file_name}\tpage no {page}")
        else:
            lines.append(file_name)

    state.append((question, "\n".join(lines)))
    return state, state
124
+
125
def search_docs(prompt, question, state):
    """Answer *question* via similarity search over the persisted FAISS index
    plus the conversational agent created by ``create_agent``.

    Args:
        prompt: custom prefix prompt entered in the UI.
        question: the user's question.
        state: gradio chat state (list of (question, answer) pairs) or None
            on the first turn.

    Returns:
        (state, state) — the updated chat history, once for the chatbot
        widget and once for the session-state component.
    """
    from langchain_openai import OpenAIEmbeddings
    from langchain.vectorstores import FAISS
    from langchain.callbacks import get_openai_callback
    global agent
    # (The original `agent = agent` self-assignment was a no-op and has been
    # removed; the global declaration alone gives read access.)

    state = state or []

    embeddings = OpenAIEmbeddings()
    # NOTE(security): allow_dangerous_deserialization unpickles the local
    # index. Acceptable only because /content/docs_db/ is produced by this
    # app itself — never point this at an untrusted path.
    docs_db = FAISS.load_local("/content/docs_db/", embeddings, allow_dangerous_deserialization=True)
    docs = docs_db.similarity_search(question)

    # Assemble the final prompt: custom prefix, question, retrieved chunks.
    prompt += "\n\n"
    prompt += question
    prompt += "\n\n"
    prompt += str(docs)

    # Log token usage/cost for this call to stdout.
    with get_openai_callback() as cb:
        response = agent.predict(input=prompt)
        print(cb)

    return formatted_response(docs, question, response, state)
149
+
150
import gradio as gr

# Page-level CSS: centre the main column and cap its width at 75%.
css = """
.col{
max-width: 75%;
margin: 0 auto;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
}
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("## <center>Lawyer GPT: Your AI Legal Assistant</center>")

    with gr.Tab("Lawyer GPT: Your AI Legal Assistant"):
        with gr.Column(elem_classes="col"):

            # Tab 1: upload files, build the FAISS index, create the agent.
            with gr.Tab("Upload and Process Documents"):
                with gr.Column():

                    docs_upload_input = gr.Files(label="Upload File(s)")
                    docs_upload_button = gr.Button("Upload")
                    docs_upload_output = gr.Textbox(label="Output")

                    docs_process_button = gr.Button("Process")
                    docs_process_output = gr.Textbox(label="Output")

                    create_agent_button = gr.Button("Create Agent")
                    create_agent_output = gr.Textbox(label="Output")

                    gr.ClearButton([docs_upload_input, docs_upload_output, docs_process_output, create_agent_output])

            # Tab 2: chat over the indexed documents.
            with gr.Tab("Query Documents"):
                with gr.Column():

                    docs_prompt_input = gr.Textbox(label="Custom Prompt")

                    docs_chatbot = gr.Chatbot(label="Chats")
                    docs_state = gr.State()  # per-session chat history

                    docs_search_input = gr.Textbox(label="Question")
                    docs_search_button = gr.Button("Search")

                    gr.ClearButton([docs_prompt_input, docs_search_input])

    #########################################################################################################

    # Wire the buttons to their handler functions defined earlier in the file.
    docs_upload_button.click(save_docs, inputs=docs_upload_input, outputs=docs_upload_output)
    docs_process_button.click(process_docs, inputs=None, outputs=docs_process_output)
    create_agent_button.click(create_agent, inputs=None, outputs=create_agent_output)

    docs_search_button.click(search_docs, inputs=[docs_prompt_input, docs_search_input, docs_state], outputs=[docs_chatbot, docs_state])

    #########################################################################################################

# queue() enables request queuing; share=True exposes a public gradio link.
demo.queue()
demo.launch(debug=True, share=True)
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain-openai
3
+ PyPDF2
4
+ pypdf
5
+ docx2txt
6
+ unstructured
7
+ gradio
8
+ faiss-cpu
9
+ openai
10
+ tiktoken