darbysween awacke1 committed

Commit 9cbedc2 • 0 Parent(s)

Duplicate from AIZero2HeroBootcamp/ChatGPTandLangchain


Co-authored-by: Aaron C Wacker <[email protected]>

Files changed (5)
  1. .gitattributes +35 -0
  2. README.md +14 -0
  3. app.py +442 -0
  4. requirements.txt +12 -0
  5. templates.py +44 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: ChatGPTandLangchain
+ emoji: 😻
+ colorFrom: indigo
+ colorTo: red
+ sdk: streamlit
+ sdk_version: 1.21.0
+ app_file: app.py
+ pinned: false
+ license: mit
+ duplicated_from: AIZero2HeroBootcamp/ChatGPTandLangchain
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,442 @@
+ import streamlit as st
+ import openai
+ import os
+ import base64
+ import glob
+ import json
+ import mistune
+ import pytz
+ import math
+ import requests
+ import time
+ import re
+ import textract
+
+ from datetime import datetime
+ from openai import ChatCompletion
+ from xml.etree import ElementTree as ET
+ from bs4 import BeautifulSoup
+ from collections import deque
+ from audio_recorder_streamlit import audio_recorder
+
+ from dotenv import load_dotenv
+ from PyPDF2 import PdfReader
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.embeddings import OpenAIEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.chat_models import ChatOpenAI
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains import ConversationalRetrievalChain
+ from templates import css, bot_template, user_template
+
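+ # Note: an OPENAI_API_KEY is expected in the environment (e.g. from a .env file
+ # loaded via python-dotenv below); the chat, embedding, and Whisper transcription
+ # calls all authenticate with it.
+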
+ def generate_filename(prompt, file_type):
+     central = pytz.timezone('US/Central')
+     safe_date_time = datetime.now(central).strftime("%m%d_%H%M")  # MMDD_HHMM timestamp
+     safe_prompt = "".join(x for x in prompt if x.isalnum())[:90]  # keep alphanumerics only and cap the length
+     return f"{safe_date_time}_{safe_prompt}.{file_type}"  # return a safe file name
+
+ def transcribe_audio(openai_key, file_path, model):
+     OPENAI_API_URL = "https://api.openai.com/v1/audio/transcriptions"
+     headers = {
+         "Authorization": f"Bearer {openai_key}",
+     }
+     with open(file_path, 'rb') as f:
+         data = {'file': f}
+         response = requests.post(OPENAI_API_URL, headers=headers, files=data, data={'model': model})
+     if response.status_code == 200:
+         st.write(response.json())
+         transcript = response.json().get('text')
+         chatResponse = chat_with_model(transcript, '')
+         filename = generate_filename(transcript, 'txt')
+         create_file(filename, transcript, chatResponse)
+         return transcript
+     else:
+         st.write(response.json())
+         st.error("Error in API call.")
+         return None
+
+ def save_and_play_audio(audio_recorder):
+     audio_bytes = audio_recorder()
+     if audio_bytes:
+         filename = generate_filename("Recording", "wav")
+         with open(filename, 'wb') as f:
+             f.write(audio_bytes)
+         st.audio(audio_bytes, format="audio/wav")
+         return filename
+     return None
+
+ def create_file(filename, prompt, response):
+     if filename.endswith(".txt"):
+         with open(filename, 'w') as file:
+             file.write(f"{prompt}\n{response}")
+     elif filename.endswith(".htm"):
+         with open(filename, 'w') as file:
+             file.write(f"{prompt} {response}")
+     elif filename.endswith(".md"):
+         with open(filename, 'w') as file:
+             file.write(f"{prompt}\n\n{response}")
+
+ def truncate_document(document, length):
+     return document[:length]
+
+ def divide_document(document, max_length):
+     return [document[i:i+max_length] for i in range(0, len(document), max_length)]
+
+ def get_table_download_link(file_path):
+     with open(file_path, 'r') as file:
+         try:
+             data = file.read()
+         except:
+             st.write('')
+             return file_path  # fall back to the raw path if the file cannot be read
+     b64 = base64.b64encode(data.encode()).decode()
+     file_name = os.path.basename(file_path)
+     ext = os.path.splitext(file_name)[1]  # get the file extension
+     if ext in ('.txt', '.py', '.xlsx', '.csv'):
+         mime_type = 'text/plain'
+     elif ext == '.htm':
+         mime_type = 'text/html'
+     elif ext == '.md':
+         mime_type = 'text/markdown'
+     else:
+         mime_type = 'application/octet-stream'  # general binary data type
+     href = f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{file_name}">{file_name}</a>'
+     return href
+
+ def CompressXML(xml_text):
+     # Strip comment nodes; ElementTree elements have no parent pointer,
+     # so build a child-to-parent map before removing anything.
+     root = ET.fromstring(xml_text)
+     parent_map = {child: parent for parent in root.iter() for child in parent}
+     for elem in list(root.iter()):
+         if isinstance(elem.tag, str) and 'Comment' in elem.tag:
+             parent_map[elem].remove(elem)
+     return ET.tostring(root, encoding='unicode', method="xml")
+
+ def read_file_content(file, max_length):
+     if file.type == "application/json":
+         content = json.load(file)
+         return str(content)
+     elif file.type == "text/html" or file.type == "text/htm":
+         content = BeautifulSoup(file, "html.parser")
+         return content.text
+     elif file.type == "application/xml" or file.type == "text/xml":
+         tree = ET.parse(file)
+         root = tree.getroot()
+         xml = CompressXML(ET.tostring(root, encoding='unicode'))
+         return xml
+     elif file.type == "text/markdown" or file.type == "text/md":
+         md = mistune.create_markdown()
+         content = md(file.read().decode())
+         return content
+     elif file.type == "text/plain":
+         return file.getvalue().decode()
+     else:
+         return ""
+
+ def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
+     model = model_choice
+     conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
+     conversation.append({'role': 'user', 'content': prompt})
+     if len(document_section) > 0:
+         conversation.append({'role': 'assistant', 'content': document_section})
+
+     start_time = time.time()
+     report = []
+     res_box = st.empty()
+     collected_chunks = []
+     collected_messages = []
+
+     # Stream the completion, rendering the partial result as it arrives
+     for chunk in openai.ChatCompletion.create(
+         model=model,  # honor the caller's model choice rather than a hardcoded literal
+         messages=conversation,
+         temperature=0.5,
+         stream=True
+     ):
+         collected_chunks.append(chunk)  # save the event response
+         chunk_message = chunk['choices'][0]['delta']  # extract the message
+         collected_messages.append(chunk_message)  # save the message
+
+         content = chunk['choices'][0].get('delta', {}).get('content')
+         if content:  # the final streamed chunk carries no content
+             report.append(content)
+             result = "".join(report).strip()
+             res_box.markdown(f'*{result}*')
+
+     full_reply_content = ''.join([m.get('content', '') for m in collected_messages])
+     st.write("Elapsed time:")
+     st.write(time.time() - start_time)
+     return full_reply_content
+
+ def chat_with_file_contents(prompt, file_content, model_choice='gpt-3.5-turbo'):
+     conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
+     conversation.append({'role': 'user', 'content': prompt})
+     if len(file_content) > 0:
+         conversation.append({'role': 'assistant', 'content': file_content})
+     response = openai.ChatCompletion.create(model=model_choice, messages=conversation)
+     return response['choices'][0]['message']['content']
+
+ def extract_mime_type(file):
+     # Check if the input is a string
+     if isinstance(file, str):
+         pattern = r"type='(.*?)'"
+         match = re.search(pattern, file)
+         if match:
+             return match.group(1)
+         else:
+             raise ValueError(f"Unable to extract MIME type from {file}")
+     # Otherwise assume it's a Streamlit UploadedFile object, which exposes .type
+     elif hasattr(file, 'type'):
+         return file.type
+     else:
+         raise TypeError("Input should be a string or a Streamlit UploadedFile object")
+
+ from io import BytesIO
+
+ def extract_file_extension(file):
+     # get the file name directly from the UploadedFile object
+     file_name = file.name
+     pattern = r".*?\.(.*?)$"
+     match = re.search(pattern, file_name)
+     if match:
+         return match.group(1)
+     else:
+         raise ValueError(f"Unable to extract file extension from {file_name}")
+
+ def pdf2txt(docs):
+     text = ""
+     for file in docs:
+         file_extension = extract_file_extension(file)
+         # print the file extension
+         st.write(f"File type extension: {file_extension}")
+
+         # read the file according to its extension
+         try:
+             if file_extension.lower() in ['py', 'txt', 'html', 'htm', 'xml', 'json']:
+                 text += file.getvalue().decode('utf-8')
+             elif file_extension.lower() == 'pdf':
+                 pdf = PdfReader(BytesIO(file.getvalue()))
+                 for page in pdf.pages:
+                     text += page.extract_text()  # new PyPDF2 syntax
+         except Exception as e:
+             st.write(f"Error processing file {file.name}: {e}")
+
+     return text
+
+ def pdf2txt_old(pdf_docs):
+     st.write(pdf_docs)
+     for file in pdf_docs:
+         mime_type = extract_mime_type(file)
+         st.write(f"MIME type of file: {mime_type}")
+
+     text = ""
+     for pdf in pdf_docs:
+         pdf_reader = PdfReader(pdf)
+         for page in pdf_reader.pages:
+             text += page.extract_text()
+     return text
+
+ def txt2chunks(text):
+     text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
+     return text_splitter.split_text(text)
+
+ def vector_store(text_chunks):
+     key = os.getenv('OPENAI_API_KEY')
+     embeddings = OpenAIEmbeddings(openai_api_key=key)
+     return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+
+ def get_chain(vectorstore):
+     llm = ChatOpenAI()
+     memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+     return ConversationalRetrievalChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(), memory=memory)
+
+ def process_user_input(user_question):
+     response = st.session_state.conversation({'question': user_question})
+     st.session_state.chat_history = response['chat_history']
+     for i, message in enumerate(st.session_state.chat_history):
+         template = user_template if i % 2 == 0 else bot_template
+         st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
+     # Save file output from PDF query results (uses the last message in the history)
+     filename = generate_filename(user_question, 'txt')
+     create_file(filename, user_question, message.content)
+     #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
+
+ def divide_prompt(prompt, max_length):
+     words = prompt.split()
+     chunks = []
+     current_chunk = []
+     current_length = 0
+     for word in words:
+         if len(word) + current_length <= max_length:
+             current_length += len(word) + 1  # Adding 1 to account for spaces
+             current_chunk.append(word)
+         else:
+             chunks.append(' '.join(current_chunk))
+             current_chunk = [word]
+             current_length = len(word)
+     chunks.append(' '.join(current_chunk))  # Append the final chunk
+     return chunks
+
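+ # main() wires the Streamlit UI together: sidebar controls for output file type and
+ # model, audio capture with Whisper transcription, a free-form prompt box, sectioned
+ # uploads for long documents, and a sidebar browser over files generated this session.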
+ def main():
+     # Sidebar and global
+     openai.api_key = os.getenv('OPENAI_API_KEY')
+     st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
+
+     # File type for output, model choice
+     menu = ["txt", "htm", "xlsx", "csv", "md", "py"]
+     choice = st.sidebar.selectbox("Output File Type:", menu)
+     model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'))
+
+     # Audio, transcribe, GPT:
+     filename = save_and_play_audio(audio_recorder)
+     if filename is not None:
+         transcription = transcribe_audio(openai.api_key, filename, "whisper-1")
+         st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
+         filename = None  # transcription is finished; later reruns reuse the saved transcript
+
+     # prompt interfaces
+     user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)
+
+     # file section interface for prompts against large documents as context
+     collength, colupload = st.columns([2, 3])  # adjust the ratio as needed
+     with collength:
+         max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
+     with colupload:
+         uploaded_file = st.file_uploader("Add a file for context:", type=["pdf", "xml", "json", "xlsx", "csv", "html", "htm", "md", "txt"])
+
+     # Document section chat
+     document_sections = deque()
+     document_responses = {}
+     if uploaded_file is not None:
+         file_content = read_file_content(uploaded_file, max_length)
+         document_sections.extend(divide_document(file_content, max_length))
+     if len(document_sections) > 0:
+         if st.button("👁️ View Upload"):
+             st.markdown("**Sections of the uploaded file:**")
+             for i, section in enumerate(list(document_sections)):
+                 st.markdown(f"**Section {i+1}**\n{section}")
+         st.markdown("**Chat with the model:**")
+         for i, section in enumerate(list(document_sections)):
+             if i in document_responses:
+                 st.markdown(f"**Section {i+1}**\n{document_responses[i]}")
+             else:
+                 if st.button(f"Chat about Section {i+1}"):
+                     st.write('Reasoning with your inputs...')
+                     response = chat_with_model(user_prompt, section, model_choice)
+                     st.write('Response:')
+                     st.write(response)
+                     document_responses[i] = response
+                     filename = generate_filename(f"{user_prompt}_section_{i+1}", choice)
+                     create_file(filename, user_prompt, response)
+                     st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
+
+     if st.button('💬 Chat'):
+         st.write('Reasoning with your inputs...')
+
+         # Divide the user_prompt into smaller sections and send each with the document context
+         user_prompt_sections = divide_prompt(user_prompt, max_length)
+         full_response = ''
+         for prompt_section in user_prompt_sections:
+             # Process each section with the model
+             response = chat_with_model(prompt_section, ''.join(list(document_sections)), model_choice)
+             full_response += response + '\n'  # Combine the responses
+
+         response = full_response
+         st.write('Response:')
+         st.write(response)
+
+         filename = generate_filename(user_prompt, choice)
+         create_file(filename, user_prompt, response)
+         st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
+
+     all_files = glob.glob("*.*")
+     all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 20]  # exclude files with short names
+     all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type, then name, descending
+
+     # sidebar of files
+     file_contents = ''
+     next_action = ''
+     for file in all_files:
+         col1, col2, col3, col4, col5 = st.sidebar.columns([1, 6, 1, 1, 1])  # adjust the ratio as needed
+         with col1:
+             if st.button("🌐", key="md_" + file):  # md emoji button
+                 with open(file, 'r') as f:
+                     file_contents = f.read()
+                 next_action = 'md'
+         with col2:
+             st.markdown(get_table_download_link(file), unsafe_allow_html=True)
+         with col3:
+             if st.button("📂", key="open_" + file):  # open emoji button
+                 with open(file, 'r') as f:
+                     file_contents = f.read()
+                 next_action = 'open'
+         with col4:
+             if st.button("🔍", key="read_" + file):  # search emoji button
+                 with open(file, 'r') as f:
+                     file_contents = f.read()
+                 next_action = 'search'
+         with col5:
+             if st.button("🗑", key="delete_" + file):
+                 os.remove(file)
+                 st.experimental_rerun()
+
+     if len(file_contents) > 0:
+         if next_action == 'open':
+             file_content_area = st.text_area("File Contents:", file_contents, height=500)
+         if next_action == 'md':
+             st.markdown(file_contents)
+         if next_action == 'search':
+             file_content_area = st.text_area("File Contents:", file_contents, height=500)
+             st.write('Reasoning with your inputs...')
+             response = chat_with_model(user_prompt, file_contents, model_choice)
+             filename = generate_filename(file_contents, choice)
+             create_file(filename, file_contents, response)
+
+             st.experimental_rerun()
+             #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
+
+ if __name__ == "__main__":
+     main()
+
+ # Module-level document-chat UI; runs on every Streamlit rerun after main()
+ load_dotenv()
+ st.write(css, unsafe_allow_html=True)
+
+ st.header("Chat with documents :books:")
+ user_question = st.text_input("Ask a question about your documents:")
+ if user_question:
+     process_user_input(user_question)  # requires documents to have been processed below
+
+ with st.sidebar:
+     st.subheader("Your documents")
+     docs = st.file_uploader("import documents", accept_multiple_files=True)
+     with st.spinner("Processing"):
+         raw = pdf2txt(docs)
+         if len(raw) > 0:
+             length = str(len(raw))
+             text_chunks = txt2chunks(raw)
+             vectorstore = vector_store(text_chunks)
+             st.session_state.conversation = get_chain(vectorstore)
+             st.markdown('# AI Search Index of Length:' + length + ' Created.')  # add timing
+             filename = generate_filename(raw, 'txt')
+             create_file(filename, raw, '')
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ audio-recorder-streamlit
+ beautifulsoup4
+ faiss-cpu
+ langchain
+ mistune
+ openai
+ PyPDF2
+ python-dotenv
+ pytz
+ streamlit
+ tiktoken
+ textract
templates.py ADDED
@@ -0,0 +1,44 @@
+ css = '''
+ <style>
+ .chat-message {
+     padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
+ }
+ .chat-message.user {
+     background-color: #2b313e
+ }
+ .chat-message.bot {
+     background-color: #475063
+ }
+ .chat-message .avatar {
+     width: 20%;
+ }
+ .chat-message .avatar img {
+     max-width: 78px;
+     max-height: 78px;
+     border-radius: 50%;
+     object-fit: cover;
+ }
+ .chat-message .message {
+     width: 80%;
+     padding: 0 1.5rem;
+     color: #fff;
+ }
+ </style>
+ '''
+
+ bot_template = '''
+ <div class="chat-message bot">
+     <div class="avatar">
+         <img src="https://cdna.artstation.com/p/assets/images/images/054/910/878/large/aaron-wacker-cyberpunk-computer-devices-iot.jpg?1665656564" style="max-height: 78px; max-width: 78px; border-radius: 50%; object-fit: cover;">
+     </div>
+     <div class="message">{{MSG}}</div>
+ </div>
+ '''
+
+ user_template = '''
+ <div class="chat-message user">
+     <div class="avatar">
+         <img src="https://cdnb.artstation.com/p/assets/images/images/054/910/875/large/aaron-wacker-cyberpunk-computer-brain-design.jpg?1665656558">
+     </div>
+     <div class="message">{{MSG}}</div>
+ </div>
+ '''