PCFISH committed on
Commit
899d5e4
•
1 Parent(s): af69459

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -47
app.py CHANGED
@@ -29,31 +29,28 @@ def get_pdf_text(pdf_docs):
29
  # 아래 텍스트 추출 함수를 작성
30
 
31
  def get_text_file(docs):
32
- if docs.type == 'text/plain':
33
- # 텍스트 파일 (.txt)에서 텍스트를 추출하는 함수
34
- return [docs.getvalue().decode('utf-8')]
35
- else:
36
- st.warning("Unsupported file type for get_text_file")
 
37
 
38
  def get_csv_file(docs):
39
- if docs.type == 'text/csv':
40
- # CSV 파일 (.csv)에서 텍스트를 추출하는 함수
41
- csv_loader = CSVLoader(docs)
42
- csv_data = csv_loader.load()
43
- # CSV 파일의 각 행을 문자열로 변환하여 반환
44
- return [' '.join(map(str, row)) for row in csv_data]
45
- else:
46
- st.warning("Unsupported file type for get_csv_file")
47
 
48
  def get_json_file(docs):
49
- if docs.type == 'application/json':
50
- # JSON 파일 (.json)에서 텍스트를 추출하는 함수
51
- json_loader = JSONLoader(docs)
52
- json_data = json_loader.load()
53
- # JSON 파일의 각 항목을 문자열로 변환하여 반환
54
- return [json.dumps(item) for item in json_data]
55
- else:
56
- st.warning("Unsupported file type for get_json_file")
57
 
58
 
59
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
@@ -155,32 +152,33 @@ def main():
155
  docs = st.file_uploader(
156
  "Upload your documents here and click on 'Process'", accept_multiple_files=True)
157
  if st.button("Process"):
158
- with st.spinner("Processing"):
159
- # 문서에서 추출한 텍스트를 담을 리스트
160
- doc_list = []
161
-
162
- for file in docs:
163
- if file.type == 'text/plain':
164
- # .txt 파일의 경우
165
- doc_list.extend(get_text_file(file))
166
- elif file.type == 'text/csv':
167
- # .csv 파일의 경우
168
- doc_list.extend(get_csv_file(file))
169
- elif file.type == 'application/json':
170
- # .json 파일의 경우
171
- doc_list.extend(get_json_file(file))
172
- elif file.type in ['application/octet-stream', 'application/pdf']:
173
- # .pdf 파일의 경우
174
- doc_list.extend(get_pdf_text(file))
175
-
176
- # 텍스트 청크로 나누기
177
- text_chunks = get_text_chunks(doc_list)
178
-
179
- # 벡터 스토어 생성
180
- vectorstore = get_vectorstore(text_chunks)
181
-
182
- # 대화 체인 생성
183
- st.session_state.conversation = get_conversation_chain(vectorstore)
 
184
 
185
 
186
  if __name__ == '__main__':
 
29
  # 아래 텍스트 추출 함수를 작성
30
 
31
  def get_text_file(docs):
32
+ text_list = []
33
+ for file in docs:
34
+ if file.type == 'text/plain':
35
+ # file is .txt
36
+ text_list.append(file.getvalue().decode('utf-8'))
37
+ return text_list
38
 
39
  def get_csv_file(docs):
40
+ csv_list = []
41
+ for file in docs:
42
+ if file.type == 'text/csv':
43
+ # file is .csv
44
+ csv_list.extend(csv.reader(file.getvalue().decode('utf-8').splitlines()))
45
+ return csv_list
 
 
46
 
47
  def get_json_file(docs):
48
+ json_list = []
49
+ for file in docs:
50
+ if file.type == 'application/json':
51
+ # file is .json
52
+ json_list.extend(json.load(file))
53
+ return json_list
 
 
54
 
55
 
56
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
 
152
  docs = st.file_uploader(
153
  "Upload your documents here and click on 'Process'", accept_multiple_files=True)
154
  if st.button("Process"):
155
+ with st.spinner("Processing"):
156
+ # get pdf text
157
+ doc_list = []
158
+
159
+ for file in docs:
160
+ print('file - type : ', file.type)
161
+ if file.type == 'text/plain':
162
+ # file is .txt
163
+ doc_list.extend(get_text_file([file]))
164
+ elif file.type in ['application/octet-stream', 'application/pdf']:
165
+ # file is .pdf
166
+ doc_list.extend(get_pdf_text(file))
167
+ elif file.type == 'text/csv':
168
+ # file is .csv
169
+ doc_list.extend(get_csv_file([file]))
170
+ elif file.type == 'application/json':
171
+ # file is .json
172
+ doc_list.extend(get_json_file([file]))
173
+
174
+ # get the text chunks
175
+ text_chunks = get_text_chunks(doc_list)
176
+
177
+ # create vector store
178
+ vectorstore = get_vectorstore(text_chunks)
179
+
180
+ # create conversation chain
181
+ st.session_state.conversation = get_conversation_chain(vectorstore)
182
 
183
 
184
  if __name__ == '__main__':