PCFISH committed on
Commit
f5f9605
•
1 Parent(s): 780971d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -17
app.py CHANGED
@@ -29,14 +29,22 @@ def get_pdf_text(pdf_docs):
29
  # 아래 텍스트 추출 함수를 작성
30
 
31
  def get_text_file(docs):
32
- pass
33
-
34
 
35
  def get_csv_file(docs):
36
- pass
 
 
 
 
37
 
38
  def get_json_file(docs):
39
- pass
 
 
 
 
40
 
41
 
42
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
@@ -117,35 +125,34 @@ def main():
117
 
118
  st.subheader("Your documents")
119
  docs = st.file_uploader(
120
- "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
121
  if st.button("Process"):
122
  with st.spinner("Processing"):
123
- # get pdf text
124
  doc_list = []
125
 
126
  for file in docs:
127
  if file.type == 'text/plain':
128
- # file is .txt
129
  doc_list.extend(get_text_file(file))
130
- elif file.type in ['application/octet-stream', 'application/pdf']:
131
- # file is .pdf
132
- doc_list.extend(get_pdf_text(file))
133
  elif file.type == 'text/csv':
134
- # file is .csv
135
  doc_list.extend(get_csv_file(file))
136
  elif file.type == 'application/json':
137
- # file is .json
138
  doc_list.extend(get_json_file(file))
 
 
 
139
 
140
- # get the text chunks
141
  text_chunks = get_text_chunks(doc_list)
142
 
143
- # create vector store
144
  vectorstore = get_vectorstore(text_chunks)
145
 
146
- # create conversation chain
147
- st.session_state.conversation = get_conversation_chain(
148
- vectorstore)
149
 
150
 
151
  if __name__ == '__main__':
 
29
  # 아래 텍스트 추출 함수를 작성
30
 
31
  def get_text_file(docs):
32
+ # 텍스트 파일 (.txt)에서 텍스트를 추출하는 함수
33
+ return [docs.getvalue().decode('utf-8')]
34
 
35
  def get_csv_file(docs):
36
+ # CSV 파일 (.csv)에서 텍스트를 추출하는 함수
37
+ csv_loader = CSVLoader(docs)
38
+ csv_data = csv_loader.load()
39
+ # CSV 파일의 각 행을 문자열로 변환하여 반환
40
+ return [' '.join(map(str, row)) for row in csv_data]
41
 
42
  def get_json_file(docs):
43
+ # JSON 파일 (.json)에서 텍스트를 추출하는 함수
44
+ json_loader = JSONLoader(docs)
45
+ json_data = json_loader.load()
46
+ # JSON 파일의 각 항목을 문자열로 변환하여 반환
47
+ return [json.dumps(item) for item in json_data]
48
 
49
 
50
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
 
125
 
126
  st.subheader("Your documents")
127
  docs = st.file_uploader(
128
+ "Upload your documents here and click on 'Process'", accept_multiple_files=True)
129
  if st.button("Process"):
130
  with st.spinner("Processing"):
131
+ # 문서에서 추출한 텍스트를 담을 리스트
132
  doc_list = []
133
 
134
  for file in docs:
135
  if file.type == 'text/plain':
136
+ # .txt 파일의 경우
137
  doc_list.extend(get_text_file(file))
 
 
 
138
  elif file.type == 'text/csv':
139
+ # .csv 파일의 경우
140
  doc_list.extend(get_csv_file(file))
141
  elif file.type == 'application/json':
142
+ # .json 파일의 경우
143
  doc_list.extend(get_json_file(file))
144
+ elif file.type in ['application/octet-stream', 'application/pdf']:
145
+ # .pdf 파일의 경우
146
+ doc_list.extend(get_pdf_text(file))
147
 
148
+ # 텍스트 청크로 나누기
149
  text_chunks = get_text_chunks(doc_list)
150
 
151
+ # 벡터 스토어 생성
152
  vectorstore = get_vectorstore(text_chunks)
153
 
154
+ # 대화 체인 생성
155
+ st.session_state.conversation = get_conversation_chain(vectorstore)
 
156
 
157
 
158
  if __name__ == '__main__':