Update app.py
Browse files
app.py
CHANGED
@@ -29,14 +29,22 @@ def get_pdf_text(pdf_docs):
|
|
29 |
# 아래 텍스트 추출 함수를 작성
|
30 |
|
31 |
def get_text_file(docs):
|
32 |
-
|
33 |
-
|
34 |
|
35 |
def get_csv_file(docs):
|
36 |
-
|
|
|
|
|
|
|
|
|
37 |
|
38 |
def get_json_file(docs):
|
39 |
-
|
|
|
|
|
|
|
|
|
40 |
|
41 |
|
42 |
# 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
|
@@ -117,35 +125,34 @@ def main():
|
|
117 |
|
118 |
st.subheader("Your documents")
|
119 |
docs = st.file_uploader(
|
120 |
-
"Upload your
|
121 |
if st.button("Process"):
|
122 |
with st.spinner("Processing"):
|
123 |
-
#
|
124 |
doc_list = []
|
125 |
|
126 |
for file in docs:
|
127 |
if file.type == 'text/plain':
|
128 |
-
#
|
129 |
doc_list.extend(get_text_file(file))
|
130 |
-
elif file.type in ['application/octet-stream', 'application/pdf']:
|
131 |
-
# file is .pdf
|
132 |
-
doc_list.extend(get_pdf_text(file))
|
133 |
elif file.type == 'text/csv':
|
134 |
-
#
|
135 |
doc_list.extend(get_csv_file(file))
|
136 |
elif file.type == 'application/json':
|
137 |
-
#
|
138 |
doc_list.extend(get_json_file(file))
|
|
|
|
|
|
|
139 |
|
140 |
-
#
|
141 |
text_chunks = get_text_chunks(doc_list)
|
142 |
|
143 |
-
#
|
144 |
vectorstore = get_vectorstore(text_chunks)
|
145 |
|
146 |
-
#
|
147 |
-
st.session_state.conversation = get_conversation_chain(
|
148 |
-
vectorstore)
|
149 |
|
150 |
|
151 |
if __name__ == '__main__':
|
|
|
29 |
# 아래 텍스트 추출 함수를 작성
|
30 |
|
31 |
def get_text_file(docs):
|
32 |
+
# 텍스트 파일 (.txt)에서 텍스트를 추출하는 함수
|
33 |
+
return [docs.getvalue().decode('utf-8')]
|
34 |
|
35 |
def get_csv_file(docs):
|
36 |
+
# CSV 파일 (.csv)에서 텍스트를 추출하는 함수
|
37 |
+
csv_loader = CSVLoader(docs)
|
38 |
+
csv_data = csv_loader.load()
|
39 |
+
# CSV 파일의 각 행을 문자열로 변환하여 반환
|
40 |
+
return [' '.join(map(str, row)) for row in csv_data]
|
41 |
|
42 |
def get_json_file(docs):
|
43 |
+
# JSON 파일 (.json)에서 텍스트를 추출하는 함수
|
44 |
+
json_loader = JSONLoader(docs)
|
45 |
+
json_data = json_loader.load()
|
46 |
+
# JSON 파일의 각 항목을 문자열로 변환하여 반환
|
47 |
+
return [json.dumps(item) for item in json_data]
|
48 |
|
49 |
|
50 |
# 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
|
|
|
125 |
|
126 |
st.subheader("Your documents")
|
127 |
docs = st.file_uploader(
|
128 |
+
"Upload your documents here and click on 'Process'", accept_multiple_files=True)
|
129 |
if st.button("Process"):
|
130 |
with st.spinner("Processing"):
|
131 |
+
# 문서에서 추출한 텍스트를 담을 리스트
|
132 |
doc_list = []
|
133 |
|
134 |
for file in docs:
|
135 |
if file.type == 'text/plain':
|
136 |
+
# .txt 파일의 경우
|
137 |
doc_list.extend(get_text_file(file))
|
|
|
|
|
|
|
138 |
elif file.type == 'text/csv':
|
139 |
+
# .csv 파일의 경우
|
140 |
doc_list.extend(get_csv_file(file))
|
141 |
elif file.type == 'application/json':
|
142 |
+
# .json 파일의 경우
|
143 |
doc_list.extend(get_json_file(file))
|
144 |
+
elif file.type in ['application/octet-stream', 'application/pdf']:
|
145 |
+
# .pdf 파일의 경우
|
146 |
+
doc_list.extend(get_pdf_text(file))
|
147 |
|
148 |
+
# 텍스트 청크로 나누기
|
149 |
text_chunks = get_text_chunks(doc_list)
|
150 |
|
151 |
+
# 벡터 스토어 생성
|
152 |
vectorstore = get_vectorstore(text_chunks)
|
153 |
|
154 |
+
# 대화 체인 생성
|
155 |
+
st.session_state.conversation = get_conversation_chain(vectorstore)
|
|
|
156 |
|
157 |
|
158 |
if __name__ == '__main__':
|