[email protected]
committed on
Commit
•
beefdef
1
Parent(s):
8b057ae
edit codes
Browse files
app.py
CHANGED
@@ -19,59 +19,60 @@ import os
|
|
19 |
|
20 |
# PDF 문서로부터 텍스트를 추출하는 함수입니다.
|
21 |
def get_pdf_text(pdf_docs):
    """Load an uploaded PDF through PyPDFLoader.

    The upload is written to a scratch file first because PyPDFLoader is
    constructed from a filesystem path.

    Args:
        pdf_docs: Uploaded file object exposing ``name`` and ``getvalue()``.

    Returns:
        The result of ``PyPDFLoader.load()`` for the uploaded file.
    """
    # The context manager removes the scratch directory deterministically
    # instead of leaving cleanup to the object's finalizer.
    with tempfile.TemporaryDirectory() as temp_dir:
        # basename() keeps a client-supplied name inside temp_dir.
        temp_filepath = os.path.join(temp_dir, os.path.basename(pdf_docs.name))
        with open(temp_filepath, "wb") as f:
            f.write(pdf_docs.getvalue())
        # load() has to run while the scratch file still exists.
        return PyPDFLoader(temp_filepath).load()
|
29 |
|
30 |
# 과제
|
31 |
# 아래 텍스트 추출 함수를 작성
|
32 |
-
|
33 |
def get_text_file(docs):
    """Load an uploaded plain-text file through TextLoader.

    Args:
        docs: Uploaded file object exposing ``name`` and ``getvalue()``.

    Returns:
        The result of ``TextLoader.load()`` for the uploaded file.
    """
    # The context manager removes the scratch directory deterministically
    # instead of leaving cleanup to the object's finalizer.
    with tempfile.TemporaryDirectory() as temp_dir:
        # basename() keeps a client-supplied name inside temp_dir.
        temp_filepath = os.path.join(temp_dir, os.path.basename(docs.name))
        with open(temp_filepath, "wb") as f:
            f.write(docs.getvalue())
        # TextLoader has no ``txt_args`` parameter (passing one raises
        # TypeError), so only the path is supplied.
        return TextLoader(file_path=temp_filepath).load()
|
46 |
|
47 |
def get_csv_file(docs):
    """Load an uploaded CSV file through CSVLoader.

    Args:
        docs: Uploaded file object exposing ``name`` and ``getvalue()``.

    Returns:
        The result of ``CSVLoader.load()`` for the uploaded file.
    """
    # The context manager removes the scratch directory deterministically
    # instead of leaving cleanup to the object's finalizer.
    with tempfile.TemporaryDirectory() as temp_dir:
        # basename() keeps a client-supplied name inside temp_dir.
        temp_filepath = os.path.join(temp_dir, os.path.basename(docs.name))
        with open(temp_filepath, "wb") as f:
            f.write(docs.getvalue())
        # NOTE(review): the column names are fixed here; if uploads carry
        # their own header row it will presumably be read as data - confirm.
        csv_loader = CSVLoader(
            file_path=temp_filepath,
            csv_args={
                "delimiter": ",",
                "quotechar": '"',
                "fieldnames": ["name", "school", "address", "phone"],
            },
        )
        # load() has to run while the scratch file still exists.
        return csv_loader.load()
|
62 |
|
63 |
def get_json_file(docs):
    """Load an uploaded JSON file through JSONLoader.

    Args:
        docs: Uploaded file object exposing ``name`` and ``getvalue()``.

    Returns:
        The result of ``JSONLoader.load()`` for the uploaded file.
    """
    # The context manager removes the scratch directory deterministically
    # instead of leaving cleanup to the object's finalizer.
    with tempfile.TemporaryDirectory() as temp_dir:
        # basename() keeps a client-supplied name inside temp_dir.
        temp_filepath = os.path.join(temp_dir, os.path.basename(docs.name))
        with open(temp_filepath, "wb") as f:
            f.write(docs.getvalue())
        json_loader = JSONLoader(
            file_path=temp_filepath,
            # Selects the "content" field of every entry under "messages".
            jq_schema='.messages[].content',
            # NOTE(review): False appears to relax the requirement that the
            # selected values are strings - confirm against JSONLoader docs.
            text_content=False,
        )
        # load() has to run while the scratch file still exists.
        return json_loader.load()
|
75 |
|
76 |
|
77 |
# 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
|
|
|
19 |
|
20 |
# PDF 문서로부터 텍스트를 추출하는 함수입니다.
|
21 |
def get_pdf_text(pdf_docs):
    """Load an uploaded PDF through PyPDFLoader.

    The upload is written to a scratch file first because PyPDFLoader is
    constructed from a filesystem path.

    Args:
        pdf_docs: Uploaded file object exposing ``name`` and ``getvalue()``.

    Returns:
        The result of ``PyPDFLoader.load()`` for the uploaded file.
    """
    # Using the directory as a context manager guarantees it is deleted on
    # exit, including when writing or loading raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Strip any directory part so the upload name cannot point the
        # write outside temp_dir.
        safe_name = os.path.basename(pdf_docs.name)
        temp_filepath = os.path.join(temp_dir, safe_name)
        with open(temp_filepath, "wb") as f:
            f.write(pdf_docs.getvalue())
        pdf_loader = PyPDFLoader(temp_filepath)
        # Load before leaving the block; the file is gone afterwards.
        return pdf_loader.load()
|
29 |
|
30 |
# 과제
|
31 |
# 아래 텍스트 추출 함수를 작성
|
|
|
32 |
def get_text_file(docs):
    """Load an uploaded plain-text file through TextLoader.

    Args:
        docs: Uploaded file object exposing ``name`` and ``getvalue()``.

    Returns:
        The result of ``TextLoader.load()`` for the uploaded file.
    """
    # Using the directory as a context manager guarantees it is deleted on
    # exit, including when writing or loading raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Strip any directory part so the upload name cannot point the
        # write outside temp_dir.
        safe_name = os.path.basename(docs.name)
        temp_filepath = os.path.join(temp_dir, safe_name)
        with open(temp_filepath, "wb") as f:
            f.write(docs.getvalue())
        # TextLoader has no ``txt_args`` parameter (passing one raises
        # TypeError), so it is built from the path alone.
        txt_loader = TextLoader(file_path=temp_filepath)
        # Load before leaving the block; the file is gone afterwards.
        return txt_loader.load()
|
47 |
|
48 |
def get_csv_file(docs):
    """Load an uploaded CSV file through CSVLoader.

    Args:
        docs: Uploaded file object exposing ``name`` and ``getvalue()``.

    Returns:
        The result of ``CSVLoader.load()`` for the uploaded file.
    """
    # Using the directory as a context manager guarantees it is deleted on
    # exit, including when writing or loading raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Strip any directory part so the upload name cannot point the
        # write outside temp_dir.
        safe_name = os.path.basename(docs.name)
        temp_filepath = os.path.join(temp_dir, safe_name)
        with open(temp_filepath, "wb") as f:
            f.write(docs.getvalue())
        csv_loader = CSVLoader(
            file_path=temp_filepath,
            csv_args={
                "delimiter": ",",  # comma-separated columns
                "quotechar": '"',  # double quotes wrap quoted fields
                # NOTE(review): fixed column names; a header row in the
                # upload would presumably be read as data - confirm.
                "fieldnames": ["name", "school", "address", "phone"],
            },
        )
        # Load before leaving the block; the file is gone afterwards.
        return csv_loader.load()
|
63 |
|
64 |
def get_json_file(docs):
    """Load an uploaded JSON file through JSONLoader.

    Args:
        docs: Uploaded file object exposing ``name`` and ``getvalue()``.

    Returns:
        The result of ``JSONLoader.load()`` for the uploaded file.
    """
    # Using the directory as a context manager guarantees it is deleted on
    # exit, including when writing or loading raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Strip any directory part so the upload name cannot point the
        # write outside temp_dir.
        safe_name = os.path.basename(docs.name)
        temp_filepath = os.path.join(temp_dir, safe_name)
        with open(temp_filepath, "wb") as f:
            f.write(docs.getvalue())
        json_loader = JSONLoader(
            file_path=temp_filepath,
            # jq expression: the "content" field of each "messages" entry.
            jq_schema='.messages[].content',
            # NOTE(review): False appears to relax the requirement that the
            # selected values are strings - confirm against JSONLoader docs.
            text_content=False,
        )
        # Load before leaving the block; the file is gone afterwards.
        return json_loader.load()
|
76 |
|
77 |
|
78 |
# 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
|