Spaces:
Sleeping
Sleeping
januarevan
committed on
Commit
•
ebce04e
1
Parent(s):
b0477e7
init
Browse files
main.py
CHANGED
@@ -82,8 +82,13 @@ async def insert(file: UploadFile = File(...)):
82 |   create_a_collection(milvus_client, collection_name)
83 |
84 |   contents = pypdf.PdfReader(BytesIO(contents))
85 |
86 | - splitted_document_data = split_documents(
87 |
88 |   print(splitted_document_data)
89 |

82 |   create_a_collection(milvus_client, collection_name)
83 |
84 |   contents = pypdf.PdfReader(BytesIO(contents))
85 | +
86 | + extracted_text = ""
87 | + for page_num in range(len(contents.pages)):
88 | +     page = contents.pages[page_num]
89 | +     extracted_text += page.extract_text()
90 |
91 | + splitted_document_data = split_documents(extracted_text)
92 |
93 |   print(splitted_document_data)
94 |