januarevan committed on
Commit
ebce04e
1 Parent(s): b0477e7
Files changed (1) hide show
  1. main.py +6 -1
main.py CHANGED
@@ -82,8 +82,13 @@ async def insert(file: UploadFile = File(...)):
82
  create_a_collection(milvus_client, collection_name)
83
 
84
  contents = pypdf.PdfReader(BytesIO(contents))
 
 
 
 
 
85
 
86
- splitted_document_data = split_documents(contents)
87
 
88
  print(splitted_document_data)
89
 
 
82
  create_a_collection(milvus_client, collection_name)
83
 
84
  contents = pypdf.PdfReader(BytesIO(contents))
85
+
86
+ extracted_text = ""
87
+ for page_num in range(len(contents.pages)):
88
+ page = contents.pages[page_num]
89
+ extracted_text += page.extract_text()
90
 
91
+ splitted_document_data = split_documents(extracted_text)
92
 
93
  print(splitted_document_data)
94