Spaces:
Running
Running
lodhrangpt
committed on
Commit
•
a37c21d
1
Parent(s):
0a61124
Update app.py
Browse files
app.py
CHANGED
@@ -31,9 +31,9 @@ def pdf_to_dataframe(uploaded_file):
|
|
31 |
extracted_text = ocr_pipeline(uploaded_file.read(), max_length=1024, do_sample=False)[0]["generated_text"]
|
32 |
lines = extracted_text.split("\n")
|
33 |
data = []
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
|
38 |
# df = pd.DataFrame(lines, columns=['Text'])
|
39 |
|
|
|
31 |
extracted_text = ocr_pipeline(uploaded_file.read(), max_length=1024, do_sample=False)[0]["generated_text"]
|
32 |
lines = extracted_text.split("\n")
|
33 |
data = []
|
34 |
+
for line in lines:
|
35 |
+
data.append([line])
|
36 |
+
df = pd.DataFrame(data, columns=["Text"])
|
37 |
|
38 |
# df = pd.DataFrame(lines, columns=['Text'])
|
39 |
|