Spaces:
Sleeping
Sleeping
FridayMaster
committed on
Commit
•
93452e4
1
Parent(s):
8918a3e
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,9 @@
|
|
|
|
1 |
import pandas as pd
|
2 |
import PyPDF2
|
3 |
import spacy
|
4 |
-
|
5 |
-
from sentence_transformers import SentenceTransformer
|
6 |
import torch
|
7 |
import gradio as gr
|
8 |
|
@@ -16,8 +17,12 @@ def extract_text_from_pdf(pdf_path):
|
|
16 |
text += page.extract_text()
|
17 |
return text
|
18 |
|
|
|
|
|
|
|
|
|
19 |
# Extract text from the PDF
|
20 |
-
pdf_text = extract_text_from_pdf(
|
21 |
|
22 |
# Convert the text to a DataFrame
|
23 |
df = pd.DataFrame({'text': [pdf_text]})
|
|
|
1 |
+
import os
|
2 |
import pandas as pd
|
3 |
import PyPDF2
|
4 |
import spacy
|
5 |
+
import faiss
|
6 |
+
from sentence_transformers import SentenceTransformer
|
7 |
import torch
|
8 |
import gradio as gr
|
9 |
|
|
|
17 |
text += page.extract_text()
|
18 |
return text
|
19 |
|
20 |
+
# Ensure correct relative path
|
21 |
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
22 |
+
pdf_path = os.path.join(current_dir, 'Getting_Started_with_Ubuntu_16.04.pdf') # Adjust as needed
|
23 |
+
|
24 |
# Extract text from the PDF
|
25 |
+
pdf_text = extract_text_from_pdf(pdf_path) # Replace with your PDF path
|
26 |
|
27 |
# Convert the text to a DataFrame
|
28 |
df = pd.DataFrame({'text': [pdf_text]})
|