Spaces:
Runtime error
Runtime error
Updated the file reading for the app
Browse files
Error Fix: https://stackoverflow.com/questions/51337167/typeerror-stat-path-should-be-string-bytes-os-pathlike-or-integer-not-io-t
- pdftoqa_generator.py +22 -16
pdftoqa_generator.py
CHANGED
@@ -11,29 +11,35 @@ from langchain.text_splitter import (
|
|
11 |
RecursiveCharacterTextSplitter,
|
12 |
)
|
13 |
from tqdm import tqdm
|
|
|
14 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
15 |
|
16 |
os.environ["OPENAI_API_KEY"] = "sk-"
|
17 |
|
18 |
|
19 |
def pdf_parser(file_path):
|
20 |
-
|
|
|
|
|
|
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
37 |
|
38 |
|
39 |
def qa_generator(texts):
|
|
|
11 |
RecursiveCharacterTextSplitter,
|
12 |
)
|
13 |
from tqdm import tqdm
|
14 |
+
from tempfile import NamedTemporaryFile
|
15 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
16 |
|
17 |
os.environ["OPENAI_API_KEY"] = "sk-"
|
18 |
|
19 |
|
20 |
def pdf_parser(file_path):
    """Load a PDF and split its page text into overlapping chunks.

    Args:
        file_path: Either a filesystem path to a PDF, or a binary file-like
            object exposing ``read()`` (for example an uploaded file). A
            file-like object is first copied to a temporary file because
            ``PyPDFLoader`` is given a path, not a stream.

    Returns:
        The chunk documents produced by
        ``RecursiveCharacterTextSplitter.create_documents`` from the text of
        every loaded page.
    """
    tmp_path = None
    if hasattr(file_path, "read"):
        # The previous version read from an undefined name (``uploaded_file``)
        # instead of the function's own argument; use the argument.
        with NamedTemporaryFile(delete=False) as tmp:
            tmp.write(file_path.read())
        # Leaving the ``with`` block closes the file, so the data is flushed
        # to disk before the loader opens it by name.
        tmp_path = tmp.name
        source_path = tmp_path
    else:
        source_path = file_path

    try:
        documents = PyPDFLoader(source_path).load()
        documents_text = [d.page_content for d in documents]

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=600,
            chunk_overlap=200,
            length_function=len,
            is_separator_regex=False,
        )

        # Split the page texts into chunks.
        return text_splitter.create_documents(documents_text)
    finally:
        # Remove the temporary copy even when loading or splitting fails.
        if tmp_path is not None:
            os.remove(tmp_path)
|
43 |
|
44 |
|
45 |
def qa_generator(texts):
|