Spaces: Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,15 @@
|
|
1 |
from haystack.nodes import TextConverter, PDFToTextConverter, DocxToTextConverter, PreProcessor
|
2 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
pdf_converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
|
4 |
converted = pdf_converter.convert(file_path="statistics-for-machine-learning.pdf", meta
|
5 |
|
6 |
-
|
7 |
preprocessor = PreProcessor(
|
8 |
split_by="word",
|
9 |
split_length=200,
|
@@ -11,19 +17,16 @@ preprocessor = PreProcessor(
|
|
11 |
)
|
12 |
preprocessed = preprocessor.process(converted)
|
13 |
|
14 |
-
from haystack.document_stores.faiss import FAISSDocumentStore
|
15 |
|
16 |
document_store = FAISSDocumentStore(faiss_index_factory_str="Flat", return_embedding=True)
|
17 |
document_store.delete_all_documents()
|
18 |
document_store.write_documents(preprocessed)
|
19 |
|
20 |
-
|
21 |
-
from haystack.nodes import FARMReader
|
22 |
retriever = DensePassageRetriever(document_store=document_store)
|
23 |
reader = FARMReader(model_name_or_path='deepset/roberta-base-squad2-distilled', use_gpu=False)
|
24 |
document_store.update_embeddings(retriever)
|
25 |
|
26 |
-
from haystack.pipelines import ExtractiveQAPipeline
|
27 |
pipeline = ExtractiveQAPipeline(reader, retriever)
|
28 |
|
29 |
questions = [ 'What is linear regression?',
|
|
|
1 |
from haystack.nodes import TextConverter, PDFToTextConverter, DocxToTextConverter, PreProcessor
|
2 |
import gradio as gr
|
3 |
+
from haystack.nodes import PreProcessor
|
4 |
+
from haystack.document_stores.faiss import FAISSDocumentStore
|
5 |
+
from haystack.nodes import DensePassageRetriever
|
6 |
+
from haystack.nodes import FARMReader
|
7 |
+
from haystack.pipelines import ExtractiveQAPipeline
|
8 |
+
|
9 |
pdf_converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
|
10 |
converted = pdf_converter.convert(file_path="statistics-for-machine-learning.pdf", meta
|
11 |
|
12 |
+
|
13 |
preprocessor = PreProcessor(
|
14 |
split_by="word",
|
15 |
split_length=200,
|
|
|
17 |
)
|
18 |
preprocessed = preprocessor.process(converted)
|
19 |
|
|
|
20 |
|
21 |
document_store = FAISSDocumentStore(faiss_index_factory_str="Flat", return_embedding=True)
|
22 |
document_store.delete_all_documents()
|
23 |
document_store.write_documents(preprocessed)
|
24 |
|
25 |
+
|
|
|
26 |
retriever = DensePassageRetriever(document_store=document_store)
|
27 |
reader = FARMReader(model_name_or_path='deepset/roberta-base-squad2-distilled', use_gpu=False)
|
28 |
document_store.update_embeddings(retriever)
|
29 |
|
|
|
30 |
pipeline = ExtractiveQAPipeline(reader, retriever)
|
31 |
|
32 |
questions = [ 'What is linear regression?',
|