Spaces:

hema1
/

pdf_QA_bot

Runtime error

App Files Files Community

hema1 committed on Mar 25, 2023

Commit

7be7fa0

•

1 Parent(s): 7755f97

Create app.py

Browse files

Files changed (1) hide show

app.py +58 -0

app.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from haystack.nodes import TextConverter, PDFToTextConverter, DocxToTextConverter, PreProcessor  # only PDFToTextConverter and PreProcessor are used in the visible script
+import gradio as gr
+pdf_converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])  # converter for the single hard-coded PDF below
+converted = pdf_converter.convert(file_path="statistics-for-machine-learning.pdf", meta  # NOTE(review): line is cut off after "meta" in this view; the remaining arguments and closing parenthesis are not visible - restore from the original file
+from haystack.nodes import PreProcessor  # duplicate: PreProcessor is already imported at the top
+preprocessor = PreProcessor(
+ split_by="word",  # split unit, per the parameter name
+ split_length=200,  # presumably 200 words per chunk - confirm against the PreProcessor docs
+ split_overlap=10,  # presumably 10 words shared between neighbouring chunks - confirm
+ )
+preprocessed = preprocessor.process(converted)  # result is what gets written to the document store later
+from haystack.document_stores.faiss import FAISSDocumentStore
+document_store = FAISSDocumentStore(faiss_index_factory_str="Flat", return_embedding=True)  # FAISS-backed store built with the "Flat" index factory string
+document_store.delete_all_documents()  # clear the store before writing, so a re-run does not add to earlier contents
+document_store.write_documents(preprocessed)  # store the preprocessed PDF chunks
+from haystack.nodes import DensePassageRetriever
+from haystack.nodes import FARMReader
+retriever = DensePassageRetriever(document_store=document_store)  # no encoder models are named here, so the class defaults apply
+reader = FARMReader(model_name_or_path='deepset/roberta-base-squad2-distilled', use_gpu=False)  # reader model runs on CPU (use_gpu=False)
+document_store.update_embeddings(retriever)  # must come after write_documents: embeds the stored documents using the retriever
+from haystack.pipelines import ExtractiveQAPipeline
+pipeline = ExtractiveQAPipeline(reader, retriever)  # module-level pipeline used by the sample run and by correct() below
+questions = [ 'What is linear regression?',
+ 'What is machine learning?',
+ 'What are the steps in machine learning model development and deployment?',
+ 'What is classification?'
+ ]
+answers = []
+for question in questions:
+    prediction = pipeline.run(query=question)
+    answers.append(prediction)
+for answer in answers:
+ print('Q:', answer['query'])
+ print('A:', answer['answers'][0].answer)
+ print('Context: ', answer['answers'][0].context)
+ print('score: ',answer['answers'][0].score)
+ print('\n')
+def correct(question):
+    prediction = pipeline.run(query=question)
+    return answers.append(prediction)
+app_inputs = gr.inputs.File()
+interface = gr.Interface(fn=correct,
+                        inputs=[app_inputs,gr.inputs.Textbox(lines=10)],
+                         outputs=gr.inputs.Textbox(lines=20),
+                        title='PDF QA system')
+interface.launch(share=True)