Spaces:

hema1
/

pdf_QA_bot

Runtime error

App Files Files Community

pdf_QA_bot / app.py

hema1

Update app.py

e9738f9 over 1 year ago

raw

history blame contribute delete

2 kB

	from haystack.nodes import TextConverter, PDFToTextConverter, DocxToTextConverter, PreProcessor
	import gradio as gr
	from haystack.nodes import PreProcessor
	from haystack.document_stores.faiss import FAISSDocumentStore
	from haystack.nodes import DensePassageRetriever
	from haystack.nodes import FARMReader
	from haystack.pipelines import ExtractiveQAPipeline

	# Extract English text from the bundled PDF, dropping numeric tables.
	pdf_converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
	# NOTE(review): the original call was cut off after `meta`; `meta=None`
	# (no extra metadata attached) is assumed here -- confirm against the
	# intended value.
	converted = pdf_converter.convert(
		file_path="statistics-for-machine-learning.pdf",
		meta=None,
	)

	# Split the converted text into 200-word passages that overlap by 10 words.
	preprocessor = PreProcessor(
		split_by="word",
		split_length=200,
		split_overlap=10,
	)
	preprocessed = preprocessor.process(converted)


	# Index the preprocessed passages in a FAISS store that uses a "Flat" index.
	document_store = FAISSDocumentStore(
		faiss_index_factory_str="Flat",
		return_embedding=True,
	)
	# Clear whatever the store already holds before writing the passages.
	document_store.delete_all_documents()
	document_store.write_documents(preprocessed)

	# Compute passage embeddings with the dense retriever.
	retriever = DensePassageRetriever(document_store=document_store)
	document_store.update_embeddings(retriever)

	# Extractive reader, configured not to use a GPU.
	reader = FARMReader(
		model_name_or_path='deepset/roberta-base-squad2-distilled',
		use_gpu=False,
	)

	pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever)

	# Sample questions run once at start-up; their results are printed below.
	questions = [
		'What is linear regression?',
		'What is machine learning?',
		'What are the steps in machine learning model development and deployment?',
		'What is classification?',
	]
	answers = [pipeline.run(query=question) for question in questions]

	for answer in answers:
		print('Q:', answer['query'])
		candidates = answer['answers']
		if candidates:
			# Only the first returned answer is reported.
			best = candidates[0]
			print('A:', best.answer)
			print('Context: ', best.context)
			print('score: ', best.score)
		else:
			# An empty answer list must not abort start-up with an IndexError.
			print('A:', None)
		print('\n')

	def correct(question):
		"""Answer *question* with the QA pipeline and return displayable text.

		Args:
			question: The question text typed by the user.

		Returns:
			A string holding the first answer, its context and its score, or
			a short message when the question is blank or no answer is found.
		"""
		if not question or not question.strip():
			return 'Please enter a question.'

		prediction = pipeline.run(query=question)
		candidates = prediction['answers']
		if not candidates:
			return 'No answer found.'

		# Return text for the output box; `list.append` would yield None.
		best = candidates[0]
		return f'{best.answer}\n\nContext: {best.context}\nscore: {best.score}'


	def _answer_from_ui(_uploaded_file, question):
		"""Adapt the two UI inputs (file, question) to `correct`.

		The uploaded file is ignored: `correct` only takes the question.
		"""
		return correct(question)


	app_inputs = gr.inputs.File()

	# NOTE(review): `gr.inputs.Textbox` is used as the output component; whether
	# that is accepted depends on the installed Gradio version -- confirm.
	interface = gr.Interface(
		fn=_answer_from_ui,
		inputs=[app_inputs, gr.inputs.Textbox(lines=10)],
		outputs=gr.inputs.Textbox(lines=20),
		title='PDF QA system',
	)
	interface.launch(share=True)