Spaces:

VGG11
/

armenian_chatbot_bert_multilingual

Runtime error

App Files Files Community

armenian_chatbot_bert_multilingual / app.py

Mary12

Update app.py

cda0496 over 1 year ago

raw

history blame contribute delete

5.07 kB

	import gradio as gr
	import re
	from pypdf import PdfReader
	from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
	import docx
	import concurrent.futures


	def remove_references(text):
	    """Strip bracketed citation markers and links, then tidy whitespace.

	    Removed, in this order: numeric markers such as ``[12]``, bracketed
	    links followed by a label (``[http://... label]``) and bare bracketed
	    links (``[http://...]``).  Afterwards every run of whitespace is
	    collapsed to a single space and both ends are trimmed.
	    """
	    removal_patterns = (
	        r'\[\d+\]',                          # numeric reference marker
	        r'\[https?://[^\[\]]+\s[^\[\]]+\]',  # hyperlink followed by text
	        r'\[https?://[^\[\]]+\]',            # hyperlink on its own
	    )
	    cleaned = text
	    for pattern in removal_patterns:
	        cleaned = re.sub(pattern, '', cleaned)
	    return re.sub(r'\s+', ' ', cleaned).strip()


	# def extract_text_from_pdf(file_path):
	# text = ""
	# pdf_reader = PdfReader(file_path)
	# for page in pdf_reader.pages:
	# text += page.extract_text() + "\n"
	# return text

	import fitz # PyMuPDF

	def extract_text_from_pdf(file_path):
	    """Return the plain text of every page of a PDF, in page order.

	    Args:
	        file_path: Path of the PDF file, handed to ``fitz.open``.

	    Returns:
	        The result of ``page.get_text("text")`` for each page, each followed
	        by a newline, concatenated into one string.
	    """
	    # The context manager closes the document even when extracting a page
	    # raises; a trailing close() call would be skipped in that case.
	    with fitz.open(file_path) as pdf_document:
	        return "".join(page.get_text("text") + "\n" for page in pdf_document)

	def extract_text_from_txt(file_path):
	    """Read a UTF-8 text file and return its whole content as a string.

	    Args:
	        file_path: Path of the text file to read.

	    Returns:
	        The decoded file content.

	    Raises:
	        OSError: If the file cannot be opened.
	        UnicodeDecodeError: If the content is not valid UTF-8.
	    """
	    # "utf-8-sig" decodes plain UTF-8 exactly like "utf-8" but also drops a
	    # leading byte-order mark, so the BOM that some editors write does not
	    # end up as a stray U+FEFF at the start of the context.
	    with open(file_path, "r", encoding='utf-8-sig') as txt_file:
	        return txt_file.read()

	# def extract_text_from_doc(file_path):
	# doc = docx.Document(file_path)
	# fullText = []
	# for para in doc.paragraphs:
	# fullText.append(para.text)
	# return '\n'.join(fullText)

	def extract_text_from_paragraph(para):
	    """Return the ``text`` attribute of *para*."""
	    paragraph_text = para.text
	    return paragraph_text

	def extract_text_from_doc(file_path):
	    """Return the text of a .docx file, one paragraph per line.

	    Args:
	        file_path: Path of the document, handed to ``docx.Document``.

	    Returns:
	        The text of each entry of ``doc.paragraphs``, in document order,
	        joined with newlines.
	    """
	    doc = docx.Document(file_path)
	    # Reading a paragraph's text is in-memory work, so a thread pool only
	    # adds start-up and scheduling overhead; a plain ordered pass yields
	    # the same string.
	    return '\n'.join(extract_text_from_paragraph(para) for para in doc.paragraphs)



	def model(model_name):
	    """Build a question-answering pipeline for the given checkpoint name.

	    Loads the tokenizer and the question-answering model identified by
	    *model_name* (the model is loaded with ``return_dict=False``) and wraps
	    both in a ``"question-answering"`` pipeline, which is returned.
	    """
	    qa_tokenizer = AutoTokenizer.from_pretrained(model_name)
	    qa_model = AutoModelForQuestionAnswering.from_pretrained(model_name, return_dict=False)
	    return pipeline("question-answering", model=qa_model, tokenizer=qa_tokenizer)

	# Checkpoint name handed to model() below.
	model_name = "timpal0l/mdeberta-v3-base-squad2"
	# Built once at import time; qa_result() binds this object as the default
	# value of its `pipe` parameter.
	pipe = model(model_name)

	def qa_result(context, question, file, pipe=pipe):
	    """Answer *question* from an uploaded file or from pasted text.

	    Args:
	        context: Text to search for the answer.  Ignored when *file* is
	            given, in which case the file's text is used instead.
	        question: The question to answer.
	        file: Uploaded file, or ``None``.  Either an object whose ``name``
	            attribute is the path on disk, or the path itself as a string.
	            Only ``.pdf``, ``.txt`` and ``.docx`` are accepted.
	        pipe: Callable invoked as ``pipe(question=..., context=...)`` that
	            returns a mapping with an ``'answer'`` key.

	    Returns:
	        The cleaned-up answer, or a user-facing message (in Armenian) that
	        says which input is missing or unsupported.
	    """
	    has_question = bool(question and question.strip())

	    if file is not None:
	        extractors = {
	            ".pdf": extract_text_from_pdf,
	            ".txt": extract_text_from_txt,
	            ".docx": extract_text_from_doc,
	        }
	        # Accept both an upload object exposing `.name` and a bare path.
	        file_path = getattr(file, "name", file)
	        extension = "." + file_path.split(".")[-1].lower()
	        extractor = extractors.get(extension)
	        if extractor is None:
	            return "Խնդրում եմ ներբեռնել .pdf, .txt, կամ .docx ֆայլեր"
	        if not has_question:
	            return "Ես չեմ կարողանալ քեզ օգնել եթե ինձ չտաս հարցը"
	        context = extractor(file_path)
	        # A file without extractable text (e.g. a scanned PDF) is treated
	        # like a missing context instead of being sent to the pipeline.
	        if not context or not context.strip():
	            return "Ես չեմ կարողանամ քեզ օգնել եթե դու չտրամադրես տեքստը"
	    else:
	        has_context = bool(context and context.strip())
	        if not has_context and not has_question:
	            return "Որպեսզի ես կարողանամ քեզ օգնել, դու պետք է տրամադրես տեքստ կամ ֆայլը, և հարցեր"
	        if not has_context:
	            return "Ես չեմ կարողանամ քեզ օգնել եթե դու չտրամադրես տեքստը"
	        if not has_question:
	            return "Ես չեմ կարողանամ քեզ օգնել եթե դու չտաս հարցը"

	    result = pipe(question=question, context=context)
	    answer = remove_references(result['answer'])
	    # Drop the first opening parenthesis and any trailing commas left over
	    # from the extracted span.
	    answer = answer.replace('(', '', 1).rstrip(',')
	    # Upper-case only the first character: str.capitalize() would also
	    # lower-case the rest and mangle names and acronyms in the answer.
	    return answer[:1].upper() + answer[1:]

	# Soft theme with three variables overridden; values starting with "*"
	# refer to other theme variables by name.
	theme = gr.themes.Soft().set(
	body_background_fill='*background_fill_secondary',
	body_text_color_subdued='*body_text_color',
	body_text_color_subdued_dark='*chatbot_code_background_color'
	)


	# Interface wiring: the three inputs map positionally onto
	# qa_result(context, question, file).
	# The former `btn=gr.UploadButton("📁")` argument is gone: `btn` is not a
	# documented gr.Interface parameter, and the file input below already
	# provides the upload control.
	app = gr.Interface(
	    fn=qa_result,
	    # gr.File() replaces the deprecated gr.inputs.File() spelling.
	    inputs=['textbox', 'text', gr.File()],
	    outputs='textbox',
	    title='Ողջու՛յն։ Ես քո արհեստական բանականությամբ օգնականն եմ',
	    theme=theme,
	    description='Տու՛ր ինձ տեքստ, ու տեքստին վերաբերող հարցեր, ու ես կօգնեմ քեզ պատասխանել հարցերին',
	)
	app.launch(inline=False)