Spaces:

ashishraics
/

MCQ-Generator

Runtime error

App Files Files Community

MCQ-Generator / app.py

ashishraics

requirement fix

3c150d8 over 2 years ago

raw

history blame contribute delete

6.38 kB

	import streamlit as st
	from transformers import AutoTokenizer
	from fastT5 import OnnxT5,get_onnx_runtime_sessions
	from keywords import tokenize_sentence, get_multipartiterank_topics,get_topicrank_topics,get_yake_topics
	from annotated_text import annotated_text
	import nltk
	nltk.download('stopwords')
	nltk.download('wordnet')
	nltk.download('punkt')
	from nltk.corpus import stopwords,wordnet
	from nltk.tokenize import sent_tokenize
	import string
	import subprocess
	import logging
	import multiprocessing
	total_threads=multiprocessing.cpu_count()
	import onnxruntime as ort
	# from bertopic import BERTopic
	from sklearn.feature_extraction.text import CountVectorizer
	# Import pke, installing it on the fly if it is missing.
	try:
	    import pke
	    # NOTE(review): logged at ERROR level although it is informational;
	    # level kept as-is so the existing log output does not change.
	    logging.error("importing pke info")
	except ImportError:
	    # Only a missing package should trigger the install path; a bare
	    # ``except`` would also swallow KeyboardInterrupt and unrelated errors.
	    import importlib
	    import sys

	    logging.error("installing pke info")
	    # Use the running interpreter so the packages land in the environment
	    # this process imports from (``pip3``/``python3`` on PATH may point at
	    # a different Python).
	    subprocess.run([sys.executable, '-m', 'pip', 'install', 'git+https://github.com/boudinfl/pke.git'])
	    subprocess.run([sys.executable, '-m', 'spacy', 'download', 'en'])
	    # Make the import system notice packages installed after start-up.
	    importlib.invalidate_caches()
	    import pke

	# ONNX Runtime session options limited to one thread both across and
	# within operators.
	# NOTE(review): not referenced again in the visible part of this file.
	session_options_ort = ort.SessionOptions()
	session_options_ort.inter_op_num_threads = 1
	session_options_ort.intra_op_num_threads = 1

	# Streamlit page setup (alternate names: setup_page, page, layout).
	st.set_page_config(
	    # String or None. Strings get appended with "• Streamlit".
	    # NOTE(review): this is the literal string 'None', not the None object.
	    page_title='None',
	    # Can be "centered" or "wide". In the future also "dashboard", etc.
	    layout="wide",
	    # Can be "auto", "expanded", "collapsed".
	    initial_sidebar_state="auto",
	)

	def set_page_title(title):
	    """Set the browser-tab title by injecting a script into the sidebar.

	    Renders a zero-height ``<iframe>`` through ``st.sidebar.markdown`` whose
	    inline script:

	    * looks up the parent document's ``<title>`` element,
	    * disconnects any observer previously stored on
	      ``window.parent.titleObserver``,
	    * installs a new ``MutationObserver`` that resets the title text to
	      ``title`` whenever it differs, and stores it on
	      ``window.parent.titleObserver``,
	    * sets the title text to ``title`` immediately.

	    Args:
	        title: Text to show as the page title. It is interpolated verbatim
	            into single-quoted JavaScript string literals, so it must not
	            contain characters that would break out of them.
	    """
	    # Doubled braces are literal JS braces; ``{title}`` is the only
	    # interpolated field. The payload text is kept exactly as authored.
	    injected_html = f"""
	<iframe height=0 srcdoc="<script>
	const title = window.parent.document.querySelector('title') \

	const oldObserver = window.parent.titleObserver
	if (oldObserver) {{
	oldObserver.disconnect()
	}} \

	const newObserver = new MutationObserver(function(mutations) {{
	const target = mutations[0].target
	if (target.text !== '{title}') {{
	target.text = '{title}'
	}}
	}}) \

	newObserver.observe(title, {{ childList: true }})
	window.parent.titleObserver = newObserver \

	title.text = '{title}'
	</script>" />
	"""
	    st.sidebar.markdown(body=injected_html, unsafe_allow_html=True)


	# Replace the placeholder page title passed to st.set_page_config above
	# with the app's real title.
	set_page_title('MCQ Generator')

	import yaml
	def read_yaml(file_path):
	    """Load a YAML file and return its parsed content.

	    Args:
	        file_path: Path of the YAML file to read.

	    Returns:
	        Whatever ``yaml.safe_load`` produces for the file's content.

	    Raises:
	        OSError: If the file cannot be opened.
	    """
	    # Decode explicitly as UTF-8 instead of relying on the platform's
	    # locale-dependent default encoding.
	    with open(file_path, "r", encoding="utf-8") as f:
	        return yaml.safe_load(f)

	# Settings come from config.yaml, under the 't5_normal' section.
	config = read_yaml('config.yaml')

	t5_chkpt = config['t5_normal']['chkpt']
	t5_model_path = config['t5_normal']['model_path']
	t5_tokenizer = config['t5_normal']['tokenizer']

	# Paths of the three quantized ONNX graphs, in the order encoder, decoder,
	# init-decoder. The file stem is the second '/'-separated piece of the
	# checkpoint id.
	model_path_quanitzed = tuple(
	    f'{t5_model_path}/{t5_chkpt.split("/")[1]}-{component}-quantized.onnx'
	    for component in ('encoder', 'decoder', 'init-decoder')
	)

	# Build the ONNX runtime sessions (single thread, no parallel execution
	# mode), then wrap them in the fastT5 model and load the tokenizer.
	model_session = get_onnx_runtime_sessions(
	    model_paths=model_path_quanitzed,
	    n_threads=1,
	    parallel_exe_mode=False,
	)
	model_t5 = OnnxT5(model_or_model_path=t5_chkpt, onnx_model_sessions=model_session)
	tokenizer_t5 = AutoTokenizer.from_pretrained(t5_tokenizer)

	def create_question_t5(model,tokenizer,context,answer,max_length=64):
	    """Generate a question for ``answer`` given ``context``.

	    Args:
	        model: Object exposing ``generate(input_ids=..., attention_mask=...,
	            max_length=..., num_beams=...)``.
	        tokenizer: Callable tokenizer that also provides ``decode``.
	        context: Passage the question should be about.
	        answer: Answer the generated question should lead to.
	        max_length: Forwarded to ``model.generate`` as ``max_length``.

	    Returns:
	        The decoded generation with special tokens skipped.
	    """
	    prompt_template = "context: %s answer: %s </s>"
	    prompt = prompt_template % (context, answer)

	    # Tokenize a single-item batch.
	    encoded = tokenizer([prompt], return_tensors='pt')

	    generation_kwargs = {
	        'input_ids': encoded['input_ids'],
	        'attention_mask': encoded['attention_mask'],
	        'max_length': max_length,
	        'num_beams': 3,
	    }
	    generated = model.generate(**generation_kwargs)

	    # squeeze() is applied to the generation output before decoding.
	    return tokenizer.decode(generated.squeeze(), skip_special_tokens=True)

	def create_answers_t5(model,tokenizer,context,question,max_length=128):
	    """Generate an answer to ``question`` given ``context``.

	    Args:
	        model: Object exposing ``generate(input_ids=..., attention_mask=...,
	            max_length=..., num_beams=...)``.
	        tokenizer: Callable tokenizer that also provides ``decode``.
	        context: Passage the answer should be drawn from.
	        question: Question to answer.
	        max_length: Forwarded to ``model.generate`` as ``max_length``.

	    Returns:
	        The decoded generation with special tokens skipped.
	    """
	    prompt_template = "context: %s question: %s </s>"
	    prompt = prompt_template % (context, question)

	    # Tokenize a single-item batch.
	    encoded = tokenizer([prompt], return_tensors='pt')

	    generation_kwargs = {
	        'input_ids': encoded['input_ids'],
	        'attention_mask': encoded['attention_mask'],
	        'max_length': max_length,
	        'num_beams': 3,
	    }
	    generated = model.generate(**generation_kwargs)

	    # squeeze() is applied to the generation output before decoding.
	    return tokenizer.decode(generated.squeeze(), skip_special_tokens=True)

	# Sample paragraph used as the initial value of the input text area.
	default_context = """Another important distinction is between companies that build enterprise products (B2B - business to business) and companies that build customer products (B2C - business to consumer).

	B2B companies build products for organizations. Examples of enterprise products are Customer relationship management (CRM) software, project management tools, database management systems, cloud hosting services, etc.

	B2C companies build products for individuals. Examples of consumer products are social networks, search engines, ride-sharing services, health trackers, etc.

	Many companies do both -- their products can be used by individuals but they also offer plans for enterprise users. For example, Google Drive can be used by anyone but they also have Google Drive for Enterprise.

	Even if a B2C company doesn’t create products for enterprises directly, they might still need to sell to enterprises. For example, Facebook’s main product is used by individuals but they sell ads to enterprises. Some might argue that this makes Facebook users products, as famously quipped: “If you’re not paying for it, you’re not the customer; you’re the product being sold.”"""

	# Sample answer keyword.
	# NOTE(review): not referenced in the visible part of this file.
	default_answer = "companies"



	# --- UI: paragraph input and the question-generation actions ---
	# NOTE(review): the original nesting was lost in this copy of the file;
	# the structure below is reconstructed from the statements' syntax.
	input_context = st.text_area(label='Input paragraph', height=300, max_chars=1000, value=default_context)

	# Three columns are created; only c1 and c2 are populated below.
	c1, c2, c3 = st.columns(3)

	with c1:
	    create_usingkeyword = st.button("Create Questions using Keywords")
	    if create_usingkeyword:
	        # NOTE(review): tokenized_sent is not used in the visible code.
	        tokenized_sent = tokenize_sentence(input_context)
	        keywords_noun_adj_verb = get_multipartiterank_topics(input_context)
	        t5_questions = []

	        with st.spinner("Creating Questionsssss"):
	            # One question per extracted keyword, the keyword being the
	            # target answer.
	            for k in keywords_noun_adj_verb:
	                question = create_question_t5(model=model_t5,
	                                              tokenizer=tokenizer_t5,
	                                              context=input_context,
	                                              answer=k)
	                # Keep the text after the 'question:' marker. If the marker
	                # is absent, fall back to the whole generation instead of
	                # raising IndexError and aborting the run.
	                pieces = question.split('question:')
	                t5_questions.append(pieces[1] if len(pieces) > 1 else question)

	        # Show each numbered question followed by its highlighted answer.
	        for i, (quest, ans) in enumerate(zip(t5_questions, keywords_noun_adj_verb)):
	            st.write(f"{i + 1}: {quest}")
	            annotated_text("Answer is ", (ans, '', "#fea"))
	            st.markdown("---")

	with c2:
	    create_usinglongformer = st.button("Create Questions using Longformer")
	    if create_usinglongformer:
	        # Not implemented.
	        pass