Spaces:
Runtime error
Runtime error
import streamlit as st | |
from transformers import AutoTokenizer | |
from fastT5 import OnnxT5,get_onnx_runtime_sessions | |
from keywords import tokenize_sentence, get_multipartiterank_topics,get_topicrank_topics,get_yake_topics | |
from annotated_text import annotated_text | |
import nltk | |
nltk.download('stopwords') | |
nltk.download('wordnet') | |
nltk.download('punkt') | |
from nltk.corpus import stopwords,wordnet | |
from nltk.tokenize import sent_tokenize | |
import string | |
import subprocess | |
import logging | |
import multiprocessing | |
total_threads=multiprocessing.cpu_count() | |
import onnxruntime as ort | |
# from bertopic import BERTopic | |
from sklearn.feature_extraction.text import CountVectorizer | |
# Import pke; when the package is missing, install it (plus the spaCy English
# model) and retry the import once.
try:
    import pke
    logging.error("importing pke info")
except ImportError:
    # Only a failed import should trigger the install path; a bare ``except``
    # would also swallow KeyboardInterrupt/SystemExit and unrelated errors.
    import sys

    logging.error("installing pke info")
    # Run pip and spaCy through the interpreter executing this script so the
    # packages are installed into the environment that will import them.
    subprocess.run([sys.executable, '-m', 'pip', 'install', 'git+https://github.com/boudinfl/pke.git'])
    subprocess.run([sys.executable, '-m', 'spacy', 'download', 'en'])
    import pke
# ONNX Runtime session options: one thread for inter-op and one for intra-op
# parallelism.
session_options_ort = ort.SessionOptions()
session_options_ort.inter_op_num_threads = 1
session_options_ort.intra_op_num_threads = 1

# Streamlit page configuration.
# NOTE(review): page_title is the literal string 'None', not the None object;
# confirm that is intended.
st.set_page_config(
    page_title='None',             # String or None.
    initial_sidebar_state="auto",  # Can be "auto", "expanded", "collapsed".
    layout="wide",                 # Can be "centered" or "wide".
)
def set_page_title(title):
    """Set the browser tab title to ``title`` via an injected script.

    Renders a zero-height iframe in the sidebar. Its script looks up the
    parent document's <title> element, disconnects any observer previously
    stored on ``window.parent.titleObserver``, installs a new
    MutationObserver that re-applies ``title`` whenever the element's text
    differs, and finally sets the title once.

    Args:
        title: Text for the tab title. It is interpolated verbatim into
            single-quoted JavaScript string literals inside a double-quoted
            HTML attribute, so it must not contain quote characters.
    """
    # Each statement ends with an explicit semicolon; the previous version
    # relied on backslash line continuations that spliced adjacent
    # statements onto one line with no separator between them.
    script_lines = (
        "const title = window.parent.document.querySelector('title');",
        "const oldObserver = window.parent.titleObserver;",
        "if (oldObserver) {",
        "oldObserver.disconnect();",
        "}",
        "const newObserver = new MutationObserver(function(mutations) {",
        "const target = mutations[0].target;",
        f"if (target.text !== '{title}') {{",
        f"target.text = '{title}';",
        "}",
        "});",
        "newObserver.observe(title, { childList: true });",
        "window.parent.titleObserver = newObserver;",
        f"title.text = '{title}';",
    )
    script = "\n".join(script_lines)
    body = f'\n<iframe height=0 srcdoc="<script>\n{script}\n</script>" />\n'
    st.sidebar.markdown(unsafe_allow_html=True, body=body)


set_page_title('MCQ Generator')
import yaml | |
def read_yaml(file_path):
    """Parse the YAML file at ``file_path`` and return the loaded document.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's contents.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the
    # platform's default text encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
# Read the T5 settings (checkpoint id, ONNX directory, tokenizer id) from
# config.yaml.
config = read_yaml('config.yaml')
_t5_settings = config['t5_normal']
t5_chkpt = _t5_settings['chkpt']
t5_model_path = _t5_settings['model_path']
t5_tokenizer = _t5_settings['tokenizer']

# The quantized ONNX files are named after the second "/"-separated component
# of the checkpoint id: <name>-{encoder,decoder,init-decoder}-quantized.onnx.
_onnx_prefix = f'{t5_model_path}/{t5_chkpt.split("/")[1]}'
model_path_quanitzed = tuple(
    f'{_onnx_prefix}-{part}-quantized.onnx'
    for part in ('encoder', 'decoder', 'init-decoder')
)

# Build the ONNX sessions, wrap them in the fastT5 model, and load the
# matching tokenizer.
model_session = get_onnx_runtime_sessions(
    model_paths=model_path_quanitzed,
    n_threads=1,
    parallel_exe_mode=False,
)
model_t5 = OnnxT5(model_or_model_path=t5_chkpt, onnx_model_sessions=model_session)
tokenizer_t5 = AutoTokenizer.from_pretrained(t5_tokenizer)
def _generate_t5(model, tokenizer, prompt, max_length):
    """Tokenize ``prompt``, generate with 3 beams, and decode the result."""
    features = tokenizer([prompt], return_tensors='pt')
    output = model.generate(
        input_ids=features['input_ids'],
        attention_mask=features['attention_mask'],
        max_length=max_length,
        num_beams=3,
    )
    # A single prompt is submitted, so the output is squeezed before decoding.
    return tokenizer.decode(output.squeeze(), skip_special_tokens=True)


def create_question_t5(model, tokenizer, context, answer, max_length=64):
    """Generate text for a "context: ... answer: ..." prompt.

    Args:
        model: Object exposing ``generate(input_ids=, attention_mask=,
            max_length=, num_beams=)``.
        tokenizer: Callable tokenizer that also exposes ``decode``.
        context: Passage inserted after ``context:`` in the prompt.
        answer: Text inserted after ``answer:`` in the prompt.
        max_length: ``max_length`` forwarded to ``model.generate``.

    Returns:
        The decoded model output with special tokens skipped.
    """
    prompt = "context: %s answer: %s </s>" % (context, answer)
    return _generate_t5(model, tokenizer, prompt, max_length)


def create_answers_t5(model, tokenizer, context, question, max_length=128):
    """Generate text for a "context: ... question: ..." prompt.

    Args:
        model: Object exposing ``generate(input_ids=, attention_mask=,
            max_length=, num_beams=)``.
        tokenizer: Callable tokenizer that also exposes ``decode``.
        context: Passage inserted after ``context:`` in the prompt.
        question: Text inserted after ``question:`` in the prompt.
        max_length: ``max_length`` forwarded to ``model.generate``.

    Returns:
        The decoded model output with special tokens skipped.
    """
    prompt = "context: %s question: %s </s>" % (context, question)
    return _generate_t5(model, tokenizer, prompt, max_length)
# Sample paragraph used to pre-fill the input box, one tuple entry per line
# of text.
_DEFAULT_CONTEXT_LINES = (
    "Another important distinction is between companies that build enterprise products (B2B - business to business) and companies that build customer products (B2C - business to consumer).",
    "B2B companies build products for organizations. Examples of enterprise products are Customer relationship management (CRM) software, project management tools, database management systems, cloud hosting services, etc.",
    "B2C companies build products for individuals. Examples of consumer products are social networks, search engines, ride-sharing services, health trackers, etc.",
    "Many companies do both -- their products can be used by individuals but they also offer plans for enterprise users. For example, Google Drive can be used by anyone but they also have Google Drive for Enterprise.",
    "Even if a B2C company doesn’t create products for enterprises directly, they might still need to sell to enterprises. For example, Facebook’s main product is used by individuals but they sell ads to enterprises. Some might argue that this makes Facebook users products, as famously quipped: “If you’re not paying for it, you’re not the customer; you’re the product being sold.”",
)
default_context = "\n".join(_DEFAULT_CONTEXT_LINES)

# Sample answer string.
default_answer = "companies"
# Main UI: a text area for the passage and one button per generation mode.
# NOTE(review): default_context looks longer than max_chars=1000 -- confirm
# how Streamlit treats an over-long default value.
input_context = st.text_area(label='Input paragraph', height=300, max_chars=1000, value=default_context)

c1, c2, c3 = st.columns(3)

# NOTE(review): block nesting was reconstructed; the ``if`` blocks are assumed
# to sit outside the column contexts -- confirm against the original layout.
with c1:
    create_usingkeyword = st.button("Create Questions using Keywords")

if create_usingkeyword:
    # NOTE(review): tokenized_sent is not used below.
    tokenized_sent = tokenize_sentence(input_context)
    keywords_noun_adj_verb = get_multipartiterank_topics(input_context)
    t5_questions = []
    with st.spinner("Creating Questionsssss"):
        for k in keywords_noun_adj_verb:
            question = create_question_t5(model=model_t5,
                                          tokenizer=tokenizer_t5,
                                          context=input_context,
                                          answer=k)
            # Keep the text that follows the "question:" marker. If the
            # generated text lacks the marker, fall back to the whole output
            # instead of raising IndexError.
            parts = question.split('question:')
            t5_questions.append(parts[1] if len(parts) > 1 else question)
    # Show each question with the keyword that served as its answer.
    for i, (quest, ans) in enumerate(zip(t5_questions, keywords_noun_adj_verb)):
        st.write(f"{i + 1}: {quest}")
        annotated_text("Answer is ", (ans, '', "#fea"))
        st.markdown("---")

with c2:
    create_usinglongformer = st.button("Create Questions using Longformer")

if create_usinglongformer:
    # Nothing is generated for this button yet.
    pass