Spaces:

ashishraics
/

MCQ-Generator

Runtime error

App Files Files Community

MCQ-Generator / keywords.py

ashishraics

requirement fix

51fb126 over 2 years ago

raw

history blame contribute delete

2.71 kB

	import nltk
	# Download the NLTK data packages (stop-word lists, WordNet, Punkt) at import time.
	for _resource in ('stopwords', 'wordnet', 'punkt'):
	    nltk.download(_resource)
	from nltk.corpus import stopwords,wordnet
	from nltk.tokenize import sent_tokenize
	import string
	import subprocess
	import logging

	import importlib
	import sys

	# Import pke; when it is not installed, install it from its GitHub
	# repository with the interpreter that is running this module, then retry.
	try:
	    import pke
	    logging.info("importing pke info")
	except ImportError:
	    logging.warning("installing pke info")
	    # sys.executable guarantees the package lands in the environment of the
	    # running interpreter, which a bare `pip3` on PATH does not.
	    subprocess.run(
	        [sys.executable, '-m', 'pip', 'install', 'git+https://github.com/boudinfl/pke.git'],
	        check=True,
	    )
	    # spaCy v3 no longer accepts the 'en' shortcut; the full pipeline name is required.
	    subprocess.run([sys.executable, '-m', 'spacy', 'download', 'en_core_web_sm'])
	    # Make the import system notice the freshly installed package.
	    importlib.invalidate_caches()
	    import pke

	# Tokens handed to pke's load_document() as the stoplist: every punctuation
	# character, pke's English stopwords, bracket placeholder tokens such as
	# '-lrb-', and NLTK's English stopwords.
	stoplist = [
	    *string.punctuation,
	    *pke.lang.stopwords.get('en'),
	    '-lrb-', '-rrb-', '-lcb-', '-rcb-', '-lsb-', '-rsb-',
	    *stopwords.words('english'),
	]

	def tokenize_sentence(text, min_length=20):
	    """Split ``text`` into sentences and drop the short ones.

	    Args:
	        text: Raw text, segmented with NLTK's ``sent_tokenize``.
	        min_length: A sentence is kept only when its length, measured
	            before surrounding whitespace is trimmed, is strictly greater
	            than this value.

	    Returns:
	        A list of the kept sentences with leading and trailing whitespace
	        removed.
	    """
	    return [s.strip() for s in sent_tokenize(text) if len(s) > min_length]

	def get_multipartiterank_topics(text):
	    """Extract up to five keyphrases from ``text`` with pke's MultipartiteRank.

	    Candidates are restricted to nouns, verbs and adjectives. Extraction is
	    best-effort: if pke raises, the exception is logged with its traceback
	    and an empty list is returned.

	    Args:
	        text: Document to extract keyphrases from.

	    Returns:
	        A list of unique keyphrase strings, in the order ``get_n_best``
	        returned them.
	    """
	    output = []
	    try:
	        extractor = pke.unsupervised.MultipartiteRank()
	        extractor.load_document(input=text, language='en', normalization=None, stoplist=stoplist)
	        # POS tags that can be selected:
	        # 'ADJ' 'ADP' 'ADV' 'AUX' 'DET' 'NOUN' 'NUM' 'PART' 'PROPN' 'PUNCT' 'VERB'
	        extractor.candidate_selection(pos={'NOUN', 'VERB', 'ADJ'})
	        extractor.candidate_weighting(threshold=0.7, method='average', alpha=1.1)
	        # The first element of each returned entry is the keyphrase text.
	        output = [entry[0] for entry in extractor.get_n_best(n=5)]
	    except Exception:
	        logging.exception("MultipartiteRank keyphrase extraction failed")
	    # dict.fromkeys drops duplicates while keeping the ranking order,
	    # which a set would scramble.
	    return list(dict.fromkeys(output))

	def get_topicrank_topics(text):
	    """Extract up to five keyphrases from ``text`` with pke's TopicRank.

	    Candidates are restricted to nouns and adjectives. Extraction is
	    best-effort: if pke raises, the exception is logged with its traceback
	    and an empty list is returned.

	    Args:
	        text: Document to extract keyphrases from.

	    Returns:
	        A list of unique keyphrase strings, in the order ``get_n_best``
	        returned them.
	    """
	    output = []
	    try:
	        extractor = pke.unsupervised.TopicRank()
	        extractor.load_document(input=text, language='en', normalization=None, stoplist=stoplist)
	        # POS tags that can be selected:
	        # 'ADJ' 'ADP' 'ADV' 'AUX' 'DET' 'NOUN' 'NUM' 'PART' 'PROPN' 'PUNCT' 'VERB'
	        extractor.candidate_selection(pos={'NOUN', 'ADJ'})
	        extractor.candidate_weighting(threshold=0.7, method='average')
	        # The first element of each returned entry is the keyphrase text.
	        output = [entry[0] for entry in extractor.get_n_best(n=5)]
	    except Exception:
	        logging.exception("TopicRank keyphrase extraction failed")
	    # dict.fromkeys drops duplicates while keeping the ranking order,
	    # which a set would scramble.
	    return list(dict.fromkeys(output))

	def get_yake_topics(text):
	    """Extract up to five keyphrases from ``text`` with pke's YAKE model.

	    YAKE is a statistical model; the original author noted that it performs
	    very poorly here. Extraction is best-effort: if pke raises, the
	    exception is logged with its traceback and an empty list is returned.

	    Args:
	        text: Document to extract keyphrases from.

	    Returns:
	        A list of unique keyphrase strings, in the order ``get_n_best``
	        returned them.
	    """
	    output = []
	    try:
	        extractor = pke.unsupervised.YAKE()
	        extractor.load_document(input=text, language='en', normalization=None, stoplist=stoplist)
	        extractor.candidate_selection(n=3)
	        extractor.candidate_weighting(window=2)
	        # The first element of each returned entry is the keyphrase text.
	        output = [entry[0] for entry in extractor.get_n_best(n=5, threshold=0.9)]
	    except Exception:
	        logging.exception("YAKE keyphrase extraction failed")
	    # dict.fromkeys drops duplicates while keeping the ranking order,
	    # which a set would scramble.
	    return list(dict.fromkeys(output))