Spaces:

miesnerjacob
/

Multi-task-NLP

Sleeping

App Files Files Community

Multi-task-NLP / named_entity_recognition.py

miesnerjacob

changed welcome page text

59fcc9f over 2 years ago

raw

history blame

1.97 kB

	from transformers import AutoTokenizer, AutoModelForTokenClassification
	from transformers import pipeline


	class NamedEntityRecognition:
	    """Named Entity Recognition on text data.

	    Wraps a Hugging Face "ner" pipeline and converts its predictions into a
	    flat list of plain-text segments and labelled entity tuples.

	    Attributes:
	        MODEL_NAME: Checkpoint name used for both the tokenizer and the model.
	        EXCLUDED_WORDS: Predicted entity texts that are never labelled
	            (empty, whitespace-only or a lone full stop).
	        nlp: The Hugging Face Named Entity Recognition pipeline instance.
	    """

	    MODEL_NAME = "xlm-roberta-large-finetuned-conll03-english"
	    EXCLUDED_WORDS = frozenset({"", ".", ". ", " "})

	    def __init__(self):
	        """Load the tokenizer and model and build the NER pipeline."""
	        tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
	        model = AutoModelForTokenClassification.from_pretrained(self.MODEL_NAME)
	        # NOTE(review): newer transformers releases reportedly deprecate
	        # `grouped_entities` in favour of `aggregation_strategy="simple"` --
	        # confirm against the pinned version before switching.
	        self.nlp = pipeline("ner", model=model, tokenizer=tokenizer, grouped_entities=True)

	    def get_annotation(self, preds, text):
	        """Split ``text`` into plain segments and labelled entity segments.

	        Each prediction is placed by its own character span, so repeated
	        mentions keep their individual labels and an entity is labelled even
	        when its decoded ``word`` differs from the exact slice of ``text``.

	        Parameters:
	            preds: Iterable of prediction dicts; the keys read here are
	                ``'word'``, ``'entity_group'``, ``'start'`` and ``'end'``
	                (``start``/``end`` are used as character offsets into
	                ``text``).
	            text (str): The text the predictions were computed on.

	        Returns:
	            list: Segments in text order. Unlabelled text is a ``str``
	            (possibly empty); an entity is a tuple
	            ``(segment_text, entity_group, "")``. Joining the segment texts
	            reproduces ``text`` when predictions do not overlap.
	        """
	        annotation = []
	        cursor = 0

	        # Walk the predictions left to right, whatever order they arrive in.
	        for pred in sorted(preds, key=lambda p: p["start"]):
	            start, end = pred["start"], pred["end"]
	            if start < cursor:
	                # Overlaps a span that was already emitted; keeping it would
	                # duplicate part of the text.
	                continue

	            # Unlabelled text between the previous entity and this one.
	            annotation.append(text[cursor:start])

	            span = text[start:end]
	            if pred["word"] in self.EXCLUDED_WORDS or span in self.EXCLUDED_WORDS:
	                # Bad prediction (blank / lone punctuation): keep it as plain text.
	                annotation.append(span)
	            else:
	                annotation.append((span, pred["entity_group"], ""))
	            cursor = end

	        # Whatever follows the last entity (the whole text when preds is empty).
	        annotation.append(text[cursor:])
	        return annotation

	    def classify(self, text):
	        """Run the NER pipeline on ``text`` and build its annotation.

	        Parameters:
	            text (str): The text to analyse.

	        Returns:
	            tuple: ``(preds, ner_annotation)`` where ``preds`` is the raw
	            output of ``self.nlp(text)`` and ``ner_annotation`` is the result
	            of ``get_annotation(preds, text)``.
	        """
	        preds = self.nlp(text)
	        ner_annotation = self.get_annotation(preds, text)
	        return preds, ner_annotation