Spaces:

abhibisht89
/

CV_MATCHER

Runtime error

App Files Files Community

CV_MATCHER / app.py

abhibisht89

Update app.py

9c34257 over 2 years ago

raw

history blame contribute delete

3.25 kB

	import spacy
	import gradio as gr
	from spacy.pipeline import EntityRuler
	from spacy import displacy
	import jsonlines
	from spacy.cli import download
	# Load the small English pipeline. The model is only downloaded when it is
	# not installed yet, so a normal start-up does not hit the network.
	try:
	    nlp = spacy.load('en_core_web_sm')
	except OSError:
	    # spacy.load raises OSError when the model package cannot be found.
	    download('en_core_web_sm')
	    nlp = spacy.load('en_core_web_sm')

	# Collect the upper-cased 'label' value of every record in the pattern file.
	with jsonlines.open("skill_patterns.jsonl") as f:
	    created_entities = [record['label'].upper() for record in f]

	def extract_text_from_word(txt):
	    '''Flatten a text onto one lower-case line.

	    Every newline and tab in ``txt`` is replaced by a single space and the
	    result is lower-cased. No file is opened; ``txt`` is the text itself.
	    '''
	    whitespace_to_space = str.maketrans({'\n': ' ', '\t': ' '})
	    return txt.translate(whitespace_to_space).lower()

	def add_newruler_to_pipeline(skill_pattern_path):
	    '''Attach an entity ruler to the module-level ``nlp`` pipeline.

	    The "entity_ruler" component is inserted right after the 'parser'
	    component and then populated from ``skill_pattern_path`` (only the
	    patterns are loaded from that file). Nothing is returned.
	    '''
	    nlp.add_pipe("entity_ruler", after='parser').from_disk(skill_pattern_path)

	def create_skill_set(doc):
	    '''Return the set of skill names found among the entities of ``doc``.

	    An entity counts as a skill when its label contains "skill" (case
	    ignored). The stored name is the upper-cased label with its first six
	    characters removed -- presumably a "SKILL|"-style prefix; confirm
	    against the pattern file.
	    '''
	    skills = set()
	    for entity in doc.ents:
	        label = entity.label_
	        if 'skill' in label.lower():
	            skills.add(label.upper()[6:])
	    return skills

	def create_skillset_dict(resume_names, resume_texts):
	    '''Map each resume name to the skill set extracted from its text.

	    ``resume_names`` and ``resume_texts`` are paired by position; the skill
	    set of every text is computed with ``create_skill_set`` before pairing.
	    '''
	    extracted = list(map(create_skill_set, resume_texts))
	    skills_by_name = {}
	    for name, skills in zip(resume_names, extracted):
	        skills_by_name[name] = skills
	    return skills_by_name

	def match_skills(vacature_set, cv_set, resume_name):
	    '''Report how much of a job offer's skill set a resume covers.

	    Args:
	        vacature_set: set of skills extracted from the job offer.
	        cv_set: dict mapping a resume name to that resume's skill set.
	        resume_name: key in ``cv_set`` of the resume to score.

	    Returns:
	        ``(resume_name, pct_match)``, where ``pct_match`` is the percentage
	        of job-offer skills also present in the resume, rounded to a
	        whole-number float. Returns ``None`` when the job offer has no
	        skills, since no percentage can be computed.

	    Raises:
	        KeyError: if ``resume_name`` is not in ``cv_set`` and the job
	            offer has at least one skill.
	    '''
	    if not vacature_set:
	        print('could not extract skills from job offer text')
	        # Explicit "no result" so callers can test the return value.
	        return None

	    # Compute the overlap once; it is used for both the score and the report.
	    matched = vacature_set.intersection(cv_set[resume_name])
	    pct_match = round(len(matched) / len(vacature_set) * 100, 0)
	    print(resume_name + " has a {}% skill match on this job offer".format(pct_match))
	    print('Required skills: {} '.format(vacature_set))
	    print('Matched skills: {} \n'.format(matched))

	    return (resume_name, pct_match)

	# Register the skill patterns on the pipeline once, at import time, before any text is processed.
	add_newruler_to_pipeline("skill_patterns.jsonl")

	def match(CV,JD):
	    '''Score a resume text against a job description text.

	    Both texts are run through the module-level ``nlp`` pipeline and their
	    skill sets are compared.

	    Args:
	        CV: resume text.
	        JD: job description text.

	    Returns:
	        The match percentage produced by ``match_skills``, or the string
	        "No matching skill set." when no skills could be extracted from
	        the job description or no score was produced.
	    '''
	    # The interface handles a single resume, stored under a fixed name.
	    resume_names = ['ABHI']
	    resume_texts = [nlp(CV)]
	    skillset_dict = create_skillset_dict(resume_names, resume_texts)
	    jd_skillset = create_skill_set(nlp(JD))

	    if not jd_skillset:
	        # Without job-offer skills there is nothing to take a percentage of.
	        return "No matching skill set."

	    match_pairs = [match_skills(jd_skillset, skillset_dict, name) for name in skillset_dict]
	    # Guard both an empty result list and a None entry before indexing.
	    first_pair = match_pairs[0] if match_pairs else None
	    if first_pair:
	        return first_pair[1]
	    return "No matching skill set."

	# Example prompts. NOTE(review): this list is not passed to the Interface below and the
	# questions are unrelated to resume matching -- looks like a leftover; confirm before removing.
	exp=["Who is steve jobs?","What is coldplay?","What is a turing test?","What is the most interesting thing about our universe?","What are the most beautiful places on earth?"]

	# Text shown as the Interface's `article`.
	desc="A Machine Learning Based Resume Matcher, to compare Resumes with Job Descriptions. "

	# Two ten-line text inputs: the resume and the job description.
	# NOTE(review): the gr.inputs / gr.outputs namespaces are deprecated in newer Gradio
	# releases -- confirm the pinned Gradio version still provides them.
	inp1=gr.inputs.Textbox(lines=10, placeholder=None, default="", label="Resume Details")
	inp2=gr.inputs.Textbox(lines=10, placeholder=None, default="", label="Job Description")

	# Single text output that displays whatever `match` returns.
	out=gr.outputs.Textbox(type="auto",label="Match Score")

	# Wire `match` to the two inputs and the output, then start the app.
	iface = gr.Interface(fn=match, inputs=[inp1,inp2], outputs=out,title="A Machine Learning Based Resume Matcher, to compare Resumes with Job Descriptions",article=desc,theme="huggingface",layout='vertical')
	iface.launch(debug=True)