Spaces:
Build error
Build error
from Models import Models | |
class ResumeSegmenter: | |
def __init__(self, zero_shot_classifier): | |
self.zero_shot_classifier = zero_shot_classifier | |
objective = ( | |
'career goal', | |
'objective', | |
'career objective', | |
'employment objective', | |
'professional objective', | |
'summary', | |
'summary of qualifications', | |
'digital' | |
) | |
work_and_employment = ( | |
'employment history', | |
'employment data', | |
'career summary', | |
'work history', | |
'work experience', | |
'experience', | |
'professional experience', | |
'professional background', | |
'professional employment', | |
'additional experience', | |
'career related experience', | |
"professional employment history", | |
'related experience', | |
'programming experience', | |
'freelance', | |
'freelance experience', | |
'army experience', | |
'military experience', | |
'military background', | |
) | |
education_and_training = ( | |
'academic background', | |
'academic experience', | |
'programs', | |
'courses', | |
'related courses', | |
'education', | |
'educational background', | |
'educational qualifications', | |
'educational training', | |
'education and training', | |
'training', | |
'academic training', | |
'professional training', | |
'course project experience', | |
'related course projects', | |
'internship experience', | |
'internships', | |
'apprenticeships', | |
'college activities', | |
'certifications', | |
'special training', | |
) | |
skills_header = ( | |
'credentials', | |
'qualifications', | |
'areas of experience', | |
'areas of expertise', | |
'areas of knowledge', | |
'skills', | |
"other skills", | |
"other abilities", | |
'career related skills', | |
'professional skills', | |
'specialized skills', | |
'technical skills', | |
'computer skills', | |
'personal skills', | |
'computer knowledge', | |
'technologies', | |
'technical experience', | |
'proficiencies', | |
'languages', | |
'language competencies and skills', | |
'programming languages', | |
'competencies' | |
) | |
misc = ( | |
'activities and honors', | |
'activities', | |
'affiliations', | |
'professional affiliations', | |
'associations', | |
'professional associations', | |
'memberships', | |
'professional memberships', | |
'athletic involvement', | |
'community involvement', | |
'refere', | |
'civic activities', | |
'extra-Curricular activities', | |
'professional activities', | |
'volunteer work', | |
'volunteer experience', | |
'additional information', | |
'interests' | |
) | |
accomplishments = ( | |
'achievement', | |
'awards and achievements', | |
'licenses', | |
'presentations', | |
'conference presentations', | |
'conventions', | |
'dissertations', | |
'exhibits', | |
'papers', | |
'publications', | |
'professional publications', | |
'research experience', | |
'research grants', | |
'project', | |
'research projects', | |
'personal projects', | |
'current research interests', | |
'thesis', | |
'theses', | |
) | |
def find_segment_indices(self, string_to_search, resume_segments, resume_indices): | |
for i, line in enumerate(string_to_search): | |
if line[0].islower(): | |
continue | |
header = line.lower() | |
if [o for o in self.objective if header.startswith(o)]: | |
try: | |
resume_segments['objective'][header] | |
except: | |
resume_indices.append(i) | |
header = [o for o in self.objective if header.startswith(o)][0] | |
resume_segments['objective'][header] = i | |
elif [w for w in self.work_and_employment if header.startswith(w)]: | |
try: | |
resume_segments['work_and_employment'][header] | |
except: | |
resume_indices.append(i) | |
header = [w for w in self.work_and_employment if header.startswith(w)][0] | |
resume_segments['work_and_employment'][header] = i | |
elif [e for e in self.education_and_training if header.startswith(e)]: | |
try: | |
resume_segments['education_and_training'][header] | |
except: | |
resume_indices.append(i) | |
header = [e for e in self.education_and_training if header.startswith(e)][0] | |
resume_segments['education_and_training'][header] = i | |
elif [s for s in self.skills_header if header.startswith(s)]: | |
try: | |
resume_segments['skills'][header] | |
except: | |
resume_indices.append(i) | |
header = [s for s in self.skills_header if header.startswith(s)][0] | |
resume_segments['skills'][header] = i | |
elif [m for m in self.misc if header.startswith(m)]: | |
try: | |
resume_segments['misc'][header] | |
except: | |
resume_indices.append(i) | |
header = [m for m in self.misc if header.startswith(m)][0] | |
resume_segments['misc'][header] = i | |
elif [a for a in self.accomplishments if header.startswith(a)]: | |
try: | |
resume_segments['accomplishments'][header] | |
except: | |
resume_indices.append(i) | |
header = [a for a in self.accomplishments if header.startswith(a)][0] | |
resume_segments['accomplishments'][header] = i | |
def slice_segments(self, string_to_search, resume_segments, resume_indices): | |
resume_segments['contact_info'] = string_to_search[:resume_indices[0]] | |
sec_idxs = {} | |
for section, value in resume_segments.items(): | |
if section == 'contact_info': | |
continue | |
for sub_section, start_idx in value.items(): | |
end_idx = len(string_to_search) | |
if (resume_indices.index(start_idx) + 1) != len(resume_indices): | |
end_idx = resume_indices[resume_indices.index(start_idx) + 1] | |
sec_idxs[section] = (start_idx, end_idx) | |
# print(start_idx, end_idx) | |
resume_segments[section][sub_section] = string_to_search[start_idx:end_idx] | |
return sec_idxs | |
def find_true_segment(self, dict_of_segments, segment_name): | |
segment_classes = { | |
'objective': ["objective", "other"], | |
'work_and_employment':["employment history", "other"], | |
'education_and_training': ["education", "other"], | |
'skills': ["skills", "other"], | |
'accomplishments': ["accomplishments", "other"], | |
'misc': ["misc", "other"], | |
'contact_info': ["contact information", "other"] | |
} | |
classes = segment_classes[segment_name] | |
scores = [] | |
segs = dict_of_segments.keys() | |
for seg in segs: | |
sequence = dict_of_segments[seg] | |
score = self.zero_shot_classifier(' '.join(sequence), classes)["scores"][0] | |
scores.append(score) | |
res = sorted(zip(dict_of_segments.keys(), scores), key=lambda x: x[1], reverse=True) | |
if len(res): | |
return res[0][0] | |
else: return 0 | |
def segment(self, string_to_search): | |
print("Segmenting the Resume..") | |
resume_segments = { | |
'objective': {}, | |
'work_and_employment': {}, | |
'education_and_training': {}, | |
'skills': {}, | |
'accomplishments': {}, | |
'misc': {} | |
} | |
resume_indices = [] | |
self.find_segment_indices(string_to_search, resume_segments, resume_indices) | |
if len(resume_indices) != 0: | |
sec_idx = self.slice_segments(string_to_search, resume_segments, resume_indices) | |
else: | |
resume_segments['contact_info'] = [] | |
for segment in resume_segments: | |
if segment == "contact_info": continue | |
if not len(resume_segments[segment]) > 1: | |
if len(resume_segments[segment]) == 1: | |
only_key = list(resume_segments[segment].keys())[0] | |
resume_segments[segment] = resume_segments[segment][only_key][1:] | |
continue | |
if segment != "work_and_employment": continue | |
true_seg = self.find_true_segment(resume_segments[segment], segment) | |
if not true_seg: | |
resume_segments[segment] = [] | |
else: | |
resume_segments[segment] = resume_segments[segment][true_seg][1:] | |
return resume_segments | |