# ResumeAPI / SkillExtractV1.py
# Repository page header (not code): Vaibhav84 · "Change" · b6ee5b6 · 4.75 kB
import re
from datetime import datetime
import psycopg2
import pandas as pd
from DbConnection import DbConnection
class SkillExtractorDetailsV1:
    """Group the skills found in a piece of text into three buckets.

    All methods are stateless and are called on the class itself, e.g.
    ``SkillExtractorDetailsV1.GetSkillData(extractor, text)``.  They are
    declared as static methods so both class-level and instance-level calls
    work.
    """

    # Single-character delimiters used by ``extractWords`` to tokenise text:
    # comma, space, semicolon, newline, forward slash and backslash.
    _TOKEN_SPLIT_RE = re.compile(r"[, ;\n/\\]")

    @staticmethod
    def GetSkillData(skill_extractor, inputData):
        """Annotate ``inputData`` and return the skills grouped by importance.

        Args:
            skill_extractor: Object exposing ``annotate(text)`` (returning a
                dict with ``['results']['full_matches']`` and
                ``['results']['ngram_scored']``) and a ``skills_db`` mapping
                of skill id to ``{'skill_name': ..., 'skill_type': ...}``.
                NOTE(review): presumably a skillNer ``SkillExtractor`` --
                confirm against the caller.
            inputData: The text to analyse.

        Returns:
            A ``pandas.DataFrame`` with columns ``Data`` and ``Values`` and
            three rows: ``Required Skills``, ``Soft Skills`` and
            ``Good to have Skills``.  Each value is a comma-separated string.
        """
        # Build the zero-weight lookup once instead of filtering the
        # DataFrame for every match.
        zero_weight = set(SkillExtractorDetailsV1.GetSkillDatafromDB()['skill'])

        annotations = skill_extractor.annotate(inputData)
        results = annotations['results']
        matches = results['full_matches'] + results['ngram_scored']

        seen = set()
        required = []
        soft = []
        good_to_have = []
        for match in matches:
            entry = skill_extractor.skills_db[match['skill_id']]
            # Drop any parenthesised qualifier, e.g. "Java (Programming ...)".
            skill_name = entry['skill_name'].split("(")[0].strip()
            is_soft = entry['skill_type'] == 'Soft Skill'
            skill_score = round(match['score'], 2) * 100

            if skill_name.upper() in zero_weight:
                print('Weightage zero skill-' + skill_name)
                continue
            if skill_name in seen:
                # Already classified; duplicates are skipped silently rather
                # than being reported as zero-weight skills.
                continue
            seen.add(skill_name)

            # Soft skills need a score above 50 to count as such; any other
            # skill needs a score above 99 to count as required.
            threshold = 50 if is_soft else 99
            if skill_score > threshold:
                (soft if is_soft else required).append(skill_name)
            elif skill_score <= threshold:
                good_to_have.append(skill_name)

        extra_skills = SkillExtractorDetailsV1.extractWords(inputData)
        # Joining the combined list avoids a leading comma when there are
        # extra skills but no required ones.
        data = {
            'Data': ['Required Skills', 'Soft Skills', 'Good to have Skills'],
            'Values': [
                ','.join(required + extra_skills),
                ','.join(soft),
                ','.join(good_to_have),
            ],
        }
        return pd.DataFrame(data)

    @staticmethod
    def GetSkillDatafromDB():
        """Return the upper-cased names of skills whose weightage is 0.

        Returns:
            A ``pandas.DataFrame`` with a single column ``skill``.
        """
        query = "select upper(skilldetails) skill from skillmaster where weightage = 0"
        conn = DbConnection.GetMySQLDbConnection()
        try:
            return pd.read_sql_query(query, conn)
        finally:
            # Release the connection even if the query fails.
            conn.close()

    @staticmethod
    def extractWords(job_description):
        """Return the tokens of ``job_description`` that are listed skills.

        The text is split on comma, space, semicolon, newline, ``/`` and
        ``\\`` (closing parentheses are treated as spaces first) and every
        non-empty token is checked with ``skill_Validate`` against the table
        returned by ``getNewSkills``.

        Args:
            job_description: The text to scan.

        Returns:
            A list of the matching tokens, stripped of surrounding
            whitespace, in order of appearance (duplicates are kept).
        """
        text = job_description.replace(')', ' ')
        known_skills = SkillExtractorDetailsV1.getNewSkills()

        found = []
        for token in SkillExtractorDetailsV1._TOKEN_SPLIT_RE.split(text):
            candidate = token.strip()
            if not candidate:
                # Consecutive delimiters produce empty tokens.
                continue
            try:
                is_skill = SkillExtractorDetailsV1.skill_Validate(known_skills, candidate)
            except Exception as error:
                # Best effort: one bad token must not abort the scan, but the
                # failure is reported instead of being silently dropped.
                print("Skill validation failed for '" + candidate + "': " + str(error))
                continue
            if is_skill:
                found.append(candidate)
                print("Additional Skills : " + candidate)
        return found

    @staticmethod
    def getNewSkills():
        """Load the skills whose weightage is -2, with upper-cased names.

        NOTE(review): the method name suggests weightage -2 marks "new"
        skills -- confirm against the ``skillmaster`` schema.

        Returns:
            A ``pandas.DataFrame`` with columns ``skillid``, ``skilldetails``
            (upper-cased), ``skilltype`` and ``skill_score``.
        """
        query = "select skillid,skilldetails,skilltype,skill_score from skillmaster where weightage = -2"
        conn = DbConnection.GetMySQLDbConnection()
        try:
            df_skill_master = pd.read_sql_query(query, conn)
        finally:
            # Release the connection even if the query fails.
            conn.close()
        df_skill_master['skilldetails'] = df_skill_master['skilldetails'].str.upper()
        return df_skill_master

    @staticmethod
    def skill_Validate(df, skill):
        """Tell whether ``skill`` appears in ``df['skilldetails']``.

        The comparison upper-cases ``skill`` and expects ``skilldetails`` to
        be upper-cased already.  A single word must equal an entry exactly;
        a multi-word phrase only needs to be a substring of an entry.  The
        text is compared literally, so names such as ``C++`` or ``.NET`` are
        safe, and ``df`` is not modified.

        Args:
            df: DataFrame with a string column ``skilldetails``.
            skill: Candidate skill name.

        Returns:
            ``1`` if the skill is present, ``0`` otherwise (including for
            empty or whitespace-only input).
        """
        skill = skill.strip().upper()
        if not skill:
            return 0

        known = df['skilldetails']
        if len(skill.split()) == 1:
            matched = (known == skill).any()
        else:
            matched = known.str.contains(skill, regex=False, na=False).any()
        return 1 if matched else 0