|
import re |
|
from datetime import datetime |
|
import psycopg2 |
|
import pandas as pd |
|
from DbConnection import DbConnection |
|
class SkillExtractorDetailsV1:
    """Group skills detected in a text into required / soft / good-to-have.

    Every method is stateless and is meant to be called on the class itself,
    e.g. ``SkillExtractorDetailsV1.GetSkillData(extractor, text)``.
    """

    @staticmethod
    def GetSkillData(skill_extractor, inputData: str) -> pd.DataFrame:
        """Annotate ``inputData`` and bucket the detected skills.

        Args:
            skill_extractor: Object exposing ``annotate(text)`` (returning a
                dict with ``['results']['full_matches']`` and
                ``['results']['ngram_scored']`` lists) and a ``skills_db``
                mapping of skill id to a dict with ``'skill_name'`` and
                ``'skill_type'``. Presumably a skillNer ``SkillExtractor``;
                confirm against the caller.
            inputData: Text to analyse (also scanned by ``extractWords``).

        Returns:
            A two-column DataFrame (``Data``, ``Values``) with one row each
            for 'Required Skills', 'Soft Skills' and 'Good to have Skills';
            ``Values`` holds comma-joined skill names.
        """
        # Build the zero-weight lookup once instead of filtering the
        # DataFrame for every match.
        zero_weight = set(SkillExtractorDetailsV1.GetSkillDatafromDB()['skill'])

        results = skill_extractor.annotate(inputData)['results']
        matches = results['full_matches'] + results['ngram_scored']

        seen = set()
        required_skills = []
        soft_skills = []
        good_to_have = []

        for match in matches:
            entry = skill_extractor.skills_db[match['skill_id']]
            # Drop any parenthesised suffix from the catalogue name.
            skill_name = entry['skill_name'].split("(")[0].strip()
            skill_type = entry['skill_type']
            skill_score = round(match['score'], 2) * 100

            if skill_name.upper() in zero_weight:
                print('Weightage zero skill-' + skill_name)
                continue
            if skill_name in seen:
                # Already bucketed; previously this was misreported as a
                # zero-weight skill.
                continue
            seen.add(skill_name)

            if skill_type == 'Soft Skill':
                bucket = soft_skills if skill_score > 50 else good_to_have
            else:
                bucket = required_skills if skill_score > 99 else good_to_have
            bucket.append(skill_name)

        extra_skills = SkillExtractorDetailsV1.extractWords(inputData)
        # Joining one combined list avoids a leading comma when no required
        # skill was matched but extra words were found.
        reqSkill = ','.join(required_skills + extra_skills)

        return pd.DataFrame({
            'Data': ['Required Skills', 'Soft Skills', 'Good to have Skills'],
            'Values': [reqSkill, ','.join(soft_skills), ','.join(good_to_have)],
        })

    @staticmethod
    def GetSkillDatafromDB() -> pd.DataFrame:
        """Load the upper-cased names of skills whose weightage is 0.

        Returns:
            DataFrame with a single ``skill`` column.
        """
        query = "select upper(skilldetails) skill from skillmaster where weightage = 0"
        conn = DbConnection.GetMySQLDbConnection()
        try:
            return pd.read_sql_query(query, conn)
        finally:
            # Release the connection, as getNewSkills does.
            conn.close()

    @staticmethod
    def extractWords(job_description: str) -> list:
        """Return tokens of ``job_description`` that are known extra skills.

        The text is split on commas, spaces, semicolons, newlines, slashes,
        backslashes and parentheses; each non-empty token is checked against
        the rows returned by ``getNewSkills``. Every accepted token is also
        printed to stdout.

        Args:
            job_description: Free text to scan. Empty/None yields ``[]``.

        Returns:
            List of matching tokens (stripped), in order of appearance.
        """
        if not job_description:
            return []

        delimiters = (",", " ", ";", "\n", "/", "\\", "(", ")")
        split_pattern = '|'.join(map(re.escape, delimiters))
        known_skills = SkillExtractorDetailsV1.getNewSkills()

        found = []
        for raw_token in re.split(split_pattern, job_description):
            token = raw_token.strip()
            if not token:
                continue
            if SkillExtractorDetailsV1.skill_Validate(known_skills, token):
                found.append(token)
                print("Additional Skills : " + token)
        return found

    @staticmethod
    def getNewSkills() -> pd.DataFrame:
        """Load skills whose weightage is -2, with upper-cased names.

        Returns:
            DataFrame with columns ``skillid``, ``skilldetails`` (upper-cased),
            ``skilltype`` and ``skill_score``.
        """
        query = "select skillid,skilldetails,skilltype,skill_score from skillmaster where weightage = -2"
        conn = DbConnection.GetMySQLDbConnection()
        try:
            df_skill_master = pd.read_sql_query(query, conn)
        finally:
            # Close even if the query raises.
            conn.close()

        df_skill_master['skilldetails'] = df_skill_master['skilldetails'].str.upper()
        return df_skill_master

    @staticmethod
    def skill_Validate(df: pd.DataFrame, skill: str) -> int:
        """Check whether ``skill`` appears in ``df['skilldetails']``.

        The comparison upper-cases ``skill`` only, so ``df`` is expected to
        hold upper-cased names already (as ``getNewSkills`` produces). A
        single-word skill must equal a row exactly; a multi-word skill
        matches when it occurs as a literal substring of any row. ``df`` is
        not modified.

        Args:
            df: DataFrame with a ``skilldetails`` column.
            skill: Candidate skill text, any case.

        Returns:
            1 if the skill is present, otherwise 0 (including blank input).
        """
        needle = skill.upper().strip()
        if not needle:
            return 0

        names = df['skilldetails']
        if len(needle.split()) == 1:
            # Plain equality instead of an unescaped regex, so names such as
            # "C++" or ".NET" are compared literally and NULL rows are safe.
            present = (names == needle).any()
        else:
            present = names.str.contains(needle, regex=False, na=False).any()
        return 1 if present else 0