File size: 6,827 Bytes
a13e09c 163e27f b6ee5b6 a13e09c b6ee5b6 8791603 c32df89 a13e09c af7a4a8 247bb4a c92e112 a13e09c c92e112 a13e09c c579528 74d49c7 c31881a a628037 74d49c7 c92e112 b9725a5 8b003d9 5bb0bd3 446700a 247bb4a 446700a 247bb4a 446700a 247bb4a 446700a 247bb4a 4a9632c 8814bdf 247bb4a 53cb282 7b81380 7f487d3 7b81380 8f933e9 247bb4a 4a9632c a13e09c b6ee5b6 c2f2c32 030f3a6 c2f2c32 8f933e9 b6ee5b6 8f933e9 b6ee5b6 8f933e9 5ed6eff 8f933e9 8814bdf 8f933e9 5ed6eff 8f933e9 b6ee5b6 8f933e9 b6ee5b6 8f933e9 a13e09c 8791603 7ddf5eb 8791603 7c568a3 8f933e9 5ed6eff 7c568a3 8f933e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
import re
from datetime import datetime
import psycopg2
import pandas as pd
from DbConnection import DbConnection
class SkillExtractorDetailsV1:
    """Classify skills found in free text and look up skill lists in ``skillmaster``.

    Every method is a static helper: callers invoke them through the class
    (``SkillExtractorDetailsV1.GetSkillData(...)``), never through an instance.
    Database access goes through ``DbConnection.GetMySQLDbConnection()``.
    """

    # Characters/sequences on which a job description is split into tokens.
    _DELIMITERS = (",", " ", " , ", ";", "\n", "/", "\\")
    # Compiled once; the alternation is built exactly as '|'.join(escaped delimiters).
    _SPLIT_PATTERN = re.compile('|'.join(map(re.escape, _DELIMITERS)))

    @staticmethod
    def _tokenize(job_description):
        """Split *job_description* into raw tokens on the class delimiters.

        Closing parentheses are replaced by a space before splitting. Returns an
        empty list for empty/``None`` input.
        """
        if not job_description:
            return []
        cleaned = job_description.replace(')', ' ')
        return SkillExtractorDetailsV1._SPLIT_PATTERN.split(cleaned)

    @staticmethod
    def _fetch_rows(query):
        """Run *query* and return ``cursor.fetchall()``, always releasing resources.

        Rows are fetched BEFORE the cursor and connection are closed: DB-API
        drivers are allowed to (and psycopg2 does) raise when a closed cursor
        is read.
        """
        conn = DbConnection.GetMySQLDbConnection()
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(query)
                return cursor.fetchall()
            finally:
                cursor.close()
        finally:
            conn.close()

    @staticmethod
    def _skill_name_set(rows):
        """Return the set of upper-cased ``skills`` values found in *rows*.

        Rows may be mapping-like (``row['skills']``) or plain sequences from a
        default cursor (first column is used). Empty/NULL names are skipped.
        """
        names = set()
        for row in rows or ():
            try:
                value = row['skills']
            except (TypeError, IndexError):
                # Plain tuple/list row: the query selects a single column.
                value = row[0]
            if value:
                names.add(value.upper())
        return names

    @staticmethod
    def GetSkillData(skill_extractor, inputData):
        """Annotate *inputData* and bucket the detected skills.

        Args:
            skill_extractor: object providing ``annotate(text)`` (returning a dict
                with ``['results']['full_matches']`` and ``['ngram_scored']``) and a
                ``skills_db`` mapping keyed by skill id — presumably a SkillNer
                ``SkillExtractor``; confirm against the caller.
            inputData: the text (job description) to analyse.

        Returns:
            pandas.DataFrame with columns ``Data`` and ``Values`` and three rows:
            ``Required Skills``, ``Soft Skills`` and ``Good to have Skills``, each
            value being a comma-joined string of skill names.

        Skills whose upper-cased name appears in ``getSkills1()`` (weightage = 0)
        are ignored, as are repeated skill names. Progress lines are printed to
        stdout.
        """
        archived_rows = SkillExtractorDetailsV1.getSkills1()
        print(archived_rows)
        # Build the lookup once instead of rescanning every DB row per match.
        archived = SkillExtractorDetailsV1._skill_name_set(archived_rows)

        results = skill_extractor.annotate(inputData)['results']
        matches = results['full_matches'] + results['ngram_scored']

        seen = set()
        required_skills = []
        soft_skills = []
        good_to_have = []
        for result in matches:
            entry = skill_extractor.skills_db[result['skill_id']]
            # Drop any parenthesised qualifier, e.g. "Foo (Bar)" -> "Foo".
            skill_name = entry['skill_name'].split("(")[0].strip()
            skill_type = entry['skill_type']
            skill_score = round(result['score'], 2) * 100
            print('Skill - ' + skill_name.upper())

            if skill_name.upper() in archived or skill_name in seen:
                print('Weightage zero skill-' + skill_name)
                continue

            print('Weightage non skill-' + skill_name)
            print(skill_name)
            print(skill_score)
            print(skill_type)
            seen.add(skill_name)

            if skill_type == 'Soft Skill':
                # Soft skills: score above 50 -> soft skill, otherwise good-to-have.
                bucket = soft_skills if skill_score > 50 else good_to_have
            else:
                # Other skills: score above 99 -> required, otherwise good-to-have.
                bucket = required_skills if skill_score > 99 else good_to_have
            bucket.append(skill_name)

        additional = SkillExtractorDetailsV1.extractWordsNew(inputData)
        # Join the combined list so an empty required list cannot yield a leading comma.
        required_text = ','.join(required_skills + list(additional))

        data = {
            'Data': ['Required Skills', 'Soft Skills', 'Good to have Skills'],
            'Values': [required_text, ','.join(soft_skills), ','.join(good_to_have)],
        }
        return pd.DataFrame(data)

    @staticmethod
    def GetSkillDatafromDB():
        """Return a DataFrame of ``skilldetails`` for rows with weightage = 0.

        The connection is closed once the query has been read.
        """
        query = "select skilldetails from skillmaster where weightage = 0"
        conn = DbConnection.GetMySQLDbConnection()
        try:
            return pd.read_sql_query(query, conn)
        finally:
            conn.close()

    @staticmethod
    def extractWords(job_description):
        """Return tokens of *job_description* accepted by ``skill_Validate``.

        Tokens are validated against the DataFrame from ``getNewSkills()``
        (weightage = -2). Validation is best-effort: a token whose validation
        raises is reported on stdout and skipped rather than aborting the scan.
        """
        tokens = SkillExtractorDetailsV1._tokenize(job_description)
        if not tokens:
            return []
        df = SkillExtractorDetailsV1.getNewSkills()
        job_roles = []
        for token in tokens:
            candidate = token.strip()
            if not candidate:
                continue
            try:
                is_skill = SkillExtractorDetailsV1.skill_Validate(df, candidate)
            except Exception as error:
                print("Skill validation failed for '" + candidate + "': " + str(error))
                continue
            if is_skill:
                job_roles.append(candidate)
                print("Additional Skills : " + token)
        return job_roles

    @staticmethod
    def extractWordsNew(job_description):
        """Return tokens of *job_description* that exactly match a weightage = -2 skill.

        Matching is case-insensitive against ``getNewSkills1()``. Each skill is
        returned once (first spelling seen, stripped), in order of appearance.
        """
        tokens = SkillExtractorDetailsV1._tokenize(job_description)
        if not tokens:
            return []
        known = SkillExtractorDetailsV1._skill_name_set(
            SkillExtractorDetailsV1.getNewSkills1()
        )
        job_roles = []
        seen = set()
        for token in tokens:
            candidate = token.strip()
            key = candidate.upper()
            if key in known and key not in seen:
                seen.add(key)
                job_roles.append(candidate)
                print("Additional Skills : " + token)
        return job_roles

    @staticmethod
    def getNewSkills():
        """Return the weightage = -2 rows of ``skillmaster`` as a DataFrame.

        Columns: ``skillid``, ``skilldetails`` (upper-cased), ``skilltype``,
        ``skill_score``. The connection is closed even if the query fails.
        """
        query = "select skillid,skilldetails,skilltype,skill_score from skillmaster where weightage = -2"
        conn = DbConnection.GetMySQLDbConnection()
        try:
            df_skill_master = pd.read_sql_query(query, conn)
        finally:
            conn.close()
        df_skill_master['skilldetails'] = df_skill_master['skilldetails'].str.upper()
        return df_skill_master

    @staticmethod
    def getSkills1():
        """Return all rows of upper-cased ``skilldetails`` (aliased ``skills``) with weightage = 0."""
        return SkillExtractorDetailsV1._fetch_rows(
            "SELECT upper(skilldetails) skills FROM skillmaster where weightage = 0"
        )

    @staticmethod
    def getNewSkills1():
        """Return all rows of upper-cased ``skilldetails`` (aliased ``skills``) with weightage = -2."""
        return SkillExtractorDetailsV1._fetch_rows(
            "SELECT upper(skilldetails) skills FROM skillmaster where weightage = -2"
        )

    @staticmethod
    def skill_Validate(df, skill):
        """Return 1 if *skill* is present in ``df['skilldetails']``, else 0.

        *skill* is upper-cased; ``df['skilldetails']`` is compared as-is, so it
        should already be upper-cased (as ``getNewSkills()`` returns it).

        * A skill of at most one word must equal an entry exactly.
        * A multi-word skill matches when it is a substring of any entry.

        The skill is always treated as literal text (so names such as ``C++`` or
        ``.NET`` are safe), and *df* is never modified.
        """
        skill = skill.upper()
        # Ignore NULLs and compare as text so mixed-type columns cannot raise.
        names = df['skilldetails'].dropna().astype(str)
        if len(skill.split()) < 2:
            found = (names == skill).any()
        else:
            found = names.str.contains(skill, regex=False).any()
        return 1 if found else 0