Spaces:
Sleeping
Sleeping
File size: 4,750 Bytes
a13e09c 163e27f b6ee5b6 a13e09c b6ee5b6 c2f2c32 a13e09c af7a4a8 247bb4a a13e09c c579528 c2f2c32 4a9632c 5bb0bd3 247bb4a 4a9632c 8814bdf 247bb4a b6ee5b6 7b81380 7f487d3 7b81380 8f933e9 247bb4a 4a9632c a13e09c b6ee5b6 c2f2c32 8f933e9 b6ee5b6 8f933e9 b6ee5b6 8f933e9 8814bdf 8f933e9 b6ee5b6 8f933e9 b6ee5b6 8f933e9 a13e09c 8f933e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import re
from datetime import datetime
import psycopg2
import pandas as pd
from DbConnection import DbConnection
class SkillExtractorDetailsV1:
    """Classify the skills mentioned in a job description.

    Combines the matches reported by a skill extractor with rows of the
    ``skillmaster`` table. Every method is a static helper that is called
    through the class; no instance state is used.
    """

    # Value of ``skill_type`` that marks a soft skill in the extractor's skills_db.
    _SOFT_SKILL_TYPE = 'Soft Skill'
    # Score (0-100) a non-soft skill must exceed to be listed as required.
    _REQUIRED_MIN_SCORE = 99
    # Score (0-100) a soft skill must exceed to be listed as a soft skill.
    _SOFT_MIN_SCORE = 50

    @staticmethod
    def GetSkillData(skill_extractor, inputData):
        """Build a three-row summary of the skills found in ``inputData``.

        Args:
            skill_extractor: Object exposing ``annotate(text)``, whose result
                holds ``['results']['full_matches']`` and
                ``['results']['ngram_scored']`` lists, and a ``skills_db``
                mapping of skill id to a dict with ``skill_name`` and
                ``skill_type``.
            inputData: Job description text.

        Returns:
            pandas.DataFrame with columns ``Data`` and ``Values``; the rows are
            'Required Skills', 'Soft Skills' and 'Good to have Skills', each
            value being a comma-separated string of skill names.
        """
        # Build the exclusion set once instead of filtering the frame per match.
        zero_weight_skills = set(SkillExtractorDetailsV1.GetSkillDatafromDB()['skill'])

        results = skill_extractor.annotate(inputData)['results']
        matches = results['full_matches'] + results['ngram_scored']

        seen_skills = set()
        required_skills = []
        soft_skills = []
        good_to_have_skills = []
        for match in matches:
            skill_entry = skill_extractor.skills_db[match['skill_id']]
            # Drop any parenthesised qualifier that follows the skill name.
            skill_name = skill_entry['skill_name'].split("(")[0].strip()
            skill_type = skill_entry['skill_type']
            skill_score = round(match['score'], 2) * 100

            if skill_name.upper() in zero_weight_skills:
                print('Weightage zero skill-' + skill_name)
                continue
            if skill_name in seen_skills:
                # Same skill matched more than once; only the first match counts.
                continue
            seen_skills.add(skill_name)

            if skill_type == SkillExtractorDetailsV1._SOFT_SKILL_TYPE:
                if skill_score > SkillExtractorDetailsV1._SOFT_MIN_SCORE:
                    soft_skills.append(skill_name)
                else:
                    good_to_have_skills.append(skill_name)
            elif skill_score > SkillExtractorDetailsV1._REQUIRED_MIN_SCORE:
                required_skills.append(skill_name)
            else:
                good_to_have_skills.append(skill_name)

        additional_skills = SkillExtractorDetailsV1.extractWords(inputData)
        # Join the combined list so an empty required list cannot leave a
        # leading comma in front of the additional skills.
        required_value = ','.join(required_skills + additional_skills)

        data = {
            'Data': ['Required Skills', 'Soft Skills', 'Good to have Skills'],
            'Values': [
                required_value,
                ','.join(soft_skills),
                ','.join(good_to_have_skills),
            ],
        }
        return pd.DataFrame(data)

    @staticmethod
    def GetSkillDatafromDB():
        """Return the upper-cased names of skills whose weightage is 0.

        Returns:
            pandas.DataFrame with a single ``skill`` column.
        """
        query = "select upper(skilldetails) skill from skillmaster where weightage = 0"
        conn = DbConnection.GetMySQLDbConnection()
        try:
            return pd.read_sql_query(query, conn)
        finally:
            # Release the connection even when the query fails, as getNewSkills does.
            conn.close()

    @staticmethod
    def extractWords(job_description):
        """Return the tokens of ``job_description`` that are known extra skills.

        The text is split on commas, spaces, semicolons, newlines, slashes and
        backslashes, and each non-empty token is checked against the rows
        returned by ``getNewSkills``.

        Args:
            job_description: Job description text.

        Returns:
            list[str]: Matching tokens, stripped, in order of appearance
            (repeated tokens are repeated in the result).
        """
        # NOTE(review): only ')' is replaced, so a token such as '(Python'
        # keeps its '(' and will not match 'PYTHON' - confirm this is intended.
        job_description = job_description.replace(')', ' ')
        delimiters = ",", " ", " , ", ";", "\n", "/", "\\"
        regex_pattern = '|'.join(map(re.escape, delimiters))
        df = SkillExtractorDetailsV1.getNewSkills()

        job_roles = []
        for token in re.split(regex_pattern, job_description):
            candidate = token.strip()
            if not candidate:
                # Consecutive delimiters produce empty tokens; nothing to validate.
                continue
            try:
                is_skill = SkillExtractorDetailsV1.skill_Validate(df, candidate)
            except Exception as error:
                # Best effort: report the failure and keep processing other tokens.
                print("Skill validation failed for '" + candidate + "': " + str(error))
                continue
            if is_skill:
                job_roles.append(candidate)
                print("Additional Skills : " + candidate)
        return job_roles

    @staticmethod
    def getNewSkills():
        """Load the skill master rows whose weightage is -2.

        Returns:
            pandas.DataFrame with columns ``skillid``, ``skilldetails``
            (upper-cased), ``skilltype`` and ``skill_score``.
        """
        query = "select skillid,skilldetails,skilltype,skill_score from skillmaster where weightage = -2"
        conn = DbConnection.GetMySQLDbConnection()
        try:
            df_skill_master = pd.read_sql_query(query, conn)
        finally:
            conn.close()
        df_skill_master['skilldetails'] = df_skill_master['skilldetails'].str.upper()
        return df_skill_master

    @staticmethod
    def skill_Validate(df, skill):
        """Check whether ``skill`` appears in ``df['skilldetails']``.

        A single-word skill must equal a ``skilldetails`` value exactly; a
        multi-word skill only has to occur as a substring of one. The skill is
        upper-cased first and compared literally, so characters such as ``+``
        or ``.`` have no regex meaning. ``df`` is not modified.

        Args:
            df: DataFrame with a ``skilldetails`` column; values are expected
                to be upper-cased already, as ``getNewSkills`` returns them.
            skill: Candidate skill text.

        Returns:
            int: 1 when the skill is found, otherwise 0.
        """
        skill = skill.upper()
        words = skill.split()
        if not words:
            return 0

        details = df['skilldetails']
        if len(words) == 1:
            found = (details == skill).any()
        else:
            # Literal substring search; null rows count as "no match".
            found = details.str.contains(skill, regex=False, na=False).any()
        return 1 if found else 0