# Page banner captured together with this file (not code): "Spaces: Sleeping"
import re | |
from datetime import datetime | |
import psycopg2 | |
import pandas as pd | |
from DbConnection import DbConnection | |
class SkillExtractorDetailsV1:
    """Split the skills found in a job description into three buckets.

    Every method is stateless and is meant to be called on the class itself
    (``SkillExtractorDetailsV1.GetSkillData(...)``). Skills come from two
    places: the annotations produced by the supplied skill extractor, and a
    token-by-token lookup of the text against the ``skillmaster`` table.
    """

    # Single characters that separate candidate skill tokens in the text.
    _TOKEN_SPLITTER = re.compile(r'[, ;\n/\\]')

    @staticmethod
    def GetSkillData(skill_extractor, inputData):
        """Build the required / soft / good-to-have summary for a text.

        Args:
            skill_extractor: Object exposing ``annotate(text)`` (returning a
                mapping with ``['results']['full_matches']`` and
                ``['results']['ngram_scored']``) and a ``skills_db`` mapping
                keyed by skill id with ``skill_name`` / ``skill_type`` entries.
            inputData: Job description text.

        Returns:
            A DataFrame with columns ``Data`` and ``Values`` and three rows:
            ``Required Skills``, ``Soft Skills`` and ``Good to have Skills``,
            each value being a comma-separated list of skill names.
        """
        excluded_df = SkillExtractorDetailsV1.GetSkillDatafromDB()
        # A set gives one O(1) lookup per match instead of filtering the frame.
        excluded_skills = set(excluded_df['skill'])

        annotations = skill_extractor.annotate(inputData)
        results = annotations['results']
        matches = results['full_matches'] + results['ngram_scored']

        seen = set()
        required_skills = []
        soft_skills = []
        good_to_have = []
        for match in matches:
            entry = skill_extractor.skills_db[match['skill_id']]
            # Drop any parenthesised qualifier, e.g. "Foo (Bar)" -> "Foo".
            skill_name = entry['skill_name'].split("(")[0].strip()
            skill_type = entry['skill_type']
            skill_score = round(match['score'], 2) * 100

            if skill_name in seen:
                # Already bucketed from an earlier match; nothing to report.
                continue
            if skill_name.upper() in excluded_skills:
                print('Weightage zero skill-' + skill_name)
                continue
            seen.add(skill_name)

            if skill_type == 'Soft Skill':
                bucket = soft_skills if skill_score > 50 else good_to_have
            else:
                bucket = required_skills if skill_score > 99 else good_to_have
            bucket.append(skill_name)

        extra_skills = SkillExtractorDetailsV1.extractWords(inputData)
        # Join one combined list so an empty required list cannot leave a
        # dangling leading comma in front of the extra skills.
        required_text = ','.join(required_skills + list(extra_skills))

        data = {
            'Data': ['Required Skills', 'Soft Skills', 'Good to have Skills'],
            'Values': [
                required_text,
                ','.join(soft_skills),
                ','.join(good_to_have),
            ],
        }
        return pd.DataFrame(data)

    @staticmethod
    def GetSkillDatafromDB():
        """Load the upper-cased names of ``skillmaster`` rows with weightage 0.

        Returns:
            A DataFrame with a single ``skill`` column.
        """
        query = "select upper(skilldetails) skill from skillmaster where weightage = 0"
        conn = DbConnection.GetMySQLDbConnection()
        try:
            return pd.read_sql_query(query, conn)
        finally:
            # Release the connection even when the query fails.
            conn.close()

    @staticmethod
    def extractWords(job_description):
        """Return the tokens of the text that are listed as new skills.

        The text is split on commas, spaces, semicolons, newlines and
        slashes/backslashes (closing parentheses are treated as spaces); each
        distinct non-empty token is checked with ``skill_Validate`` against
        the rows returned by ``getNewSkills``.

        Args:
            job_description: Job description text.

        Returns:
            List of matching tokens, stripped, in first-seen order, with
            case-insensitive duplicates removed.
        """
        text = job_description.replace(')', ' ')
        new_skills = SkillExtractorDetailsV1.getNewSkills()

        found = []
        checked = set()
        for raw_token in SkillExtractorDetailsV1._TOKEN_SPLITTER.split(text):
            token = raw_token.strip()
            key = token.upper()
            if not token or key in checked:
                continue
            checked.add(key)
            try:
                is_skill = SkillExtractorDetailsV1.skill_Validate(new_skills, token)
            except Exception as error:
                # Best effort: one bad token must not abort the whole scan,
                # but the failure is reported instead of silently dropped.
                print(f"Skipping token {token!r}: {error}")
                continue
            if is_skill:
                found.append(token)
                print("Additional Skills : " + token)
        return found

    @staticmethod
    def getNewSkills():
        """Load ``skillmaster`` rows with weightage -2, names upper-cased.

        Returns:
            A DataFrame with columns ``skillid``, ``skilldetails``,
            ``skilltype`` and ``skill_score``.
        """
        query = "select skillid,skilldetails,skilltype,skill_score from skillmaster where weightage = -2"
        conn = DbConnection.GetMySQLDbConnection()
        try:
            df_skill_master = pd.read_sql_query(query, conn)
        finally:
            # Close the connection whether or not the query succeeded.
            conn.close()
        df_skill_master['skilldetails'] = df_skill_master['skilldetails'].str.upper()
        return df_skill_master

    @staticmethod
    def skill_Validate(df, skill):
        """Tell whether ``skill`` is present in ``df['skilldetails']``.

        A single-word skill (or an empty/very short blank one) must equal a
        row exactly; a multi-word skill only has to occur as a substring of a
        row. Matching is literal, so names such as ``C++`` or ``.NET`` are
        compared as plain text rather than as regular expressions. ``df`` is
        not modified.

        Args:
            df: DataFrame whose ``skilldetails`` column holds upper-cased
                skill names.
            skill: Candidate skill; compared case-insensitively by
                upper-casing it first.

        Returns:
            ``1`` when the skill is found, otherwise ``0``.
        """
        skill = skill.upper()
        words = skill.split()
        names = df['skilldetails']

        if len(words) == 1 or (not words and len(skill) < 3):
            # Whole-value comparison; missing values simply compare unequal.
            return int(names.eq(skill).any())

        # Literal substring search; missing values count as "no match".
        return int(names.str.contains(skill, regex=False, na=False).any())