import logging
import re
from datetime import datetime

import pandas as pd
import psycopg2

from DbConnection import DbConnection

logger = logging.getLogger(__name__)


class SkillExtractorDetailsV1:
    """Extract and bucket skills found in a free-text job description.

    Every method is a static helper and is called through the class, e.g.
    ``SkillExtractorDetailsV1.GetSkillData(extractor, text)``.  Skill
    reference data is read from the ``skillmaster`` table through
    ``DbConnection.GetMySQLDbConnection()``.
    """

    # SQL text is kept verbatim; the weightage values are constants, not input.
    _ARCHIVED_SKILLS_QUERY = (
        "SELECT upper(skilldetails) skills FROM skillmaster where weightage = 0"
    )
    _NEW_SKILLS_QUERY = (
        "SELECT upper(skilldetails) skills FROM skillmaster where weightage = -2"
    )

    # Token separators for job descriptions.  " , " can never win the
    # alternation because "," and " " come first, but it is kept so the
    # delimiter set stays exactly as it was.
    _TOKEN_SPLIT_RE = re.compile(
        '|'.join(map(re.escape, (",", " ", " , ", ";", "\n", "/", "\\")))
    )

    @staticmethod
    def _fetch_all(query):
        """Run *query* and return ``cursor.fetchall()``.

        The rows are fetched before the cursor and connection are closed,
        and both are released even when the query raises.
        """
        conn = DbConnection.GetMySQLDbConnection()
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(query)
                return cursor.fetchall()
            finally:
                cursor.close()
        finally:
            conn.close()

    @staticmethod
    def _skill_name_set(rows):
        """Return the upper-cased skill names contained in *rows* as a set.

        Rows are read through the ``'skills'`` column alias.  Rows that do
        not support key access fall back to their first element; this
        assumes a tuple-returning cursor, where the single selected column
        is element 0 (TODO confirm the cursor type DbConnection hands out).
        Rows with a NULL / non-string name are skipped.
        """
        names = set()
        for row in rows or ():
            try:
                value = row['skills']
            except (KeyError, TypeError, IndexError):
                try:
                    value = row[0]
                except (KeyError, TypeError, IndexError):
                    continue
            if isinstance(value, str):
                names.add(value.upper())
        return names

    @staticmethod
    def _split_tokens(job_description):
        """Split *job_description* into raw (unstripped) candidate tokens.

        Closing parentheses are turned into spaces first.
        NOTE(review): opening parentheses are left in place, so a token such
        as "(Python" will not match a skill name - confirm this is intended.
        """
        text = job_description.replace(')', ' ')
        return SkillExtractorDetailsV1._TOKEN_SPLIT_RE.split(text)

    @staticmethod
    def GetSkillData(skill_extractor, inputData):
        """Annotate *inputData* and return the detected skills by category.

        Args:
            skill_extractor: object exposing ``annotate(text)`` (whose result
                holds ``['results']['full_matches']`` and
                ``['results']['ngram_scored']``) and a ``skills_db`` mapping
                of skill id to ``{'skill_name', 'skill_type'}``.
            inputData: job-description text.

        Returns:
            A DataFrame with columns ``Data`` and ``Values`` holding three
            rows - 'Required Skills', 'Soft Skills', 'Good to have Skills' -
            each value being a comma-joined string of skill names.

        Skills listed in ``skillmaster`` with weightage 0 are ignored, and
        each skill name is reported once.  Tokens matching weightage -2
        skills (see ``extractWordsNew``) are appended to the required skills.
        """
        archived_rows = SkillExtractorDetailsV1.getSkills1()
        print(archived_rows)
        # Build the lookup once instead of rescanning the rows per match.
        archived_names = SkillExtractorDetailsV1._skill_name_set(archived_rows)

        annotations = skill_extractor.annotate(inputData)
        results = annotations['results']
        matches = results['full_matches'] + results['ngram_scored']

        seen = set()
        required_skills = []
        soft_skills = []
        good_to_have = []

        for match in matches:
            entry = skill_extractor.skills_db[match['skill_id']]
            # Drop any parenthesised qualifier, e.g. "Java (Programming Language)".
            skill_name = entry['skill_name'].split("(")[0].strip()
            skill_type = entry['skill_type']
            skill_score = round(match['score'], 2) * 100  # percentage scale

            print('Skill - ' + skill_name.upper())

            if skill_name.upper() in archived_names or skill_name in seen:
                print('Weightage zero skill-' + skill_name)
                continue

            print('Weightage non skill-' + skill_name)
            print(skill_name)
            print(skill_score)
            print(skill_type)
            seen.add(skill_name)

            # Soft skills only need to clear 50; everything else must score
            # above 99 to count as required.  The rest is "good to have".
            is_soft = skill_type == 'Soft Skill'
            threshold = 50 if is_soft else 99
            if skill_score > threshold:
                (soft_skills if is_soft else required_skills).append(skill_name)
            else:
                good_to_have.append(skill_name)

        new_skills = SkillExtractorDetailsV1.extractWordsNew(inputData)
        # Join one combined list so an empty required list cannot produce a
        # leading comma.
        required_value = ','.join(required_skills + new_skills)

        data = {
            'Data': ['Required Skills', 'Soft Skills', 'Good to have Skills'],
            'Values': [
                required_value,
                ','.join(soft_skills),
                ','.join(good_to_have),
            ],
        }
        return pd.DataFrame(data)

    @staticmethod
    def GetSkillDatafromDB():
        """Return a DataFrame of ``skilldetails`` for weightage-0 skills."""
        query = "select skilldetails from skillmaster where weightage = 0"
        conn = DbConnection.GetMySQLDbConnection()
        try:
            return pd.read_sql_query(query, conn)
        finally:
            # Release the connection, as the other DB helpers do.
            conn.close()

    @staticmethod
    def extractWords(job_description):
        """Return the tokens of *job_description* accepted by ``skill_Validate``.

        Tokens are validated (stripped) against the weightage -2 skills from
        ``getNewSkills`` and returned unstripped, in order of appearance,
        duplicates included.  A token whose validation raises is logged and
        skipped rather than aborting the whole extraction.
        """
        tokens = SkillExtractorDetailsV1._split_tokens(job_description)
        skill_master = SkillExtractorDetailsV1.getNewSkills()

        job_roles = []
        for token in tokens:
            try:
                is_skill = SkillExtractorDetailsV1.skill_Validate(
                    skill_master, token.strip()
                )
            except Exception:
                # Best effort per token, but leave a trace instead of
                # silently discarding the error.
                logger.warning(
                    "Could not validate token %r as a skill", token, exc_info=True
                )
                continue
            if is_skill:
                job_roles.append(token)
                print("Additional Skills : " + token)
        return job_roles

    @staticmethod
    def extractWordsNew(job_description):
        """Return the distinct tokens that exactly name a weightage -2 skill.

        Matching is case-insensitive on the stripped token.  Tokens are
        returned stripped, in order of first appearance; de-duplication is
        case-sensitive, so "Python" and "python" are both kept.
        """
        tokens = SkillExtractorDetailsV1._split_tokens(job_description)
        known_names = SkillExtractorDetailsV1._skill_name_set(
            SkillExtractorDetailsV1.getNewSkills1()
        )

        job_roles = []
        for token in tokens:
            candidate = token.strip()
            if candidate.upper() in known_names and candidate not in job_roles:
                job_roles.append(candidate)
                print("Additional Skills : " + token)
        return job_roles

    @staticmethod
    def getNewSkills():
        """Return weightage -2 skills as a DataFrame.

        Columns are ``skillid``, ``skilldetails``, ``skilltype`` and
        ``skill_score``; ``skilldetails`` is upper-cased.
        """
        query = (
            "select skillid,skilldetails,skilltype,skill_score "
            "from skillmaster where weightage = -2"
        )
        conn = DbConnection.GetMySQLDbConnection()
        try:
            df_skill_master = pd.read_sql_query(query, conn)
        finally:
            conn.close()
        df_skill_master['skilldetails'] = df_skill_master['skilldetails'].str.upper()
        return df_skill_master

    @staticmethod
    def getSkills1():
        """Return the fetched rows of upper-cased weightage-0 skill names.

        Each row exposes the name under the ``skills`` column alias.
        """
        return SkillExtractorDetailsV1._fetch_all(
            SkillExtractorDetailsV1._ARCHIVED_SKILLS_QUERY
        )

    @staticmethod
    def getNewSkills1():
        """Return the fetched rows of upper-cased weightage -2 skill names.

        Each row exposes the name under the ``skills`` column alias.
        """
        return SkillExtractorDetailsV1._fetch_all(
            SkillExtractorDetailsV1._NEW_SKILLS_QUERY
        )

    @staticmethod
    def skill_Validate(df, skill):
        """Return 1 if *skill* is present in ``df['skilldetails']``, else 0.

        Args:
            df: DataFrame with a ``skilldetails`` column.  The comparison is
                case-sensitive against the upper-cased *skill*, so the column
                is expected to hold upper-case names.
            skill: candidate skill text.

        A single-word candidate (or an empty / whitespace one shorter than
        three characters) must equal a whole entry.  Anything else matches as
        a substring of an entry.  Both comparisons are literal, so characters
        such as ``+`` or ``.`` in names like "C++" or ".NET" are not treated
        as regex syntax.  *df* is not modified.
        """
        skill = skill.upper()
        words = skill.split()
        names = df['skilldetails']

        if len(words) == 1 or (not words and len(skill) < 3):
            found = (names == skill).any()
        else:
            found = names.str.contains(skill, regex=False, na=False).any()
        return 1 if found else 0