File size: 4,750 Bytes
a13e09c
 
163e27f
 
b6ee5b6
a13e09c
 
 
b6ee5b6
 
c2f2c32
a13e09c
 
 
af7a4a8
 
247bb4a
 
 
a13e09c
 
 
 
 
c579528
c2f2c32
4a9632c
5bb0bd3
 
247bb4a
 
 
 
 
 
 
 
 
 
 
 
4a9632c
8814bdf
247bb4a
 
 
b6ee5b6
7b81380
 
7f487d3
7b81380
 
8f933e9
 
247bb4a
 
4a9632c
a13e09c
b6ee5b6
 
c2f2c32
 
 
 
8f933e9
b6ee5b6
8f933e9
 
 
 
b6ee5b6
8f933e9
 
 
 
 
 
 
 
8814bdf
8f933e9
 
 
 
b6ee5b6
8f933e9
b6ee5b6
8f933e9
 
 
 
 
 
a13e09c
8f933e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import re
from datetime import datetime
import psycopg2
import pandas as pd
from DbConnection import DbConnection
class SkillExtractorDetailsV1:
    """Group the skills found in a job description into three buckets.

    Every method is a static helper invoked through the class, e.g.
    ``SkillExtractorDetailsV1.GetSkillData(extractor, text)``; no instance
    state is used.
    """

    @staticmethod
    def GetSkillData(skill_extractor, inputData):
        """Build the required / soft / good-to-have skill summary for a text.

        Args:
            skill_extractor: Object exposing ``annotate(text)`` (whose result
                is indexed as ``['results']['full_matches']`` and
                ``['results']['ngram_scored']``) and a ``skills_db`` mapping
                from skill id to a dict with ``skill_name`` and ``skill_type``.
            inputData: The job-description text to analyse.

        Returns:
            A ``pandas.DataFrame`` with columns ``Data`` and ``Values`` and
            three rows: 'Required Skills', 'Soft Skills' and
            'Good to have Skills'. Each value is a comma-separated string.
        """
        # Upper-cased names of skills stored with weightage 0; these are
        # dropped.  A set gives O(1) lookups instead of filtering the
        # DataFrame once per match.
        zero_weight = set(SkillExtractorDetailsV1.GetSkillDatafromDB()['skill'])

        results = skill_extractor.annotate(inputData)['results']
        matches = results['full_matches'] + results['ngram_scored']

        seen = set()
        reuiredSkilllst = []
        softskillslst = []
        goodtohavelst = []

        for match in matches:
            entry = skill_extractor.skills_db[match['skill_id']]
            # Drop any parenthesised qualifier, e.g. "Python (Language)".
            skill_name = entry['skill_name'].split("(")[0].strip()
            skill_score = round(match['score'], 2) * 100

            if skill_name.upper() in zero_weight:
                print('Weightage zero skill-' + skill_name)
                continue
            if skill_name in seen:
                # Already bucketed from an earlier match.
                continue
            seen.add(skill_name)

            is_soft = SkillExtractorDetailsV1._is_soft_skill(entry['skill_type'])
            # Soft skills need a score above 50 to count as soft skills;
            # other skills need a score above 99 to count as required.
            # Anything at or below its threshold is "good to have".
            threshold = 50 if is_soft else 99
            if skill_score <= threshold:
                goodtohavelst.append(skill_name)
            elif is_soft:
                softskillslst.append(skill_name)
            else:
                reuiredSkilllst.append(skill_name)

        # Keywords matched against the skill master table are appended to the
        # required skills.  Joining one combined list avoids a leading comma
        # when no required skill was found by the extractor.
        newSkilllst = SkillExtractorDetailsV1.extractWords(inputData)
        reqSkill = ','.join(reuiredSkilllst + newSkilllst)

        data = {
            'Data': ['Required Skills', 'Soft Skills', 'Good to have Skills'],
            'Values': [reqSkill, ','.join(softskillslst), ','.join(goodtohavelst)],
        }
        return pd.DataFrame(data)

    @staticmethod
    def _is_soft_skill(skill_type):
        """Return True when ``skill_type`` denotes a soft skill.

        The comparison ignores differences in whitespace, so both
        'Soft Skill' and the double-spaced 'Soft  Skill' spelling match.
        """
        # NOTE(review): the previous code compared against 'Soft  Skill'
        # (two spaces) only - confirm which spelling skills_db really uses.
        return isinstance(skill_type, str) and ' '.join(skill_type.split()) == 'Soft Skill'

    @staticmethod
    def GetSkillDatafromDB():
        """Load the upper-cased names of skills whose weightage is 0.

        Returns:
            A ``pandas.DataFrame`` with a single column ``skill``.
        """
        query = "select upper(skilldetails) skill from skillmaster where weightage = 0"
        conn = DbConnection.GetMySQLDbConnection()
        try:
            return pd.read_sql_query(query, conn)
        finally:
            # Release the connection even when the query fails.
            conn.close()

    @staticmethod
    def extractWords(job_description):
        """Find tokens of the description that are listed in the skill master.

        The text is split on commas, spaces, semicolons, newlines, slashes
        and backslashes (closing parentheses are treated as spaces), and each
        non-empty token is checked with ``skill_Validate``.

        Args:
            job_description: The job-description text.

        Returns:
            A list of matching tokens, stripped of surrounding whitespace, in
            order of appearance. Repeated tokens are returned repeatedly.
        """
        if not job_description:
            return []

        job_description = job_description.replace(')', ' ')
        delimiters = ",", " ", " , ", ";", "\n", "/", "\\"
        regex_pattern = '|'.join(map(re.escape, delimiters))
        df = SkillExtractorDetailsV1.getNewSkills()

        job_roles = []
        for token in re.split(regex_pattern, job_description):
            word = token.strip()
            if not word:
                # Adjacent delimiters produce empty tokens; nothing to check.
                continue
            try:
                is_skill = SkillExtractorDetailsV1.skill_Validate(df, word)
            except Exception as error:
                # Best effort: one bad token must not abort the whole scan,
                # but the failure is reported instead of silently dropped.
                print("Skill validation failed for '" + word + "': " + str(error))
                continue
            if is_skill:
                job_roles.append(word)
                print("Additional Skills : " + word)
        return job_roles

    @staticmethod
    def getNewSkills():
        """Load the skill master rows whose weightage is -2.

        Returns:
            A ``pandas.DataFrame`` with columns ``skillid``, ``skilldetails``
            (upper-cased), ``skilltype`` and ``skill_score``.
        """
        query = "select skillid,skilldetails,skilltype,skill_score from skillmaster where weightage = -2"
        conn = DbConnection.GetMySQLDbConnection()
        try:
            df_skill_master = pd.read_sql_query(query, conn)
        finally:
            # Release the connection even when the query fails.
            conn.close()
        df_skill_master['skilldetails'] = df_skill_master['skilldetails'].str.upper()
        return df_skill_master

    @staticmethod
    def skill_Validate(df, skill: str) -> int:
        """Check whether ``skill`` appears in ``df['skilldetails']``.

        The comparison is done on the upper-cased skill, so ``df`` is expected
        to hold upper-cased names. A single-word candidate must equal an entry
        exactly; a multi-word candidate matches when it is a substring of an
        entry. The text is always compared literally, so names such as
        'C++' or '.NET' are safe, and ``df`` is not modified.

        Args:
            df: DataFrame with a ``skilldetails`` column of skill names.
            skill: Candidate skill text.

        Returns:
            1 when the skill is found, otherwise 0.
        """
        skill = skill.upper()
        words = skill.split()
        known = df['skilldetails']

        if (len(words) < 2 and len(skill) < 3) or len(words) == 1:
            # Exact match, so that "C" or "R" does not match every entry
            # containing that letter.
            found = (known == skill).any()
        else:
            # regex=False keeps regex metacharacters literal; na=False makes
            # missing entries count as non-matching.
            found = known.str.contains(skill, regex=False, na=False).any()
        return 1 if found else 0