File size: 6,827 Bytes
a13e09c
 
163e27f
 
b6ee5b6
a13e09c
 
 
b6ee5b6
8791603
c32df89
a13e09c
 
 
af7a4a8
 
247bb4a
 
 
c92e112
a13e09c
c92e112
a13e09c
 
 
 
c579528
74d49c7
c31881a
a628037
74d49c7
c92e112
 
 
 
 
b9725a5
 
 
8b003d9
5bb0bd3
446700a
247bb4a
446700a
247bb4a
446700a
247bb4a
446700a
247bb4a
 
 
 
 
4a9632c
8814bdf
247bb4a
 
 
53cb282
7b81380
 
7f487d3
7b81380
 
8f933e9
 
247bb4a
 
4a9632c
a13e09c
b6ee5b6
 
c2f2c32
030f3a6
c2f2c32
 
8f933e9
b6ee5b6
8f933e9
 
 
 
b6ee5b6
8f933e9
 
 
 
 
5ed6eff
8f933e9
 
 
8814bdf
8f933e9
 
 
5ed6eff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f933e9
b6ee5b6
8f933e9
b6ee5b6
8f933e9
 
 
 
 
 
a13e09c
8791603
 
 
 
 
7ddf5eb
8791603
 
 
7c568a3
8f933e9
5ed6eff
 
 
 
 
 
 
 
 
7c568a3
8f933e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import re
from datetime import datetime
import psycopg2
import pandas as pd
from DbConnection import DbConnection
class SkillExtractorDetailsV1:


    def GetSkillData(skill_extractor, inputData):
        """Classify the skills detected in a text into three buckets.

        Called through the class, e.g.
        ``SkillExtractorDetailsV1.GetSkillData(extractor, text)``.

        Args:
            skill_extractor: Object exposing ``annotate(text)`` whose result
                contains ``['results']['full_matches']`` and
                ``['results']['ngram_scored']`` lists, plus a ``skills_db``
                mapping from skill id to a dict with ``skill_name`` and
                ``skill_type``.
                NOTE(review): looks like a skillNer ``SkillExtractor`` - confirm.
            inputData: Text (e.g. a job description) to analyse.

        Returns:
            pandas.DataFrame with columns ``Data`` and ``Values`` and three
            rows: ``Required Skills``, ``Soft Skills`` and
            ``Good to have Skills``. Each value is a comma-separated string.
        """
        getdbskills = SkillExtractorDetailsV1.getSkills1()
        print(getdbskills)
        # Build the lookup once instead of rescanning every row for every match.
        archived_names = {
            row['skills'].upper()
            for row in getdbskills
            if row['skills'] is not None
        }

        annotations = skill_extractor.annotate(inputData)
        results = annotations['results']
        matches = results['full_matches'] + results['ngram_scored']

        seen_names = set()
        required_skills = []
        soft_skills = []
        good_to_have = []
        for match in matches:
            db_entry = skill_extractor.skills_db[match['skill_id']]
            # Drop any parenthesised suffix, e.g. "Python (Programming Language)".
            skill_name = db_entry['skill_name'].split("(")[0].strip()
            skill_type = db_entry['skill_type']
            skill_score = round(match['score'], 2) * 100
            print('Skill - ' + skill_name.upper())

            # Skills listed by getSkills1() and repeats of an already
            # classified skill are both skipped.
            if skill_name.upper() in archived_names or skill_name in seen_names:
                print('Weightage zero skill-' + skill_name)
                continue

            print('Weightage non skill-' + skill_name)
            print(skill_name)
            print(skill_score)
            print(skill_type)
            seen_names.add(skill_name)

            if skill_type == 'Soft Skill':
                if skill_score > 50:
                    soft_skills.append(skill_name)
                else:
                    good_to_have.append(skill_name)
            elif skill_score > 99:
                required_skills.append(skill_name)
            else:
                good_to_have.append(skill_name)

        newSkilllst = SkillExtractorDetailsV1.extractWordsNew(inputData)

        # Merge extractor-required skills with the keyword matches. Joining a
        # single list avoids the leading comma the old string concatenation
        # produced when no extractor skill was "required", and the
        # case-insensitive filter stops a skill found by both paths from being
        # listed twice.
        merged_required = []
        merged_keys = set()
        for name in required_skills + list(newSkilllst):
            key = name.upper()
            if key not in merged_keys:
                merged_keys.add(key)
                merged_required.append(name)
        reqSkill = ','.join(merged_required)

        data = {'Data':['Required Skills', 'Soft Skills', 'Good to have Skills'], 'Values':[reqSkill, ','.join(soft_skills), ','.join(good_to_have)]}
        return pd.DataFrame(data)

    def GetSkillDatafromDB():
        """Load the ``skilldetails`` of every ``skillmaster`` row with weightage 0.

        Returns:
            pandas.DataFrame with a single ``skilldetails`` column.
        """
        conn = DbConnection.GetMySQLDbConnection()
        query = "select skilldetails from skillmaster where weightage = 0"
        try:
            return pd.read_sql_query(query, conn)
        finally:
            # The connection used to be left open; always release it, even
            # when the query fails.
            conn.close()
    
    def extractWords(job_description):
        """Return the tokens of a job description that validate as skills.

        The text is split on commas, spaces, semicolons, newlines, slashes and
        backslashes (closing parentheses are first turned into spaces). Each
        non-empty token is checked with ``skill_Validate`` against the frame
        returned by ``getNewSkills``.

        Args:
            job_description: Free text to scan.

        Returns:
            list[str]: Stripped tokens accepted by ``skill_Validate``, in
            order of appearance (repeats are kept).
        """
        delimiters = ",", " ", " , ", ";","\n","/","\\"
        regex_pattern = '|'.join(map(re.escape, delimiters))
        skills_df = SkillExtractorDetailsV1.getNewSkills()

        job_roles = []
        for token in re.split(regex_pattern, job_description.replace(')', ' ')):
            candidate = token.strip()
            if not candidate:
                # Adjacent delimiters produce empty tokens; nothing to validate.
                continue
            try:
                is_skill = SkillExtractorDetailsV1.skill_Validate(skills_df, candidate)
            except Exception as error:
                # Stay best-effort (one bad token must not abort the scan),
                # but report the failure instead of hiding it.
                print("Skill validation failed for '" + candidate + "': " + str(error))
                continue
            if is_skill:
                # Store the stripped token: that is the text that was validated.
                job_roles.append(candidate)
                print("Additional Skills : " + candidate)
        return job_roles
    def extractWordsNew(job_description):
        """Return the tokens of a job description that match a known skill.

        The text is split on commas, spaces, semicolons, newlines, slashes and
        backslashes (closing parentheses are first turned into spaces). A
        token is kept when it equals, case-insensitively, the ``skills`` value
        of a row returned by ``getNewSkills1``.

        Args:
            job_description: Free text to scan.

        Returns:
            list[str]: Stripped matching tokens in order of first appearance,
            without case-insensitive repeats. Empty when the skill rows
            cannot be read.
        """
        delimiters = ",", " ", " , ", ";","\n","/","\\"
        regex_pattern = '|'.join(map(re.escape, delimiters))
        rows = SkillExtractorDetailsV1.getNewSkills1()

        # Build the lookup once. The old per-token row scan was
        # O(tokens x rows), and a single NULL row raised inside the blanket
        # try/except, silently hiding every row after it.
        try:
            known_skills = {
                row['skills'].upper()
                for row in rows
                if row['skills'] is not None
            }
        except (TypeError, KeyError, AttributeError) as error:
            # Same best-effort result as before (no matches), but reported.
            print("Unable to read skill rows: " + str(error))
            return []

        job_roles = []
        seen_keys = set()
        for token in re.split(regex_pattern, job_description.replace(')', ' ')):
            candidate = token.strip()
            if not candidate:
                continue
            key = candidate.upper()
            # Matching ignores case, so de-duplication does too; otherwise
            # "AWS" and "aws" would both be reported.
            if key in known_skills and key not in seen_keys:
                seen_keys.add(key)
                job_roles.append(candidate)
                print("Additional Skills : " + candidate)
        return job_roles

    def getNewSkills():
        """Load the ``skillmaster`` rows whose weightage is -2.

        Returns:
            pandas.DataFrame with columns ``skillid``, ``skilldetails``,
            ``skilltype`` and ``skill_score``; ``skilldetails`` is
            upper-cased.
        """
        query = "select skillid,skilldetails,skilltype,skill_score from skillmaster where weightage = -2"
        conn = DbConnection.GetMySQLDbConnection()
        try:
            # read_sql_query works on the connection directly, so the cursor
            # that used to be opened here was never needed.
            df_skill_master = pd.read_sql_query(query, conn)
        finally:
            # Release the connection even when the query raises.
            conn.close()
        df_skill_master['skilldetails'] = df_skill_master['skilldetails'].str.upper()
        return df_skill_master
    def getSkills1():
        """Fetch the upper-cased ``skilldetails`` of rows with weightage 0.

        Returns:
            The result of ``cursor.fetchall()`` for the query; each row
            carries one value, aliased ``skills``.
            NOTE(review): callers index rows as ``row['skills']``, so the
            connection presumably yields mapping-style rows - confirm against
            ``DbConnection``.
        """
        conn = DbConnection.GetMySQLDbConnection()
        try:
            cursor = conn.cursor()
            try:
                cursor.execute("SELECT upper(skilldetails) skills FROM skillmaster where weightage = 0")
                # Fetch BEFORE closing: a closed cursor is unusable under the
                # DB-API, so fetching afterwards only worked on drivers that
                # happen to keep buffered rows around.
                return cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Close the connection
            conn.close()
    def getNewSkills1():
        """Fetch the upper-cased ``skilldetails`` of rows with weightage -2.

        Returns:
            The result of ``cursor.fetchall()`` for the query; each row
            carries one value, aliased ``skills``.
            NOTE(review): callers index rows as ``row['skills']``, so the
            connection presumably yields mapping-style rows - confirm against
            ``DbConnection``.
        """
        conn = DbConnection.GetMySQLDbConnection()
        try:
            cursor = conn.cursor()
            try:
                cursor.execute("SELECT upper(skilldetails) skills FROM skillmaster where weightage = -2")
                # Fetch BEFORE closing: a closed cursor is unusable under the
                # DB-API, so fetching afterwards only worked on drivers that
                # happen to keep buffered rows around.
                return cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Close the connection
            conn.close()
        
    def skill_Validate(df, skill):
        """Tell whether ``skill`` is present in ``df['skilldetails']``.

        The comparison is literal: the skill text is never interpreted as a
        regular expression, so names such as "C++" or "C#" are handled
        correctly. ``df`` is not modified.

        Args:
            df: pandas.DataFrame with a ``skilldetails`` column.
                Callers in this file upper-case that column beforehand; the
                skill is upper-cased here before comparing.
            skill: Candidate skill text.

        Returns:
            int: ``1`` when the skill is found, ``0`` otherwise. A single-word
            (or empty) skill must equal a ``skilldetails`` value exactly; a
            multi-word skill only has to occur inside one as a substring.
        """
        skill = skill.upper()
        known = df['skilldetails']

        if len(skill.split()) <= 1:
            # Exact match. Plain equality replaces the old ``^{skill}$`` regex,
            # which raised on "C++" and let "C." match "C#".
            return int(bool((known == skill).any()))

        # Literal substring search; na=False treats missing values as a miss.
        return int(bool(known.str.contains(skill, regex=False, na=False).any()))