File size: 4,616 Bytes
a13e09c
 
163e27f
 
a13e09c
 
 
163e27f
 
c2f2c32
a13e09c
 
 
af7a4a8
 
247bb4a
 
 
a13e09c
 
 
 
 
c579528
c2f2c32
4a9632c
5bb0bd3
 
247bb4a
 
 
 
 
 
 
 
 
 
 
 
4a9632c
 
247bb4a
 
 
8f933e9
 
 
 
247bb4a
 
4a9632c
a13e09c
163e27f
 
c2f2c32
 
 
 
8f933e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a13e09c
8f933e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import re
from datetime import datetime
import psycopg2
import pandas as pd
class SkillExtractorDetailsV1:
    """Stateless helpers that sort the skills found in a text into buckets.

    Every method is a ``staticmethod`` and is invoked through the class, e.g.
    ``SkillExtractorDetailsV1.GetSkillData(extractor, text, db_params)``.
    """

    @staticmethod
    def GetSkillData(skill_extractor, inputData, db_params):
        """Summarise the skills detected in *inputData* as a three-row table.

        Args:
            skill_extractor: Object providing ``annotate(text)`` (returning a
                dict with ``['results']['full_matches']`` and
                ``['results']['ngram_scored']`` lists) and a ``skills_db``
                mapping of skill id to a dict with ``'skill_name'`` and
                ``'skill_type'``. Presumably a SkillNer ``SkillExtractor`` --
                confirm against the caller.
            inputData: Text to analyse.
            db_params: Keyword arguments forwarded to ``psycopg2.connect``.

        Returns:
            A ``pandas.DataFrame`` with columns ``Data`` and ``Values`` and
            the rows ``Required Skills``, ``Soft Skills`` and
            ``Good to have Skills``; each value is a comma-separated string.
        """
        # Skills listed in skillmaster with weightage = 0 are left out of the
        # result. A set gives O(1) membership tests inside the loop.
        excluded = set(
            SkillExtractorDetailsV1.GetSkillDatafromDB(db_params)['skill']
        )

        annotations = skill_extractor.annotate(inputData)
        matches = (
            annotations['results']['full_matches']
            + annotations['results']['ngram_scored']
        )

        seen = set()
        required_skills = []
        soft_skills = []
        good_to_have = []
        for match in matches:
            entry = skill_extractor.skills_db[match['skill_id']]
            # Drop any parenthesised qualifier, e.g. "Python (Language)".
            skill_name = entry['skill_name'].split("(")[0].strip()
            skill_type = entry['skill_type']
            # Round again after scaling: 0.99 * 100 is 99.00000000000001 in
            # floating point and would otherwise pass the "> 99" threshold.
            skill_score = round(round(match['score'], 2) * 100, 2)

            if skill_name.upper() in excluded or skill_name in seen:
                print('----' + skill_name)
                continue
            seen.add(skill_name)

            # NOTE(review): the original compared against 'Soft  Skill' (two
            # spaces). Whitespace is normalised so that spelling and the
            # single-space 'Soft Skill' are both recognised -- confirm which
            # one the skills database actually uses.
            is_soft = ' '.join(str(skill_type).split()) == 'Soft Skill'
            if skill_score > 99 and not is_soft:
                required_skills.append(skill_name)
            elif skill_score > 50 and is_soft:
                soft_skills.append(skill_name)
            elif skill_score <= 50 and is_soft:
                good_to_have.append(skill_name)
            elif skill_score <= 99 and not is_soft:
                good_to_have.append(skill_name)

        new_skills = SkillExtractorDetailsV1.extractWords(inputData, db_params)
        # Join both lists in one go so the last required skill and the first
        # new skill are separated by a comma as well.
        required_value = ','.join(required_skills + list(new_skills))

        data = {
            'Data': ['Required Skills', 'Soft Skills', 'Good to have Skills'],
            'Values': [
                required_value,
                ','.join(soft_skills),
                ','.join(good_to_have),
            ],
        }
        return pd.DataFrame(data)

    @staticmethod
    def GetSkillDatafromDB(db_params):
        """Load the upper-cased skills with ``weightage = 0`` from skillmaster.

        Args:
            db_params: Keyword arguments forwarded to ``psycopg2.connect``.

        Returns:
            A ``pandas.DataFrame`` with a single column ``skill``.
        """
        query = "select upper(skilldetails) skill from skillmaster where weightage = 0"
        conn = psycopg2.connect(**db_params)
        try:
            return pd.read_sql_query(query, conn)
        finally:
            # Always release the connection, even when the query fails.
            conn.close()

    @staticmethod
    def extractWords(job_description, db_params):
        """Return the tokens of *job_description* that are known skills.

        The text is split on commas, spaces, semicolons, newlines, slashes
        and backslashes (closing parentheses are treated as spaces), and each
        token is checked with ``skill_Validate`` against the rows returned by
        ``getNewSkills``.

        Args:
            job_description: Text to tokenise.
            db_params: Keyword arguments forwarded to ``psycopg2.connect``.

        Returns:
            List of matching tokens in order of appearance; a token that
            occurs several times is returned several times.
        """
        job_description = job_description.replace(')', ' ')
        # The original also listed " , ", but that alternative could never
        # match because "," and " " are tried first.
        delimiters = ",", " ", ";", "\n", "/", "\\"
        regex_pattern = '|'.join(map(re.escape, delimiters))
        df = SkillExtractorDetailsV1.getNewSkills(db_params)

        job_roles = []
        for raw_token in re.split(regex_pattern, job_description):
            token = raw_token.strip()
            if not token:
                # Consecutive delimiters produce empty tokens.
                continue
            try:
                is_skill = SkillExtractorDetailsV1.skill_Validate(df, token)
            except Exception as error:
                # Best effort: one bad token must not abort the extraction,
                # but the failure is reported instead of silently dropped.
                print("Skill validation failed for " + repr(token) + ": " + str(error))
                continue
            if is_skill:
                job_roles.append(token)
                print("Skills Identified* : " + token)
        return job_roles

    @staticmethod
    def getNewSkills(db_params):
        """Load the skillmaster rows with ``weightage = -2``.

        Args:
            db_params: Keyword arguments forwarded to ``psycopg2.connect``.

        Returns:
            A ``pandas.DataFrame`` with the columns ``skillid``,
            ``skilldetails`` (upper-cased), ``skilltype`` and ``skill_score``.
        """
        query = "select skillid,skilldetails,skilltype,skill_score from skillmaster where weightage = -2"
        conn = psycopg2.connect(**db_params)
        try:
            df_skill_master = pd.read_sql_query(query, conn)
        finally:
            # Always release the connection, even when the query fails.
            conn.close()
        df_skill_master['skilldetails'] = df_skill_master['skilldetails'].str.upper()
        return df_skill_master

    @staticmethod
    def skill_Validate(df, skill):
        """Tell whether *skill* occurs in ``df['skilldetails']``.

        The comparison is case-insensitive on the *skill* side only: *skill*
        is upper-cased, so ``df['skilldetails']`` is expected to hold
        upper-case text already. A single-word (or empty) *skill* must equal
        an entry exactly; a multi-word *skill* only has to be a substring of
        an entry. *skill* is always treated as literal text, never as a
        regular expression, and *df* is not modified.

        Args:
            df: ``pandas.DataFrame`` with a ``skilldetails`` column.
            skill: Candidate skill text.

        Returns:
            ``1`` if the skill is found, otherwise ``0``.
        """
        skill = skill.upper()
        known = df['skilldetails']
        words = skill.split()
        if len(words) == 1 or (not words and len(skill) < 3):
            # Plain equality: names such as "C++" or "." contain regex
            # metacharacters, and missing entries simply compare unequal.
            return int((known == skill).any())
        return int(known.str.contains(skill, regex=False, na=False).any())