File size: 4,475 Bytes
ce5ba9e
 
 
 
 
 
 
 
 
db967a4
 
 
ce5ba9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc93d74
ce5ba9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296fd89
ce5ba9e
 
 
 
 
37c0254
ce5ba9e
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import psycopg2
import pandas as pd
from sentence_transformers import SentenceTransformer, util
class SkillMatch:
    """Score job-description (JD) skills against course skills and store the results.

    The methods take no instance and are called through the class
    (``SkillMatch.SkillMatcher(...)``), so they are static methods.
    """

    @staticmethod
    def SkillMatcher(model, db_params, jdID):
        """Score unscored JD/course pairs, persist them, and report the best course.

        Every (JD, course) pair whose ``"<jd>-<course>"`` key is not yet present
        in ``courseskillmatch`` is scored with the cosine similarity of the
        space-joined skill lists; each score is inserted and committed
        immediately.

        Args:
            model: Encoder exposing ``encode(text, convert_to_tensor=True)``;
                presumably a ``SentenceTransformer`` -- confirm with the caller.
            db_params: Keyword arguments forwarded to ``psycopg2.connect``.
            jdID: Value matched against ``JDDetailsAllSkill.jdmasterid``.

        Returns:
            The string ``"<filename>;<best score * 100>;<course id>"``. Only
            pairs scored during this call compete for "best"; when none was
            scored, the score is ``0.0`` and the course id is ``0``.
        """
        print("Checking Best Course for the JD...")
        conn = psycopg2.connect(**db_params)
        cursor_obj = None
        try:
            cursor_obj = conn.cursor()
            return SkillMatch._score_and_store(model, conn, cursor_obj, jdID)
        finally:
            # Release the DB resources even when a query or the model fails.
            if cursor_obj is not None:
                cursor_obj.close()
            conn.close()

    @staticmethod
    def _group_skills(rows):
        """Group column 1 of each row under its column 0 value, keeping row order."""
        grouped = {}
        for row in rows:
            grouped.setdefault(row[0], []).append(row[1])
        return grouped

    @staticmethod
    def _score_and_store(model, conn, cursor_obj, jdID):
        """Run the matching on an open connection; see ``SkillMatcher`` for the contract."""
        cursor_obj.execute(
            "select * from JDDetailsAllSkill where jdmasterid = (%s)", (jdID,)
        )
        jd_data = cursor_obj.fetchall()
        print(jd_data)

        cursor_obj.execute("select * from CourseDetailsForMatching")
        cv_data = cursor_obj.fetchall()
        print(cv_data)

        cursor_obj.execute("select jdmasterid || '-' || courseid from courseskillmatch")
        # A set gives O(1) "already scored?" checks instead of a scan per pair.
        already_matched = {row[0] for row in cursor_obj.fetchall()}

        # NOTE(review): the queries use "select *", so this assumes column 0 is
        # the id and column 1 the skill text -- confirm against the table/view
        # definitions.
        jd_skills = SkillMatch._group_skills(jd_data)
        cv_skills = SkillMatch._group_skills(cv_data)

        insert_query = """INSERT INTO public.courseskillmatch(SkillMatchID, courseid, JDMasterID, MatchScore,isactive) VALUES (%s,%s,%s,%s,%s)"""

        next_match_id = 0  # 0 means "not fetched yet"; real ids start at 1
        top_score = 0.0    # kept as a plain float so the return works with no matches
        course_id = 0
        filename = ''

        for jd, jd_skill_list in jd_skills.items():
            jd_embedding = None  # encoded lazily, once per JD
            for cv, cv_skill_list in cv_skills.items():
                match_details = str(jd) + "-" + str(cv)
                print("Checking for existing Profile")
                if match_details in already_matched:
                    print("Already in Database -----------" + match_details)
                    continue

                print("Running Matching Algo")
                cv_embedding = model.encode(
                    " ".join(cv_skill_list), convert_to_tensor=True
                )
                if jd_embedding is None:
                    jd_embedding = model.encode(
                        " ".join(jd_skill_list), convert_to_tensor=True
                    )

                # Compute cosine similarity between the two sentence embeddings.
                similarity = util.cos_sim(cv_embedding, jd_embedding)
                score = similarity[0][0].item()
                # Scale on the tensor before extracting so the reported value
                # is computed exactly as before.
                scaled_score = (similarity * 100)[0][0].item()
                if top_score < scaled_score:
                    top_score = scaled_score
                    course_id = cv

                print("DB Entry for Matching Results")
                if next_match_id == 0:
                    # NOTE(review): max()+1 is not safe against concurrent
                    # writers; a sequence would be -- confirm whether that matters.
                    cursor_obj.execute(
                        "select coalesce(max(skillmatchid),0) + 1 from courseskillmatch"
                    )
                    next_match_id = SkillMatch.tuple_to_int(cursor_obj.fetchall()[0])

                cursor_obj.execute(insert_query, (next_match_id, cv, jd, score, 1))
                conn.commit()
                next_match_id = next_match_id + 1
                print(
                    str(next_match_id) + " "
                    + "Updating in DB - JD {} CV {} ".format(jd, cv),
                    score,
                )

            print(course_id)
            # Parameterised lookup on the existing cursor instead of string-built SQL.
            cursor_obj.execute(
                "select filename from coursemaster where masterid = %s", (course_id,)
            )
            row = cursor_obj.fetchone()
            if row is None:
                # No such course: keep the previous filename (initially '').
                print(course_id)
            else:
                filename = row[0]

        print("------------------------Beta Results  - " + str(filename))
        return str(filename) + ";" + str(top_score) + ";" + str(course_id)

    @staticmethod
    def tuple_to_int(tup):
        """Collapse a tuple of numbers into one number, positional base-10 style.

        A one-item tuple returns that item unchanged. For longer tuples each
        item is weighted by ``10 ** (items remaining after it)``, so
        ``(1, 2, 3)`` becomes ``123``. An empty tuple raises ``IndexError``.
        """
        if len(tup) == 1:
            return tup[0]
        else:
            return tup[0] * (10 ** (len(tup) - 1)) + SkillMatch.tuple_to_int(tup[1:])