# Listing metadata left over from the web file viewer (not part of the program):
# file size 4,475 bytes; revision hashes ce5ba9e db967a4 ce5ba9e dc93d74 ce5ba9e 296fd89 ce5ba9e 37c0254 ce5ba9e
# (the viewer's line-number gutter, 1-110, has been dropped).
import psycopg2
import pandas as pd
from sentence_transformers import SentenceTransformer, util
class SkillMatch:
    """Score courses against a job description (JD) by skill-text similarity.

    The class is used purely as a namespace: both public entry points are
    called through the class (``SkillMatch.SkillMatcher(...)``) and hold no
    instance state.
    """

    @staticmethod
    def SkillMatcher(model, db_params, jdID):
        """Match one JD against every course and return the best new match.

        For each (JD, course) pair that is not yet recorded in
        ``courseskillmatch``, the skills of both sides are joined into one
        sentence each, embedded with ``model.encode`` and compared with
        ``util.cos_sim``. Every newly scored pair is inserted into
        ``public.courseskillmatch`` and committed immediately.

        Args:
            model: Object exposing ``encode(text, convert_to_tensor=True)``;
                presumably a ``SentenceTransformer`` -- TODO confirm with caller.
            db_params: Keyword arguments forwarded to ``psycopg2.connect``.
            jdID: Value compared against ``JDDetailsAllSkill.jdmasterid``.

        Returns:
            ``"<filename>;<best score * 100>;<course id>"``. When no new pair
            was scored (everything already stored, or no rows), the score is
            ``0.0`` and the course id is ``0``; the filename is empty when
            ``coursemaster`` has no row for the chosen course id.

        NOTE(review): pairs already present in ``courseskillmatch`` are
        skipped entirely, so they never compete for the "best course" result.
        Confirm this is intended.
        """
        print("Checking Best Course for the JD...")
        conn = psycopg2.connect(**db_params)
        try:
            # The cursor context manager closes the cursor even on error.
            with conn.cursor() as cursor_obj:
                cursor_obj.execute(
                    "select * from JDDetailsAllSkill where jdmasterid = (%s)",
                    (jdID,),
                )
                jd_data = cursor_obj.fetchall()
                print(jd_data)

                cursor_obj.execute("select * from CourseDetailsForMatching")
                cv_data = cursor_obj.fetchall()
                print(cv_data)

                # "<jd id>-<course id>" keys of the pairs that are already
                # stored; a set gives O(1) membership checks in the loop below.
                cursor_obj.execute(
                    "select jdmasterid || '-' || courseid from courseskillmatch"
                )
                already_matched = {row[0] for row in cursor_obj.fetchall()}

                # Assumes column 0 is the owning id and column 1 the skill
                # text in both result sets ("select *" makes this depend on
                # the table's column order) -- TODO confirm against schema.
                jd_skills = SkillMatch._group_skills(jd_data)
                cv_skills = SkillMatch._group_skills(cv_data)

                next_match_id = 0  # 0 means "not fetched from the DB yet"
                top_score = 0.0
                course_id = 0

                for jd, jd_skill_list in jd_skills.items():
                    # The JD text is the same for every course, so embed it
                    # at most once, and only if some pair needs scoring.
                    jd_embedding = None
                    for cv, cv_skill_list in cv_skills.items():
                        match_details = str(jd) + "-" + str(cv)
                        print("Checking for existing Profile")
                        if match_details in already_matched:
                            print(
                                "Already in Database -----------"
                                + match_details
                            )
                            continue

                        print("Running Matching Algo")
                        cv_embedding = model.encode(
                            " ".join(cv_skill_list), convert_to_tensor=True
                        )
                        if jd_embedding is None:
                            jd_embedding = model.encode(
                                " ".join(jd_skill_list), convert_to_tensor=True
                            )
                        # Cosine similarity between the two sentence embeddings.
                        similarity = util.cos_sim(cv_embedding, jd_embedding)
                        score = similarity[0][0].item()
                        # Scale on the tensor, as before, so the reported
                        # percentage keeps the same numeric value.
                        score_pct = (similarity * 100)[0][0].item()
                        if top_score < score_pct:
                            top_score = score_pct
                            course_id = cv

                        print("DB Entry for Matching Results")
                        if next_match_id == 0:
                            # Ids are allocated client-side: read max + 1 once,
                            # then count upwards for the following inserts.
                            cursor_obj.execute(
                                "select coalesce(max(skillmatchid),0) + 1 "
                                "from courseskillmatch"
                            )
                            next_match_id = SkillMatch.tuple_to_int(
                                cursor_obj.fetchone()
                            )
                        cursor_obj.execute(
                            "INSERT INTO public.courseskillmatch"
                            "(SkillMatchID, courseid, JDMasterID, MatchScore,"
                            "isactive) VALUES (%s,%s,%s,%s,%s)",
                            (next_match_id, cv, jd, score, 1),
                        )
                        # Commit per row so earlier matches survive a later
                        # failure.
                        conn.commit()
                        next_match_id += 1
                        print(
                            str(next_match_id)
                            + " "
                            + "Updating in DB - JD {} CV {} ".format(jd, cv),
                            score,
                        )

                print(course_id)
                cursor_obj.execute(
                    "select filename from coursemaster where masterid = %s",
                    (course_id,),
                )
                row = cursor_obj.fetchone()
                if row is None:
                    # No course row (e.g. nothing was scored): report the id
                    # and fall back to an empty filename.
                    print(course_id)
                    filename = ""
                else:
                    filename = row[0]
                print("------------------------Beta Results - " + str(filename))
        finally:
            conn.close()
        return str(filename) + ";" + str(top_score) + ";" + str(course_id)

    @staticmethod
    def _group_skills(rows):
        """Group rows into ``{row[0]: [row[1], ...]}``, keeping row order."""
        grouped = {}
        for row in rows:
            grouped.setdefault(row[0], []).append(row[1])
        return grouped

    @staticmethod
    def tuple_to_int(tup):
        """Collapse a tuple of numbers into one number, base-10 positionally.

        A one-element tuple returns its element unchanged; a longer tuple
        ``(a, b, c)`` yields ``a * 100 + b * 10 + c``.

        Raises:
            IndexError: If ``tup`` is empty.
        """
        if not tup:
            raise IndexError("tuple_to_int() requires a non-empty tuple")
        if len(tup) == 1:
            return tup[0]
        value = 0
        for part in tup:
            value = value * 10 + part
        return value