from collections import defaultdict

import pandas as pd
import psycopg2
from sentence_transformers import SentenceTransformer, util

class SkillMatch:
    """Score course skill sets against a job description and store the results.

    The functions here are used as plain namespaced functions
    (``SkillMatch.SkillMatcher(...)``), so they are declared static.
    """

    @staticmethod
    def SkillMatcher(model, db_params, jdID):
        """Find the course whose skills best match the given job description.

        Every (JD, course) pair that is not already present in
        ``courseskillmatch`` is scored with the cosine similarity of the
        embeddings of the two space-joined skill lists. Each new score is
        inserted into ``courseskillmatch`` and committed immediately.

        Args:
            model: Object exposing ``encode(text, convert_to_tensor=True)``
                whose results are accepted by ``util.cos_sim`` (presumably a
                ``SentenceTransformer`` -- confirm against the caller).
            db_params: Keyword arguments forwarded to ``psycopg2.connect``.
            jdID: Value matched against ``JDDetailsAllSkill.jdmasterid``.

        Returns:
            The string ``"<filename>;<score>;<course id>"``. ``score`` is the
            highest ``cosine similarity * 100`` among the pairs scored during
            this call and ``course id`` the course it belongs to. When no
            pair produced a positive score the result is ``"<filename>;0.0;0"``
            with whatever filename ``coursemaster`` holds for id ``0``
            (empty when there is none).
        """
        print("Checking Best Course for the JD...")

        conn = psycopg2.connect(**db_params)
        # Release the cursor and the connection even when a query or the
        # model raises part-way through.
        try:
            cursor_obj = conn.cursor()
            try:
                return SkillMatch._match_with_cursor(model, conn, cursor_obj, jdID)
            finally:
                cursor_obj.close()
        finally:
            conn.close()

    @staticmethod
    def _match_with_cursor(model, conn, cursor_obj, jdID):
        """Run the matching workflow on an already open connection and cursor."""
        cursor_obj.execute(
            "select * from JDDetailsAllSkill where jdmasterid = (%s)", (jdID,)
        )
        jd_data = cursor_obj.fetchall()
        print(jd_data)

        cursor_obj.execute("select * from CourseDetailsForMatching")
        cv_data = cursor_obj.fetchall()
        print(cv_data)

        # Keys of the pairs that already have a stored score, as "<jd>-<course>".
        # A set makes the per-pair membership test O(1).
        cursor_obj.execute(
            "select jdmasterid || '-' || courseid from courseskillmatch"
        )
        already_matched = {row[0] for row in cursor_obj.fetchall()}

        jd_skills = SkillMatch._group_skills(jd_data)
        cv_skills = SkillMatch._group_skills(cv_data)

        insert_query = """INSERT INTO public.courseskillmatch(SkillMatchID, courseid, JDMasterID, MatchScore,isactive) VALUES (%s,%s,%s,%s,%s)"""

        next_match_id = None  # fetched lazily, right before the first insert
        top_score = 0.0       # best similarity * 100 seen so far, as a float
        best_course_id = 0

        for jd, jd_skill_list in jd_skills.items():
            jd_embedding = None  # encoded once per JD, and only when needed
            for cv, cv_skill_list in cv_skills.items():
                match_details = str(jd) + "-" + str(cv)
                print("Checking for existing Profile")
                if match_details in already_matched:
                    print("Already in Database -----------" + match_details)
                    continue

                print("Running Matching Algo")
                if jd_embedding is None:
                    jd_embedding = model.encode(
                        " ".join(jd_skill_list), convert_to_tensor=True
                    )
                cv_embedding = model.encode(
                    " ".join(cv_skill_list), convert_to_tensor=True
                )

                similarity = util.cos_sim(cv_embedding, jd_embedding)
                score = similarity[0][0].item()
                # Scale on the tensor before extracting so the reported value
                # is numerically the same as it has always been.
                scaled_score = (similarity * 100)[0][0].item()
                if top_score < scaled_score:
                    top_score = scaled_score
                    best_course_id = cv

                print("DB Entry for Matching Results")
                if next_match_id is None:
                    # NOTE(review): max()+1 id allocation looks unsafe with
                    # concurrent writers; left as is because changing it
                    # needs schema knowledge this file does not show.
                    cursor_obj.execute(
                        "select coalesce(max(skillmatchid),0) + 1 from courseskillmatch"
                    )
                    next_match_id = SkillMatch.tuple_to_int(cursor_obj.fetchone())

                cursor_obj.execute(insert_query, (next_match_id, cv, jd, score, 1))
                # Commit per row so finished scores survive a later failure.
                conn.commit()
                next_match_id = next_match_id + 1
                print(
                    str(next_match_id)
                    + " "
                    + "Updating in DB - JD {} CV {} ".format(jd, cv),
                    score,
                )

        print(best_course_id)
        cursor_obj.execute(
            "select filename from coursemaster where masterid = %s",
            (best_course_id,),
        )
        row = cursor_obj.fetchone()
        if row is None:
            # No course row for this id (for example nothing was scored).
            print(best_course_id)
            filename = ""
        else:
            filename = row[0]

        print("------------------------Beta Results - " + str(filename))
        return str(filename) + ";" + str(top_score) + ";" + str(best_course_id)

    @staticmethod
    def _group_skills(rows):
        """Group rows by their first column, collecting the second column.

        Returns a plain dict mapping ``row[0]`` to the list of ``row[1]``
        values in the order the rows were given.
        """
        grouped = defaultdict(list)
        for row in rows:
            grouped[row[0]].append(row[1])
        return dict(grouped)

    @staticmethod
    def tuple_to_int(tup):
        """Combine a sequence of digits into one number, most significant first.

        ``(1, 2, 3)`` becomes ``123``. A one-element sequence returns its only
        element unchanged, which is how a single-column database row is
        unwrapped by ``SkillMatcher``.

        Raises:
            IndexError: If ``tup`` is empty.
        """
        # Horner's scheme: same result as summing digit * 10**position, but
        # iterative, so long inputs cannot hit the recursion limit.
        result = tup[0]
        for digit in tup[1:]:
            result = result * 10 + digit
        return result