# Spaces:
# Sleeping
# Sleeping
from collections import defaultdict

import pandas as pd
import psycopg2
from sentence_transformers import SentenceTransformer, util
class SkillMatch:
    """Score courses against a job description (JD) by skill-text similarity.

    The class is used purely as a namespace: its methods take no instance
    state and are declared static, so they can be called either on the class
    (``SkillMatch.SkillMatcher(...)``) or on an instance.
    """

    @staticmethod
    def SkillMatcher(model, db_params, jdID):
        """Score every not-yet-stored course for a JD and return the best one.

        Rows of ``JDDetailsAllSkill`` (filtered by ``jdID``) and of
        ``CourseDetailsForMatching`` are grouped by their first column, and
        the values of their second column are joined with spaces into one
        sentence per group.  Each (JD, course) pair whose ``"<jd>-<course>"``
        key is not already present in ``courseskillmatch`` is embedded with
        ``model``, compared with cosine similarity, and inserted into
        ``courseskillmatch`` (one commit per inserted row).

        Only pairs scored during this call compete for the top score; pairs
        that were already stored are skipped before scoring.

        Args:
            model: Object exposing ``encode(text, convert_to_tensor=True)``
                (presumably a ``SentenceTransformer`` -- confirm with caller).
            db_params: Keyword arguments forwarded to ``psycopg2.connect``.
            jdID: Value bound to ``jdmasterid`` when selecting the JD rows.

        Returns:
            The string ``"<filename>;<top score>;<course id>"``.  The score
            is the best cosine similarity multiplied by 100.  When no pair
            was scored the score is ``0.0`` and the course id is ``0``; when
            ``coursemaster`` has no row for the course id the filename part
            is empty.
        """
        print("Checking Best Course for the JD...")
        conn = psycopg2.connect(**db_params)
        # try/finally so the cursor and connection are released even when a
        # query or the model raises part-way through.
        try:
            cursor_obj = conn.cursor()
            try:
                return SkillMatch._match_and_store(model, conn, cursor_obj, jdID)
            finally:
                cursor_obj.close()
        finally:
            conn.close()

    @staticmethod
    def _group_skills(rows):
        """Group column 1 of *rows* by column 0, preserving row order."""
        grouped = defaultdict(list)
        for row in rows:
            grouped[row[0]].append(row[1])
        return dict(grouped)

    @staticmethod
    def _match_and_store(model, conn, cursor_obj, jdID):
        """Run the scoring loop on an open connection and build the result."""
        cursor_obj.execute(
            "select * from JDDetailsAllSkill where jdmasterid = (%s)", (jdID,)
        )
        jd_data = cursor_obj.fetchall()
        print(jd_data)

        cursor_obj.execute("select * from CourseDetailsForMatching")
        cv_data = cursor_obj.fetchall()
        print(cv_data)

        # Keys of pairs that already have a stored score; a set gives O(1)
        # membership tests inside the nested loop below.
        cursor_obj.execute(
            "select jdmasterid || '-' || courseid from courseskillmatch"
        )
        existing_pairs = {row[0] for row in cursor_obj.fetchall()}

        jd_skills = SkillMatch._group_skills(jd_data)
        cv_skills = SkillMatch._group_skills(cv_data)

        next_match_id = 0  # 0 means "not fetched from the database yet"
        top_score = 0.0
        course_id = 0

        for jd, jd_skill_list in jd_skills.items():
            jd_sentence = " ".join(jd_skill_list)
            jd_embedding = None  # encoded once per JD, on first use
            for cv, cv_skill_list in cv_skills.items():
                match_details = str(jd) + "-" + str(cv)
                print("Checking for existing Profile")
                if match_details in existing_pairs:
                    print("Already in Database -----------" + match_details)
                    continue

                print("Running Matching Algo")
                if jd_embedding is None:
                    jd_embedding = model.encode(jd_sentence, convert_to_tensor=True)
                cv_embedding = model.encode(
                    " ".join(cv_skill_list), convert_to_tensor=True
                )
                # Cosine similarity between the two sentence embeddings.
                similarity = util.cos_sim(cv_embedding, jd_embedding)
                raw_score = similarity[0][0].item()
                # Scale on the tensor, then extract, so the reported number
                # is computed the same way as the stored tensor product.
                scaled_score = (similarity * 100)[0][0].item()
                if top_score < scaled_score:
                    top_score = scaled_score
                    course_id = cv

                print("DB Entry for Matching Results")
                if next_match_id == 0:
                    cursor_obj.execute(
                        "select coalesce(max(skillmatchid),0) + 1 from courseskillmatch"
                    )
                    next_match_id = SkillMatch.tuple_to_int(cursor_obj.fetchall()[0])
                record = (next_match_id, cv, jd, raw_score, 1)
                query = """INSERT INTO public.courseskillmatch(SkillMatchID, courseid, JDMasterID, MatchScore,isactive) VALUES (%s,%s,%s,%s,%s)"""
                cursor_obj.execute(query, record)
                conn.commit()
                next_match_id = next_match_id + 1
                print(
                    str(next_match_id) + " " + "Updating in DB - JD {} CV {} ".format(jd, cv),
                    raw_score,
                )

        print(course_id)
        # Parameterized lookup; the filename lives in its own variable so a
        # missing row can never leave a non-string value behind.
        filename = ""
        cursor_obj.execute(
            "select filename from coursemaster where masterid = %s", (course_id,)
        )
        row = cursor_obj.fetchone()
        if row is None:
            print(course_id)
        else:
            filename = row[0]
        print("------------------------Beta Results - " + str(filename))
        return str(filename) + ";" + str(top_score) + ";" + str(course_id)

    @staticmethod
    def tuple_to_int(tup):
        """Collapse a tuple into one value, treating items as decimal places.

        A one-element tuple yields its element unchanged.  Longer tuples are
        combined as ``tup[0] * 10**(n-1) + tup[1] * 10**(n-2) + ... + tup[n-1]``,
        e.g. ``(1, 2, 3)`` becomes ``123``.

        Raises:
            IndexError: If *tup* is empty.
        """
        result = tup[0]
        # Horner's scheme: same value as the positional formula above without
        # recursion or repeated tuple slicing.
        for item in tup[1:]:
            result = result * 10 + item
        return result