import psycopg2
import pandas as pd
from sentence_transformers import SentenceTransformer, util


def _group_skills(rows):
    """Group rows into ``{row[0]: [row[1], ...]}``, keeping first-seen order.

    Only the first two columns of each row are read.
    NOTE(review): assumes column 0 is the JD/course id and column 1 is the
    skill text -- confirm against the view definitions.
    """
    grouped = {}
    for row in rows:
        grouped.setdefault(row[0], []).append(row[1])
    return grouped


class SkillMatch:
    """Score job descriptions against courses and persist the results."""

    @staticmethod
    def SkillMatcher(model, db_params, jdID):
        """Score every not-yet-stored JD/course pair and report the best course.

        For each job description returned for ``jdID`` and each course in
        ``CourseDetailsForMatching``, the skills are joined into one sentence
        per side, embedded with ``model`` and compared with cosine similarity.
        Each newly scored pair is inserted into ``courseskillmatch`` and
        committed immediately. Pairs already present in ``courseskillmatch``
        are skipped and therefore do not take part in the "best course"
        selection.

        Args:
            model: Object exposing ``encode(text, convert_to_tensor=True)``
                (presumably a ``SentenceTransformer`` -- verify against caller).
            db_params: Keyword arguments forwarded to ``psycopg2.connect``.
            jdID: Value matched against ``JDDetailsAllSkill.jdmasterid``.

        Returns:
            ``"<filename>;<score>;<course id>"`` where the score is the best
            cosine similarity multiplied by 100. When no pair was scored (or
            no score exceeded 0) the score is ``0.0`` and the course id is
            ``0``; when the course has no ``coursemaster`` row the file name
            is empty.
        """
        print("Checking Best Course for the JD...")
        conn = psycopg2.connect(**db_params)
        try:
            # The cursor context manager closes the cursor; the connection is
            # closed in ``finally`` so neither leaks when a statement raises.
            with conn.cursor() as cursor_obj:
                cursor_obj.execute(
                    "select * from JDDetailsAllSkill where jdmasterid = (%s)",
                    (jdID,),
                )
                jd_data = cursor_obj.fetchall()
                print(jd_data)

                cursor_obj.execute("select * from CourseDetailsForMatching")
                cv_data = cursor_obj.fetchall()
                print(cv_data)

                # Keys of pairs that were scored earlier, as "<jd>-<course>".
                cursor_obj.execute(
                    "select jdmasterid || '-' || courseid from courseskillmatch"
                )
                existing_pairs = {row[0] for row in cursor_obj.fetchall()}

                jd_skills = _group_skills(jd_data)
                cv_skills = _group_skills(cv_data)

                next_match_id = None
                top_score = 0.0
                course_id = 0

                for jd, jd_skill_list in jd_skills.items():
                    # Encoded lazily and reused for every course of this JD.
                    jd_embedding = None
                    for cv, cv_skill_list in cv_skills.items():
                        match_details = str(jd) + "-" + str(cv)
                        print("Checking for existing Profile")
                        if match_details in existing_pairs:
                            print("Already in Database -----------" + match_details)
                            continue

                        print("Running Matching Algo")
                        if jd_embedding is None:
                            jd_embedding = model.encode(
                                " ".join(jd_skill_list), convert_to_tensor=True
                            )
                        cv_embedding = model.encode(
                            " ".join(cv_skill_list), convert_to_tensor=True
                        )

                        # Cosine similarity between the two sentence embeddings.
                        similarity = util.cos_sim(cv_embedding, jd_embedding)
                        score = similarity[0][0].item()
                        # Scale on the tensor so the reported value keeps the
                        # same rounding as before.
                        scaled_score = (similarity * 100)[0][0].item()
                        if top_score < scaled_score:
                            top_score = scaled_score
                            course_id = cv

                        print("DB Entry for Matching Results")
                        if next_match_id is None:
                            # NOTE(review): max()+1 allocation is not safe if
                            # several writers run concurrently -- confirm.
                            cursor_obj.execute(
                                "select coalesce(max(skillmatchid),0) + 1 "
                                "from courseskillmatch"
                            )
                            next_match_id = SkillMatch.tuple_to_int(
                                cursor_obj.fetchone()
                            )

                        cursor_obj.execute(
                            """INSERT INTO public.courseskillmatch(SkillMatchID, courseid, JDMasterID, MatchScore,isactive) VALUES (%s,%s,%s,%s,%s)""",
                            (next_match_id, cv, jd, score, 1),
                        )
                        # Commit per pair so finished work survives a later failure.
                        conn.commit()
                        next_match_id = next_match_id + 1
                        print(
                            str(next_match_id)
                            + " "
                            + "Updating in DB - JD {} CV {} ".format(jd, cv),
                            score,
                        )

                print(course_id)
                file_name = ""
                df = pd.read_sql_query(
                    "select filename from coursemaster where masterid = %s",
                    conn,
                    params=(course_id,),
                )
                if df.empty:
                    # No coursemaster row for the selected course.
                    print(course_id)
                else:
                    file_name = df.iat[0, 0]
                print("------------------------Beta Results - " + str(file_name))
                return str(file_name) + ";" + str(top_score) + ";" + str(course_id)
        finally:
            conn.close()

    @staticmethod
    def tuple_to_int(tup):
        """Collapse a tuple into a single value.

        A one-element tuple yields that element unchanged. For longer tuples
        each element is weighted by a power of ten according to its position,
        so ``(1, 2, 3)`` becomes ``123``.

        Raises:
            IndexError: If ``tup`` is empty.
        """
        if len(tup) == 1:
            return tup[0]
        return tup[0] * (10 ** (len(tup) - 1)) + SkillMatch.tuple_to_int(tup[1:])