changes
Browse files
- SkillExtract.py +72 -31
- app.py +1 -1
SkillExtract.py
CHANGED
@@ -77,7 +77,7 @@ class SkillExtractorDetails:
|
|
77 |
except Exception as error:
|
78 |
test = 1
|
79 |
return job_roles
|
80 |
-
def SkillExtract(db_params,skill_extractor,JdID):
|
81 |
print("Extracting Skills for the JD...")
|
82 |
# Connect to the PostgreSQL database
|
83 |
conn = psycopg2.connect(**db_params)
|
@@ -87,9 +87,12 @@ class SkillExtractorDetails:
|
|
87 |
# Retrieve "id" and "description" columns from the table
|
88 |
#query = sql.SQL("select jdmasterid,jobdescription from JDMaster where isskillsextracted in (0)")
|
89 |
query = "select jdmasterid,jobdescription,filename from JDMaster where isskillsextracted = 0 and jdmasterid ="+ jd_id
|
90 |
-
|
91 |
# Use Pandas to read the data into a DataFrame
|
92 |
-
|
|
|
|
|
|
|
93 |
|
94 |
# Print the DataFrame (for demonstration purposes)
|
95 |
#print(df)
|
@@ -103,18 +106,22 @@ class SkillExtractorDetails:
|
|
103 |
OldSkillCount = 0
|
104 |
NewSkillCount = 0
|
105 |
if(len(df.index) > 0):
|
106 |
-
print("Total
|
107 |
for index, row in df.iterrows():
|
108 |
# Access individual columns using column names
|
109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
filename_jd = row['filename']
|
111 |
OldSkillCount = 0
|
112 |
NewSkillCount = 0
|
113 |
skill_score = 0.0
|
114 |
print("Extracting Skills For ", filename_jd + " , Id : " + str(id_value) + " , Index " + str(index + 1))
|
115 |
|
116 |
-
description_value = row['jobdescription']
|
117 |
-
#print(description_value)
|
118 |
|
119 |
annotations = skill_extractor.annotate(description_value)
|
120 |
matches = annotations['results']['full_matches']+annotations['results']['ngram_scored']
|
@@ -145,18 +152,33 @@ class SkillExtractorDetails:
|
|
145 |
#print("Skill Already in SkillMaster")
|
146 |
OldSkillCount = OldSkillCount + 1
|
147 |
isOld = "Yes"
|
148 |
-
|
|
|
|
|
|
|
149 |
params = (row_as_int[0],id_value,)
|
150 |
cursor.execute(query, params)
|
151 |
if cursor.rowcount > 0:
|
152 |
weightage = -1.0
|
153 |
#print("Skill Already in SkillMaster and JDSkillDetails")
|
154 |
else:
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
#print("Skill Already in SkillMaster and Inserted in JDSkillDetails")
|
161 |
#print(row_as_int)
|
162 |
else:
|
@@ -175,23 +197,37 @@ class SkillExtractorDetails:
|
|
175 |
generated_skill_id = cursor.fetchone()[0]
|
176 |
Skillid = generated_skill_id
|
177 |
jdMasterid = id_value
|
178 |
-
|
179 |
-
|
180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
print("Skill Identified : ", skill_name)
|
182 |
#print("Skill inserted in SkillMaster and Inserted in JDSkillDetails")
|
183 |
-
|
184 |
-
|
|
|
|
|
|
|
|
|
|
|
185 |
|
186 |
params = (id_value,)
|
187 |
cursor.execute(query, params)
|
188 |
conn.commit()
|
189 |
print("Skills Updated for Skills Extraction for file ", filename_jd)
|
190 |
print("Total Skills : ", len(skills_list))
|
191 |
-
return SkillExtractorDetails.latestSkillDetails(id_value,db_params)
|
192 |
-
def latestSkillDetails(jid,db_params):
|
193 |
data = ""
|
194 |
-
data = SkillExtractorDetails.display_skills(jid,db_params)
|
195 |
# jid = df.iat[0,0]
|
196 |
return data
|
197 |
def tuple_to_int(tup):
|
@@ -201,7 +237,7 @@ class SkillExtractorDetails:
|
|
201 |
return tup[0] * (10 ** (len(tup) - 1)) + SkillExtractorDetails.tuple_to_int(tup[1:])
|
202 |
|
203 |
|
204 |
-
def skill_check(dbQuery,db_params):
|
205 |
conn = psycopg2.connect(**db_params)
|
206 |
cursor = conn.cursor()
|
207 |
df = pd.read_sql_query(dbQuery, conn)
|
@@ -213,19 +249,24 @@ class SkillExtractorDetails:
|
|
213 |
|
214 |
Required_Skills = Required_Skills[2:]
|
215 |
return Required_Skills
|
216 |
-
def display_skills(id, db_params):
|
217 |
jd=str(id)
|
218 |
-
|
219 |
-
|
|
|
|
|
|
|
|
|
|
|
220 |
|
221 |
-
query = "select skillname from
|
222 |
-
RequiredSkills_Soft = SkillExtractorDetails.skill_check(query,db_params)
|
223 |
|
224 |
-
query = "select skillname from
|
225 |
-
RequiredSkills_G1 = SkillExtractorDetails.skill_check(query,db_params)
|
226 |
|
227 |
-
query = "select skillname from
|
228 |
-
RequiredSkills_G2 = SkillExtractorDetails.skill_check(query,db_params)
|
229 |
|
230 |
print('')
|
231 |
print("Required Skills : " + RequiredSkills_Hard)
|
|
|
77 |
except Exception as error:
|
78 |
test = 1
|
79 |
return job_roles
|
80 |
+
def SkillExtract(db_params,skill_extractor,JdID, IsJD):
|
81 |
print("Extracting Skills for the JD...")
|
82 |
# Connect to the PostgreSQL database
|
83 |
conn = psycopg2.connect(**db_params)
|
|
|
87 |
# Retrieve "id" and "description" columns from the table
|
88 |
#query = sql.SQL("select jdmasterid,jobdescription from JDMaster where isskillsextracted in (0)")
|
89 |
query = "select jdmasterid,jobdescription,filename from JDMaster where isskillsextracted = 0 and jdmasterid ="+ jd_id
|
90 |
+
CVquery = "select masterid,description,filename from CourseMaster where isskillsextracted = 0 and masterid ="+ jd_id
|
91 |
# Use Pandas to read the data into a DataFrame
|
92 |
+
if(IsJD):
|
93 |
+
df = pd.read_sql_query(query, conn)
|
94 |
+
else:
|
95 |
+
df = pd.read_sql_query(query, conn)
|
96 |
|
97 |
# Print the DataFrame (for demonstration purposes)
|
98 |
#print(df)
|
|
|
106 |
OldSkillCount = 0
|
107 |
NewSkillCount = 0
|
108 |
if(len(df.index) > 0):
|
109 |
+
print("Total IDs for Extractraction : " + str(len(df.index)))
|
110 |
for index, row in df.iterrows():
|
111 |
# Access individual columns using column names
|
112 |
+
if(IsJD):
|
113 |
+
id_value = row['jdmasterid']
|
114 |
+
description_value = row['jobdescription']
|
115 |
+
else:
|
116 |
+
id_value = row['masterid']
|
117 |
+
description_value = row['description']
|
118 |
+
|
119 |
filename_jd = row['filename']
|
120 |
OldSkillCount = 0
|
121 |
NewSkillCount = 0
|
122 |
skill_score = 0.0
|
123 |
print("Extracting Skills For ", filename_jd + " , Id : " + str(id_value) + " , Index " + str(index + 1))
|
124 |
|
|
|
|
|
125 |
|
126 |
annotations = skill_extractor.annotate(description_value)
|
127 |
matches = annotations['results']['full_matches']+annotations['results']['ngram_scored']
|
|
|
152 |
#print("Skill Already in SkillMaster")
|
153 |
OldSkillCount = OldSkillCount + 1
|
154 |
isOld = "Yes"
|
155 |
+
if(IsJD):
|
156 |
+
query = "SELECT skillid FROM jdSkilldetails WHERE skillid IN (%s) and jdMasterid in (%s)"
|
157 |
+
else:
|
158 |
+
query = "SELECT skillid FROM CourseSkilldetails WHERE skillid IN (%s) and Masterid in (%s)"
|
159 |
params = (row_as_int[0],id_value,)
|
160 |
cursor.execute(query, params)
|
161 |
if cursor.rowcount > 0:
|
162 |
weightage = -1.0
|
163 |
#print("Skill Already in SkillMaster and JDSkillDetails")
|
164 |
else:
|
165 |
+
if(IsJD):
|
166 |
+
Skillid = row_as_int[0]
|
167 |
+
jdMasterid = id_value
|
168 |
+
insert_query = sql.SQL("""INSERT INTO jdSkilldetails (Skillid, jdMasterid) VALUES (%s, %s)""")
|
169 |
+
cursor.execute(insert_query, (Skillid, jdMasterid))
|
170 |
+
conn.commit()
|
171 |
+
else:
|
172 |
+
query = "Select max(skilldetailsid) from courseskilldetails"
|
173 |
+
df = pd.read_sql_query(query, conn)
|
174 |
+
CourseID = df.iat[0,0] + 1
|
175 |
+
|
176 |
+
Skillid = row_as_int[0]
|
177 |
+
jdMasterid = id_value
|
178 |
+
insert_query = sql.SQL("""INSERT INTO Courseskilldetails (skilldetailsid, Skillid, Masterid) VALUES (%s, %s, %s)""")
|
179 |
+
cursor.execute(insert_query, (CourseID, Skillid, jdMasterid))
|
180 |
+
conn.commit()
|
181 |
+
|
182 |
#print("Skill Already in SkillMaster and Inserted in JDSkillDetails")
|
183 |
#print(row_as_int)
|
184 |
else:
|
|
|
197 |
generated_skill_id = cursor.fetchone()[0]
|
198 |
Skillid = generated_skill_id
|
199 |
jdMasterid = id_value
|
200 |
+
if(IsJD):
|
201 |
+
insert_query = sql.SQL("""INSERT INTO jdSkilldetails (Skillid, jdMasterid) VALUES (%s, %s)""")
|
202 |
+
cursor.execute(insert_query, (Skillid, jdMasterid))
|
203 |
+
conn.commit()
|
204 |
+
else:
|
205 |
+
query = "Select max(skilldetailsid) from courseskilldetails"
|
206 |
+
df = pd.read_sql_query(query, conn)
|
207 |
+
CourseID = df.iat[0,0] + 1
|
208 |
+
|
209 |
+
insert_query = sql.SQL("""INSERT INTO CourseSkilldetails (skilldetailsid,Skillid, Masterid) VALUES (%s, %s, %s)""")
|
210 |
+
cursor.execute(insert_query, (CourseID,Skillid, jdMasterid))
|
211 |
+
conn.commit()
|
212 |
print("Skill Identified : ", skill_name)
|
213 |
#print("Skill inserted in SkillMaster and Inserted in JDSkillDetails")
|
214 |
+
|
215 |
+
|
216 |
+
if(IsJD):
|
217 |
+
SkillExtractorDetails.extractWords(description_value,id_value,db_params)
|
218 |
+
query = "update public.jdmaster set isskillsextracted = 1 where jdmasterid = (%s)"
|
219 |
+
else:
|
220 |
+
query = "update public.coursemaster set isskillsextracted = 1 where masterid = (%s)"
|
221 |
|
222 |
params = (id_value,)
|
223 |
cursor.execute(query, params)
|
224 |
conn.commit()
|
225 |
print("Skills Updated for Skills Extraction for file ", filename_jd)
|
226 |
print("Total Skills : ", len(skills_list))
|
227 |
+
return SkillExtractorDetails.latestSkillDetails(id_value,db_params,IsJD)
|
228 |
+
def latestSkillDetails(jid,db_params,IsJD):
|
229 |
data = ""
|
230 |
+
data = SkillExtractorDetails.display_skills(jid,db_params,IsJD)
|
231 |
# jid = df.iat[0,0]
|
232 |
return data
|
233 |
def tuple_to_int(tup):
|
|
|
237 |
return tup[0] * (10 ** (len(tup) - 1)) + SkillExtractorDetails.tuple_to_int(tup[1:])
|
238 |
|
239 |
|
240 |
+
def skill_check(dbQuery,db_params,IsJD):
|
241 |
conn = psycopg2.connect(**db_params)
|
242 |
cursor = conn.cursor()
|
243 |
df = pd.read_sql_query(dbQuery, conn)
|
|
|
249 |
|
250 |
Required_Skills = Required_Skills[2:]
|
251 |
return Required_Skills
|
252 |
+
def display_skills(id, db_params,IsJD):
|
253 |
jd=str(id)
|
254 |
+
tableName = 'CVSkillDetails'
|
255 |
+
if(IsJD):
|
256 |
+
tableName = 'SkillDetails'
|
257 |
+
|
258 |
+
|
259 |
+
query = "select skillname from " + tableName +" where id = "+ jd +" and skillscore > 99 and skilltype = 'Hard Skill'"
|
260 |
+
RequiredSkills_Hard = SkillExtractorDetails.skill_check(query,db_params,IsJD)
|
261 |
|
262 |
+
query = "select skillname from " + tableName +" where id = "+ jd +" and skillscore > 50 and skilltype = 'Soft Skill'"
|
263 |
+
RequiredSkills_Soft = SkillExtractorDetails.skill_check(query,db_params,IsJD)
|
264 |
|
265 |
+
query = "select skillname from " + tableName +" where id = "+ jd +" and skillscore < 50 and skilltype = 'Soft Skill'"
|
266 |
+
RequiredSkills_G1 = SkillExtractorDetails.skill_check(query,db_params,IsJD)
|
267 |
|
268 |
+
query = "select skillname from " + tableName +" where id = "+ jd +" and skillscore < 99 and skilltype = 'Hard Skill'"
|
269 |
+
RequiredSkills_G2 = SkillExtractorDetails.skill_check(query,db_params,IsJD)
|
270 |
|
271 |
print('')
|
272 |
print("Required Skills : " + RequiredSkills_Hard)
|
app.py
CHANGED
@@ -50,7 +50,7 @@ def parse_csv(df):
|
|
50 |
def UploadJobDescription(file: bytes = File(...), FileName: str = "sample.pdf"):
|
51 |
text= ExtractContentFromFile.ExtractDataFromFile(FileName,file)
|
52 |
returnID = UploadOpenFile.uploadFile(text,FileName,db_params,True)
|
53 |
-
returnSkills = SkillExtractorDetails.SkillExtract(db_params,skill_extractor,returnID)
|
54 |
details = returnSkills.split('@')
|
55 |
data = {'Data':['Required Skills', 'Soft Skills', 'Good to have Skills'], 'Values':[details[0], details[1], details[2]]}
|
56 |
df = pd.DataFrame(data)
|
|
|
50 |
def UploadJobDescription(file: bytes = File(...), FileName: str = "sample.pdf"):
|
51 |
text= ExtractContentFromFile.ExtractDataFromFile(FileName,file)
|
52 |
returnID = UploadOpenFile.uploadFile(text,FileName,db_params,True)
|
53 |
+
returnSkills = SkillExtractorDetails.SkillExtract(db_params,skill_extractor,returnID,True)
|
54 |
details = returnSkills.split('@')
|
55 |
data = {'Data':['Required Skills', 'Soft Skills', 'Good to have Skills'], 'Values':[details[0], details[1], details[2]]}
|
56 |
df = pd.DataFrame(data)
|