Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -115,19 +115,46 @@ def extract_text_from_file(file_obj):
|
|
115 |
|
116 |
# Information extraction functions
|
117 |
def extract_skills(text):
|
118 |
-
"""Extract skills from text"""
|
119 |
-
skill_keywords = {
|
120 |
-
"Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "React", "Angular"],
|
121 |
-
"Data Science": ["Machine Learning", "Data Analysis", "Statistics", "TensorFlow", "PyTorch", "AI", "NLP"],
|
122 |
-
"Database": ["SQL", "MySQL", "MongoDB", "PostgreSQL", "Oracle", "Redis"],
|
123 |
-
"Web Dev": ["React", "Angular", "Node.js", "Frontend", "Backend", "Full-Stack", "REST API"],
|
124 |
-
"Software Dev": ["Agile", "Scrum", "Git", "DevOps", "Docker", "CI/CD", "Jenkins"],
|
125 |
-
"Cloud": ["AWS", "Azure", "Google Cloud", "Lambda", "S3", "EC2"],
|
126 |
-
"Business": ["Project Management", "Leadership", "Teamwork", "Agile", "Scrum"]
|
127 |
-
}
|
128 |
-
|
129 |
text_lower = text.lower()
|
130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
|
132 |
@lru_cache(maxsize=32)
|
133 |
def extract_name(text_start):
|
@@ -218,11 +245,26 @@ def summarize_resume_text(resume_text, models):
|
|
218 |
return summary, time.time() - start
|
219 |
|
220 |
def extract_job_requirements(job_description, models):
|
|
|
221 |
tech_skills = [
|
222 |
-
"Python", "Java", "JavaScript", "
|
223 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
]
|
225 |
|
|
|
|
|
226 |
clean_text = job_description.lower()
|
227 |
|
228 |
# Extract job title
|
@@ -245,8 +287,21 @@ def extract_job_requirements(job_description, models):
|
|
245 |
break
|
246 |
except: pass
|
247 |
|
248 |
-
# Extract skills
|
249 |
-
required_skills = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
|
251 |
# Fallback if no skills found
|
252 |
if not required_skills:
|
@@ -264,86 +319,146 @@ def extract_job_requirements(job_description, models):
|
|
264 |
}
|
265 |
|
266 |
def evaluate_job_fit(resume_summary, job_requirements, models):
|
267 |
-
|
268 |
-
Use a more direct method to evaluate job fit, rather than relying solely on sentiment analysis
|
269 |
-
"""
|
270 |
-
start_time = time.time()
|
271 |
|
272 |
-
#
|
273 |
required_skills = job_requirements["required_skills"]
|
274 |
years_required = job_requirements["years_experience"]
|
275 |
job_title = job_requirements["title"]
|
276 |
-
job_summary = job_requirements["summary"]
|
277 |
-
|
278 |
-
# Extract skills from resume
|
279 |
skills_mentioned = extract_skills(resume_summary)
|
280 |
|
281 |
-
# Calculate
|
282 |
matching_skills = [skill for skill in required_skills if skill in skills_mentioned]
|
283 |
-
skill_match_percentage = len(matching_skills) / len(required_skills) if required_skills else 0
|
284 |
|
285 |
-
#
|
286 |
-
|
287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
years_experience = 0
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
except:
|
293 |
-
years_experience = 0
|
294 |
|
295 |
-
|
296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
297 |
|
298 |
-
#
|
299 |
-
|
300 |
-
title_match = 0
|
301 |
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
306 |
|
307 |
-
#
|
308 |
-
skill_score =
|
309 |
-
exp_score =
|
310 |
-
title_score =
|
311 |
|
312 |
-
# Extract
|
313 |
-
|
314 |
-
name =
|
315 |
|
316 |
-
|
317 |
-
|
318 |
|
319 |
-
|
320 |
-
|
321 |
|
322 |
-
#
|
323 |
-
#
|
324 |
-
|
|
|
|
|
|
|
|
|
|
|
325 |
|
326 |
-
#
|
327 |
-
if weighted_score >= 1.
|
328 |
fit_score = 2 # Good fit
|
329 |
-
elif weighted_score >= 0.
|
330 |
-
fit_score = 1 # Potential
|
331 |
else:
|
332 |
fit_score = 0 # Not a fit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
333 |
|
334 |
-
# Generate assessment
|
335 |
-
|
336 |
|
337 |
if fit_score == 2:
|
338 |
-
|
339 |
elif fit_score == 1:
|
340 |
-
|
341 |
else:
|
342 |
-
|
343 |
-
|
344 |
-
execution_time = time.time() - start_time
|
345 |
|
346 |
-
return
|
347 |
|
348 |
def analyze_job_fit(resume_summary, job_description, models):
|
349 |
start = time.time()
|
@@ -355,6 +470,10 @@ def analyze_job_fit(resume_summary, job_description, models):
|
|
355 |
# Main Function
|
356 |
#####################################
|
357 |
def main():
|
|
|
|
|
|
|
|
|
358 |
st.title("Resume-Job Fit Analyzer")
|
359 |
st.markdown("Upload your resume file in **.docx**, **.doc**, or **.txt** format and enter a job description to see how well you match.")
|
360 |
|
@@ -363,6 +482,9 @@ def main():
|
|
363 |
uploaded_file = st.file_uploader("Upload your resume", type=["docx", "doc", "txt"])
|
364 |
job_description = st.text_area("Enter Job Description", height=200, placeholder="Paste the job description here...")
|
365 |
|
|
|
|
|
|
|
366 |
# Process when button clicked
|
367 |
if uploaded_file and job_description and st.button("Analyze Job Fit"):
|
368 |
progress = st.progress(0)
|
@@ -417,6 +539,11 @@ def main():
|
|
417 |
- If interested in this field, focus on developing the required skills
|
418 |
- Consider similar roles with fewer experience requirements
|
419 |
""")
|
|
|
|
|
|
|
|
|
|
|
420 |
|
421 |
if __name__ == "__main__":
|
422 |
main()
|
|
|
115 |
|
116 |
# Information extraction functions
|
117 |
# Canonical technical skill names reported by extract_skills().
_TECH_SKILLS = (
    "Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go", "R",
    "React", "Angular", "Vue", "Node.js", "jQuery", "Bootstrap", "PHP", "Ruby",
    "Machine Learning", "Data Analysis", "Big Data", "AI", "NLP", "Deep Learning",
    "MySQL", "MongoDB", "PostgreSQL", "Oracle", "Database", "ETL",
    "AWS", "Azure", "Google Cloud", "Docker", "Kubernetes", "CI/CD", "DevOps",
    "Git", "GitHub", "Agile", "Scrum", "Jira", "RESTful API", "GraphQL",
    "TensorFlow", "PyTorch", "SAS", "SPSS", "Tableau", "Power BI", "Excel",
)

# Canonical soft skill names reported by extract_skills().
_SOFT_SKILLS = (
    "Communication", "Teamwork", "Problem Solving", "Critical Thinking",
    "Leadership", "Organization", "Time Management", "Flexibility", "Adaptability",
    "Project Management", "Attention to Detail", "Creativity", "Analytical Skills",
    "Customer Service", "Interpersonal Skills", "Presentation Skills", "Negotiation",
)


def _compile_skill_pattern(skill):
    """Build a whole-token, lower-case regex for one technical skill name."""
    # Words of a multi-word skill may be separated by any whitespace run.
    core = r"\s+".join(re.escape(part) for part in skill.lower().split())
    # Allow a plural "s" ("databases", "RESTful APIs") only on longer names;
    # on short ones ("Go", "R", "AI") it would re-introduce false positives.
    plural = "s?" if len(skill) > 3 else ""
    # Look-arounds are used instead of \b because \b does not behave as a
    # token boundary next to symbols such as the "+" and "#" in "C++" / "C#".
    return re.compile(r"(?<![a-z0-9])" + core + plural + r"(?![a-z0-9])")


# Compiled once at import time; extract_skills() runs on every analysis.
_TECH_SKILL_PATTERNS = tuple(
    (skill, _compile_skill_pattern(skill)) for skill in _TECH_SKILLS
)


def extract_skills(text):
    """Extract the known technical and soft skills mentioned in ``text``.

    Technical skills are matched case-insensitively as whole tokens, so short
    names such as "R", "Go", "AI" or "Git" no longer match inside unrelated
    words ("driver", "google", "training", "digital"), and "Java" is not
    reported for "JavaScript".  Soft skills keep the simpler case-insensitive
    substring match.

    Args:
        text: Free-form resume or summary text.  ``None`` or an empty string
            yields an empty list.

    Returns:
        A list of canonical skill names without duplicates: technical skills
        first, then soft skills, each in catalogue order.
    """
    if not text:
        return []

    text_lower = text.lower()

    found_skills = [
        skill
        for skill, pattern in _TECH_SKILL_PATTERNS
        if pattern.search(text_lower)
    ]
    # Soft skills are long phrases, so substring matching is precise enough
    # and still catches inflections such as "communications".
    found_skills.extend(
        skill for skill in _SOFT_SKILLS if skill.lower() in text_lower
    )
    return found_skills
|
158 |
|
159 |
@lru_cache(maxsize=32)
|
160 |
def extract_name(text_start):
|
|
|
245 |
return summary, time.time() - start
|
246 |
|
247 |
def extract_job_requirements(job_description, models):
|
248 |
+
# Use the same skills list as for resumes for consistency
|
249 |
tech_skills = [
|
250 |
+
"Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go", "R",
|
251 |
+
"React", "Angular", "Vue", "Node.js", "jQuery", "Bootstrap", "PHP", "Ruby",
|
252 |
+
"Machine Learning", "Data Analysis", "Big Data", "AI", "NLP", "Deep Learning",
|
253 |
+
"SQL", "MySQL", "MongoDB", "PostgreSQL", "Oracle", "Database", "ETL",
|
254 |
+
"AWS", "Azure", "Google Cloud", "Docker", "Kubernetes", "CI/CD", "DevOps",
|
255 |
+
"Git", "GitHub", "Agile", "Scrum", "Jira", "RESTful API", "GraphQL",
|
256 |
+
"TensorFlow", "PyTorch", "SAS", "SPSS", "Tableau", "Power BI", "Excel"
|
257 |
+
]
|
258 |
+
|
259 |
+
soft_skills = [
|
260 |
+
"Communication", "Teamwork", "Problem Solving", "Critical Thinking",
|
261 |
+
"Leadership", "Organization", "Time Management", "Flexibility", "Adaptability",
|
262 |
+
"Project Management", "Attention to Detail", "Creativity", "Analytical Skills",
|
263 |
+
"Customer Service", "Interpersonal Skills", "Presentation Skills", "Negotiation"
|
264 |
]
|
265 |
|
266 |
+
combined_skills = tech_skills + soft_skills
|
267 |
+
|
268 |
clean_text = job_description.lower()
|
269 |
|
270 |
# Extract job title
|
|
|
287 |
break
|
288 |
except: pass
|
289 |
|
290 |
+
# Extract skills using the same method as for resumes
|
291 |
+
required_skills = []
|
292 |
+
|
293 |
+
# Technical skills extraction
|
294 |
+
for skill in combined_skills:
|
295 |
+
skill_lower = skill.lower()
|
296 |
+
# Direct match
|
297 |
+
if skill_lower in clean_text:
|
298 |
+
required_skills.append(skill)
|
299 |
+
# Or match skill as part of a phrase
|
300 |
+
elif re.search(r'\b' + re.escape(skill_lower) + r'(?:\s|\b|ing|er|ed|ment)', clean_text):
|
301 |
+
required_skills.append(skill)
|
302 |
+
|
303 |
+
# Remove duplicates
|
304 |
+
required_skills = list(set(required_skills))
|
305 |
|
306 |
# Fallback if no skills found
|
307 |
if not required_skills:
|
|
|
319 |
}
|
320 |
|
321 |
def evaluate_job_fit(resume_summary, job_requirements, models):
    """Score how well a resume summary fits a job and describe the result.

    Three partial scores (skills, years of experience, job-title overlap) are
    each squeezed through a curve that deliberately favours the middle
    ("Potential Fit") band, combined with weights 0.45 / 0.35 / 0.20, and the
    weighted total is mapped to a three-level fit score.

    Side effect: the intermediate values are stored in
    ``st.session_state['debug_scores']``.

    Args:
        resume_summary: Resume summary text.  ``Name:`` and
            ``Expected Industry:`` lines, when present, are quoted in the
            assessment.
        job_requirements: Mapping with the keys ``"required_skills"`` (list
            of skill names), ``"years_experience"`` (number) and ``"title"``
            (string).
        models: Not used by this function; kept for interface compatibility.

    Returns:
        Tuple ``(assessment, fit_score, elapsed_seconds)`` where ``fit_score``
        is 2 (good fit), 1 (potential fit) or 0 (no fit).
    """
    start = time.time()

    # Basic extraction
    required_skills = job_requirements["required_skills"]
    years_required = job_requirements["years_experience"]
    job_title = job_requirements["title"]
    skills_mentioned = set(extract_skills(resume_summary))

    # Normalise the resume once instead of once per title word.
    resume_lower = resume_summary.lower()
    resume_words = [w for w in resume_lower.split() if len(w) > 3]

    # Calculate matches
    matching_skills = [skill for skill in required_skills if skill in skills_mentioned]
    missing = [skill for skill in required_skills if skill not in skills_mentioned]

    # 1. Skill match score - curve pushes results toward the middle range.
    if not required_skills:
        # No required skills: default to the middle score.
        skill_match = 0.5
    else:
        raw_match = len(matching_skills) / len(required_skills)
        if raw_match <= 0.3:
            skill_match = 0.2 + raw_match
        elif raw_match <= 0.7:
            skill_match = 0.5  # Deliberately flat for "potential fit"
        else:
            skill_match = 0.6 + (raw_match - 0.7) * 1.33

    # 2. Experience match - also biased toward middle scores.
    exp_match = re.search(
        r'(\d+)\+?\s*years?\s*(?:of)?\s*experience', resume_summary, re.IGNORECASE
    )
    # group(1) is \d+, so int() cannot fail here; no exception guard is needed.
    years_experience = int(exp_match.group(1)) if exp_match else 0

    if years_required == 0:
        # No experience required: slight preference for experienced candidates.
        exp_match_ratio = 0.5 + min(0.3, years_experience * 0.1)
    else:
        ratio = years_experience / max(1, years_required)
        if ratio < 0.5:
            exp_match_ratio = 0.3 + (ratio * 0.4)  # Underqualified but not completely
        elif ratio <= 1.5:
            exp_match_ratio = 0.5  # Just right or close - potential fit
        else:
            exp_match_ratio = 0.7  # Overqualified but still good

    # 3. Title matching - also with middle bias.
    title_words = [w for w in job_title.lower().split() if len(w) > 3]

    if not title_words:
        title_match = 0.5  # Default to middle
    else:
        matches = 0
        for word in title_words:
            if word in resume_lower:
                matches += 1
            # Half credit when a resume word shares the first four letters.
            elif any(w.startswith(word[:4]) for w in resume_words):
                matches += 0.5

        raw_title_match = matches / len(title_words)
        if raw_title_match < 0.3:
            title_match = 0.3 + (raw_title_match * 0.5)
        elif raw_title_match <= 0.7:
            title_match = 0.5  # Middle range
        else:
            title_match = 0.6 + (raw_title_match - 0.7) * 0.5

    # Convert individual scores to the 0-2 scale.
    skill_score = skill_match * 2.0
    exp_score = exp_match_ratio * 2.0
    title_score = title_match * 2.0

    # Extract candidate info
    name_match = re.search(r'Name:\s*(.*?)(?=\n|\Z)', resume_summary)
    name = name_match.group(1).strip() if name_match else "The candidate"

    industry_match = re.search(r'Expected Industry:\s*(.*?)(?=\n|\Z)', resume_summary)
    industry = industry_match.group(1).strip() if industry_match else "unspecified industry"

    # Weighted score, then a transformation that widens the middle band.
    raw_weighted = (skill_score * 0.45) + (exp_score * 0.35) + (title_score * 0.20)

    if raw_weighted < 0.8:
        weighted_score = 0.4 + (raw_weighted * 0.5)  # Push low scores up a bit
    elif raw_weighted <= 1.4:
        weighted_score = 1.0  # Force middle scores to exactly middle
    else:
        weighted_score = 1.4 + ((raw_weighted - 1.4) * 0.6)  # Pull high scores down a bit

    # Thresholds with a larger middle range.
    if weighted_score >= 1.3:
        fit_score = 2  # Good fit
    elif weighted_score >= 0.7:
        fit_score = 1  # Much wider "Potential Fit" range
    else:
        fit_score = 0  # Not a fit

    # A "good fit" needs at least 75% of the required skills; otherwise downgrade.
    if fit_score == 2 and len(matching_skills) < len(required_skills) * 0.75:
        fit_score = 1

    # Store debug info
    st.session_state['debug_scores'] = {
        'skill_match': skill_match,
        'skill_score': skill_score,
        'exp_match_ratio': exp_match_ratio,
        'exp_score': exp_score,
        'title_match': title_match,
        'title_score': title_score,
        'raw_weighted': raw_weighted,
        'weighted_score': weighted_score,
        'fit_score': fit_score,
        'matching_skills': matching_skills,
        'required_skills': required_skills,
        'skill_percentage': f"{len(matching_skills)}/{len(required_skills)}"
    }

    # Generate assessment
    if fit_score == 2:
        assessment = f"{fit_score}: GOOD FIT - {name} demonstrates strong alignment with the {job_title} position. Their background in {industry} appears well-suited for this role's requirements."
    elif fit_score == 1:
        # With no missing skills the sentence used to end in "needed in ."
        gap_areas = ', '.join(missing[:2]) or "role-specific experience"
        assessment = f"{fit_score}: POTENTIAL FIT - {name} shows potential for the {job_title} role but has gaps in certain areas. Additional training might be needed in {gap_areas}."
    else:
        assessment = f"{fit_score}: NO FIT - {name}'s background shows limited alignment with this {job_title} position. Their experience and skills differ significantly from the requirements."

    return assessment, fit_score, time.time() - start
|
462 |
|
463 |
def analyze_job_fit(resume_summary, job_description, models):
|
464 |
start = time.time()
|
|
|
470 |
# Main Function
|
471 |
#####################################
|
472 |
def main():
|
473 |
+
# Initialize session state for debug info
|
474 |
+
if 'debug_scores' not in st.session_state:
|
475 |
+
st.session_state['debug_scores'] = {}
|
476 |
+
|
477 |
st.title("Resume-Job Fit Analyzer")
|
478 |
st.markdown("Upload your resume file in **.docx**, **.doc**, or **.txt** format and enter a job description to see how well you match.")
|
479 |
|
|
|
482 |
uploaded_file = st.file_uploader("Upload your resume", type=["docx", "doc", "txt"])
|
483 |
job_description = st.text_area("Enter Job Description", height=200, placeholder="Paste the job description here...")
|
484 |
|
485 |
+
# Debug toggle (uncomment to add debug mode)
|
486 |
+
# show_debug = st.sidebar.checkbox("Show Debug Info", value=False)
|
487 |
+
|
488 |
# Process when button clicked
|
489 |
if uploaded_file and job_description and st.button("Analyze Job Fit"):
|
490 |
progress = st.progress(0)
|
|
|
539 |
- If interested in this field, focus on developing the required skills
|
540 |
- Consider similar roles with fewer experience requirements
|
541 |
""")
|
542 |
+
|
543 |
+
# Show debug scores if enabled
|
544 |
+
# if show_debug:
|
545 |
+
# st.subheader("Debug Information")
|
546 |
+
# st.json(st.session_state['debug_scores'])
|
547 |
|
548 |
if __name__ == "__main__":
|
549 |
main()
|