Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -115,53 +115,19 @@ def extract_text_from_file(file_obj):
|
|
115 |
|
116 |
# Information extraction functions
|
117 |
def extract_skills(text):
|
118 |
-
"""Extract skills from text
|
119 |
-
# Expanded skill keywords dictionary
|
120 |
skill_keywords = {
|
121 |
-
"Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "React", "Angular",
|
122 |
-
|
123 |
-
"
|
124 |
-
|
125 |
-
|
126 |
-
"
|
127 |
-
|
128 |
-
"Web Dev": ["React", "Angular", "Node.js", "Frontend", "Backend", "Full-Stack", "REST API", "GraphQL",
|
129 |
-
"Web Development", "WordPress", "Drupal", "CMS", "SEO", "UI/UX", "Responsive Design", "AJAX"],
|
130 |
-
"Software Dev": ["Agile", "Scrum", "Git", "DevOps", "Docker", "CI/CD", "Jenkins", "Software Development",
|
131 |
-
"Object-Oriented Programming", "Design Patterns", "Testing", "QA", "Software Architecture",
|
132 |
-
"Version Control", "JIRA", "Microservices", "Code Review", "Debugging"],
|
133 |
-
"Cloud": ["AWS", "Azure", "Google Cloud", "Lambda", "S3", "EC2", "Cloud Computing", "Serverless",
|
134 |
-
"Infrastructure as Code", "Cloud Architecture", "Cloud Security", "Kubernetes", "Load Balancing"],
|
135 |
-
"Business": ["Project Management", "Leadership", "Teamwork", "Agile", "Scrum", "Business Analysis",
|
136 |
-
"Requirements Gathering", "Client Relations", "Communication", "Presentation", "Meeting Facilitation",
|
137 |
-
"Strategic Planning", "Process Improvement", "Problem Solving", "Decision Making", "Stakeholder Management"]
|
138 |
}
|
139 |
|
140 |
text_lower = text.lower()
|
141 |
-
|
142 |
-
# Method 1: Look for exact matches
|
143 |
-
exact_skills = [skill for _, skills in skill_keywords.items() for skill in skills if skill.lower() in text_lower]
|
144 |
-
|
145 |
-
# Method 2: Use regex for more flexible matching (accounts for variations)
|
146 |
-
more_skills = []
|
147 |
-
for category, skills in skill_keywords.items():
|
148 |
-
for skill in skills:
|
149 |
-
# This handles cases like "Python developer" or "experienced in Python"
|
150 |
-
if re.search(r'\b' + re.escape(skill.lower()) + r'(?:\s|\b|ing|er|ed)', text_lower):
|
151 |
-
more_skills.append(skill)
|
152 |
-
|
153 |
-
# Combine both methods and remove duplicates
|
154 |
-
all_skills = list(set(exact_skills + more_skills))
|
155 |
-
|
156 |
-
# Add soft skill detection
|
157 |
-
soft_skills = ["Communication", "Teamwork", "Problem Solving", "Critical Thinking",
|
158 |
-
"Leadership", "Organization", "Time Management", "Flexibility", "Adaptability"]
|
159 |
-
|
160 |
-
for skill in soft_skills:
|
161 |
-
if skill.lower() in text_lower or re.search(r'\b' + re.escape(skill.lower()) + r'(?:\s|$)', text_lower):
|
162 |
-
all_skills.append(skill)
|
163 |
-
|
164 |
-
return all_skills
|
165 |
|
166 |
@lru_cache(maxsize=32)
|
167 |
def extract_name(text_start):
|
@@ -252,19 +218,9 @@ def summarize_resume_text(resume_text, models):
|
|
252 |
return summary, time.time() - start
|
253 |
|
254 |
def extract_job_requirements(job_description, models):
|
255 |
-
# Expanded technical skills list for better matching
|
256 |
tech_skills = [
|
257 |
-
"Python", "Java", "JavaScript", "SQL", "HTML", "CSS", "React", "Angular", "
|
258 |
-
"
|
259 |
-
"AWS", "Azure", "Google Cloud", "Docker", "Kubernetes", "CI/CD", "DevOps",
|
260 |
-
"MySQL", "MongoDB", "PostgreSQL", "Oracle", "NoSQL", "Database", "Data Analysis",
|
261 |
-
"Project Management", "Agile", "Scrum", "Leadership", "Communication", "Teamwork",
|
262 |
-
"Git", "Software Development", "Full Stack", "Frontend", "Backend", "RESTful API",
|
263 |
-
"Mobile Development", "Android", "iOS", "Swift", "Kotlin", "React Native", "Flutter",
|
264 |
-
"Business Analysis", "Requirements", "UX/UI", "Design", "Product Management",
|
265 |
-
"Testing", "QA", "Security", "Cloud Computing", "Networking", "System Administration",
|
266 |
-
"Linux", "Windows", "Excel", "PowerPoint", "Word", "Microsoft Office",
|
267 |
-
"Problem Solving", "Critical Thinking", "Analytical Skills"
|
268 |
]
|
269 |
|
270 |
clean_text = job_description.lower()
|
@@ -308,108 +264,86 @@ def extract_job_requirements(job_description, models):
|
|
308 |
}
|
309 |
|
310 |
def evaluate_job_fit(resume_summary, job_requirements, models):
|
311 |
-
|
|
|
|
|
|
|
312 |
|
313 |
-
#
|
314 |
required_skills = job_requirements["required_skills"]
|
315 |
years_required = job_requirements["years_experience"]
|
316 |
job_title = job_requirements["title"]
|
|
|
|
|
|
|
317 |
skills_mentioned = extract_skills(resume_summary)
|
318 |
|
319 |
-
# Calculate
|
320 |
matching_skills = [skill for skill in required_skills if skill in skills_mentioned]
|
|
|
321 |
|
322 |
-
#
|
323 |
-
|
324 |
-
|
325 |
-
if not required_skills:
|
326 |
-
skill_match = 0.5
|
327 |
-
else:
|
328 |
-
raw_match = len(matching_skills) / len(required_skills)
|
329 |
-
# Apply a more gradual scaling to avoid big jumps
|
330 |
-
skill_match = raw_match ** 0.7 # Using power < 1 gives more weight to partial matches
|
331 |
-
|
332 |
-
# Extract experience
|
333 |
years_experience = 0
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
except:
|
|
|
338 |
|
339 |
-
#
|
340 |
-
|
341 |
-
if years_required == 0:
|
342 |
-
# If no experience required, having 1+ years is good, 0 is neutral
|
343 |
-
exp_match_ratio = min(1.0, years_experience / 2 + 0.5)
|
344 |
-
else:
|
345 |
-
# For jobs requiring experience, use a more gradual scale
|
346 |
-
exp_match_ratio = min(1.0, (years_experience / max(1, years_required)) ** 0.8)
|
347 |
|
348 |
-
#
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
#
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
# Good Fit: 1.25+ (was 1.5)
|
379 |
-
# Potential Fit: 0.6-1.25 (was 0.8-1.5)
|
380 |
-
# No Fit: <0.6 (was <0.8)
|
381 |
-
if weighted_score >= 1.25:
|
382 |
fit_score = 2 # Good fit
|
383 |
-
elif weighted_score >= 0.
|
384 |
-
fit_score = 1 # Potential fit
|
385 |
else:
|
386 |
fit_score = 0 # Not a fit
|
387 |
|
388 |
-
#
|
389 |
-
|
390 |
-
'skill_match': skill_match,
|
391 |
-
'skill_score': skill_score,
|
392 |
-
'exp_match_ratio': exp_match_ratio,
|
393 |
-
'exp_score': exp_score,
|
394 |
-
'title_match': title_match,
|
395 |
-
'title_score': title_score,
|
396 |
-
'weighted_score': weighted_score,
|
397 |
-
'fit_score': fit_score,
|
398 |
-
'matching_skills': matching_skills,
|
399 |
-
'required_skills': required_skills
|
400 |
-
}
|
401 |
-
|
402 |
-
# Generate assessment
|
403 |
-
missing = [skill for skill in required_skills if skill not in skills_mentioned]
|
404 |
|
405 |
if fit_score == 2:
|
406 |
-
|
407 |
elif fit_score == 1:
|
408 |
-
|
409 |
else:
|
410 |
-
|
411 |
|
412 |
-
|
|
|
|
|
413 |
|
414 |
def analyze_job_fit(resume_summary, job_description, models):
|
415 |
start = time.time()
|
@@ -421,10 +355,6 @@ def analyze_job_fit(resume_summary, job_description, models):
|
|
421 |
# Main Function
|
422 |
#####################################
|
423 |
def main():
|
424 |
-
# Initialize session state for debug info
|
425 |
-
if 'debug_scores' not in st.session_state:
|
426 |
-
st.session_state['debug_scores'] = {}
|
427 |
-
|
428 |
st.title("Resume-Job Fit Analyzer")
|
429 |
st.markdown("Upload your resume file in **.docx**, **.doc**, or **.txt** format and enter a job description to see how well you match.")
|
430 |
|
@@ -487,10 +417,6 @@ def main():
|
|
487 |
- If interested in this field, focus on developing the required skills
|
488 |
- Consider similar roles with fewer experience requirements
|
489 |
""")
|
490 |
-
|
491 |
-
# Show debug scores if needed (uncomment this to debug scoring)
|
492 |
-
# st.subheader("Debug Information")
|
493 |
-
# st.json(st.session_state['debug_scores'])
|
494 |
|
495 |
if __name__ == "__main__":
|
496 |
main()
|
|
|
115 |
|
116 |
# Information extraction functions
|
117 |
def extract_skills(text):
    """Return the known skill keywords that appear in *text*.

    Matching is a case-insensitive substring test against a fixed keyword
    table, so a short keyword also matches inside a longer word (for
    example, "Java" is reported for text that only contains "JavaScript").

    Args:
        text: Free-form text to scan. ``None`` or an empty string yields an
            empty list.

    Returns:
        A list of skill names spelled as in the keyword table, in table
        order, with each skill listed at most once.
    """
    skill_keywords = {
        "Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "React", "Angular"],
        "Data Science": ["Machine Learning", "Data Analysis", "Statistics", "TensorFlow", "PyTorch", "AI", "NLP"],
        "Database": ["SQL", "MySQL", "MongoDB", "PostgreSQL", "Oracle", "Redis"],
        "Web Dev": ["React", "Angular", "Node.js", "Frontend", "Backend", "Full-Stack", "REST API"],
        "Software Dev": ["Agile", "Scrum", "Git", "DevOps", "Docker", "CI/CD", "Jenkins"],
        "Cloud": ["AWS", "Azure", "Google Cloud", "Lambda", "S3", "EC2"],
        "Business": ["Project Management", "Leadership", "Teamwork", "Agile", "Scrum"],
    }

    if not text:
        return []

    text_lower = text.lower()
    # Several keywords ("SQL", "React", "Angular", "Agile", "Scrum") sit in
    # two categories; dict.fromkeys drops the repeats while keeping the
    # first-seen order.
    return list(dict.fromkeys(
        skill
        for skills in skill_keywords.values()
        for skill in skills
        if skill.lower() in text_lower
    ))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
|
132 |
@lru_cache(maxsize=32)
|
133 |
def extract_name(text_start):
|
|
|
218 |
return summary, time.time() - start
|
219 |
|
220 |
def extract_job_requirements(job_description, models):
|
|
|
221 |
tech_skills = [
|
222 |
+
"Python", "Java", "JavaScript", "SQL", "HTML", "CSS", "React", "Angular", "Machine Learning", "AWS",
|
223 |
+
"Azure", "Docker", "MySQL", "MongoDB", "Project Management", "Agile", "Leadership", "Git", "DevOps"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
]
|
225 |
|
226 |
clean_text = job_description.lower()
|
|
|
264 |
}
|
265 |
|
266 |
def evaluate_job_fit(resume_summary, job_requirements, models):
    """Score how well a resume summary fits a job and describe the result.

    Uses a direct, rule-based method rather than relying solely on sentiment
    analysis: skill overlap, years of experience and job-title word overlap
    are each put on a 0-2 scale and combined with weights 50% / 30% / 20%.

    Args:
        resume_summary: Resume summary text. Lines of the form ``Name: ...``
            and ``Expected Industry: ...`` are used in the assessment text
            when present.
        job_requirements: Mapping providing ``"required_skills"``,
            ``"years_experience"`` and ``"title"``.
        models: Not used by this function; kept so the call signature stays
            the same for existing callers.

    Returns:
        A tuple ``(fit_assessment, fit_score, execution_time)``.
        ``fit_score`` is 2 (good fit), 1 (potential fit) or 0 (not a fit),
        ``fit_assessment`` is the matching human-readable text, and
        ``execution_time`` is the elapsed time in seconds.
    """
    start_time = time.time()

    required_skills = job_requirements["required_skills"]
    years_required = job_requirements["years_experience"]
    job_title = job_requirements["title"]

    # Skill overlap between the job and what the resume mentions.
    skills_mentioned = extract_skills(resume_summary)
    matching_skills = [skill for skill in required_skills if skill in skills_mentioned]
    missing_skills = [skill for skill in required_skills if skill not in skills_mentioned]
    skill_match_percentage = len(matching_skills) / len(required_skills) if required_skills else 0

    # Years of experience: first "<N> years [of] experience" phrase, else 0.
    experience_match = re.search(
        r'(\d+)\+?\s*years?\s*(?:of)?\s*experience', resume_summary, re.IGNORECASE
    )
    # The capture group is digits only, so int() cannot fail here.
    years_experience = int(experience_match.group(1)) if experience_match else 0

    # A job with no stated experience requirement gets a neutral 0.5.
    if years_required > 0:
        exp_match_ratio = min(1.0, years_experience / max(1, years_required))
    else:
        exp_match_ratio = 0.5

    # Title overlap: share of title words longer than 3 characters that
    # occur (as substrings) in the resume summary.
    resume_lower = resume_summary.lower()
    title_words = [word for word in job_title.lower().split() if len(word) > 3]
    title_matches = sum(1 for word in title_words if word in resume_lower)
    title_match = title_matches / len(title_words) if title_words else 0

    # Per-dimension scores on a 0-2 scale. The x3 factor means roughly two
    # thirds of the required skills already earns the full skill score.
    skill_score = min(2, skill_match_percentage * 3)
    exp_score = min(2, exp_match_ratio * 2)
    title_score = min(2, title_match * 2)

    # Candidate details used only to personalise the assessment text.
    name_match = re.search(r'Name:\s*(.*?)(?=\n|\Z)', resume_summary)
    name = name_match.group(1).strip() if name_match else "The candidate"

    industry_match = re.search(r'Expected Industry:\s*(.*?)(?=\n|\Z)', resume_summary)
    industry = industry_match.group(1).strip() if industry_match else "unspecified industry"

    # Skills: 50%, Experience: 30%, Title match: 20%
    weighted_score = (skill_score * 0.5) + (exp_score * 0.3) + (title_score * 0.2)

    if weighted_score >= 1.5:
        fit_score = 2  # Good fit
    elif weighted_score >= 0.8:
        fit_score = 1  # Potential fit
    else:
        fit_score = 0  # Not a fit

    if fit_score == 2:
        fit_assessment = f"{fit_score}: GOOD FIT - {name} demonstrates strong alignment with the {job_title} position. Their background in {industry} and professional experience appear well-suited for this role's requirements. The technical expertise matches what the position demands."
    elif fit_score == 1:
        # Only mention training when there is a concrete skill to name;
        # otherwise the sentence would read "needed in  if pursuing ...".
        if missing_skills:
            training_note = f" Additional training might be needed in {', '.join(missing_skills[:2])} if pursuing this opportunity."
        else:
            training_note = ""
        fit_assessment = f"{fit_score}: POTENTIAL FIT - {name} shows potential for the {job_title} role with some relevant experience, though there are gaps in certain technical areas. Their {industry} background provides partial alignment with the position requirements.{training_note}"
    else:
        fit_assessment = f"{fit_score}: NO FIT - {name}'s current background shows limited alignment with this {job_title} position. Their experience level and technical background differ significantly from the role requirements. A position better matching their {industry} expertise might be more suitable."

    execution_time = time.time() - start_time

    return fit_assessment, fit_score, execution_time
|
347 |
|
348 |
def analyze_job_fit(resume_summary, job_description, models):
|
349 |
start = time.time()
|
|
|
355 |
# Main Function
|
356 |
#####################################
|
357 |
def main():
|
|
|
|
|
|
|
|
|
358 |
st.title("Resume-Job Fit Analyzer")
|
359 |
st.markdown("Upload your resume file in **.docx**, **.doc**, or **.txt** format and enter a job description to see how well you match.")
|
360 |
|
|
|
417 |
- If interested in this field, focus on developing the required skills
|
418 |
- Consider similar roles with fewer experience requirements
|
419 |
""")
|
|
|
|
|
|
|
|
|
420 |
|
421 |
if __name__ == "__main__":
|
422 |
main()
|