Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -171,9 +171,6 @@ def basic_summarize(text, max_length=100):
|
|
171 |
def evaluate_job_fit(resume_summary, job_requirements, models):
|
172 |
"""
|
173 |
Use the sentiment model to evaluate job fit with multiple analyses
|
174 |
-
|
175 |
-
This function deliberately takes time to do a more thorough analysis, creating
|
176 |
-
multiple perspectives for the sentiment model to evaluate.
|
177 |
"""
|
178 |
start_time = time.time()
|
179 |
|
@@ -332,19 +329,19 @@ def evaluate_job_fit(resume_summary, job_requirements, models):
|
|
332 |
|
333 |
# Now determine the final score (0, 1, or 2)
|
334 |
if combined_score >= 0.7 and skills_match_percentage >= 70 and experience_match == "sufficient":
|
335 |
-
final_score = 2 #
|
336 |
elif combined_score >= 0.4 or (skills_match_percentage >= 50 and experience_match == "sufficient"):
|
337 |
final_score = 1 # Potential fit
|
338 |
else:
|
339 |
final_score = 0 # Not fit
|
340 |
|
341 |
-
# Generate assessment text based on the score
|
342 |
if final_score == 2:
|
343 |
-
assessment = f"{final_score}:
|
344 |
elif final_score == 1:
|
345 |
-
assessment = f"{final_score}:
|
346 |
else:
|
347 |
-
assessment = f"{final_score}:
|
348 |
|
349 |
execution_time = time.time() - start_time
|
350 |
|
@@ -401,7 +398,117 @@ def extract_text_from_file(file_obj):
|
|
401 |
# Functions for Information Extraction
|
402 |
#####################################
|
403 |
|
404 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
405 |
@lru_cache(maxsize=32)
|
406 |
def extract_name(text_start):
|
407 |
"""Extract candidate name from the beginning of resume text"""
|
@@ -424,8 +531,9 @@ def extract_name(text_start):
|
|
424 |
|
425 |
return "Unknown (please extract from resume)"
|
426 |
|
427 |
-
|
428 |
-
|
|
|
429 |
# Common skill categories - reduced keyword list for speed
|
430 |
skill_categories = {
|
431 |
"Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go"],
|
@@ -439,110 +547,44 @@ def extract_skills_and_work(text):
|
|
439 |
"Design": ["UX/UI", "User Experience", "Design Thinking", "Adobe"]
|
440 |
}
|
441 |
|
442 |
-
# Work experience extraction
|
443 |
-
work_headers = [
|
444 |
-
"work experience", "professional experience", "employment history",
|
445 |
-
"work history", "experience"
|
446 |
-
]
|
447 |
-
|
448 |
-
next_section_headers = [
|
449 |
-
"education", "skills", "certifications", "projects", "achievements"
|
450 |
-
]
|
451 |
-
|
452 |
# Process everything at once
|
453 |
-
lines = text.split('\n')
|
454 |
text_lower = text.lower()
|
455 |
|
456 |
# Skills extraction
|
457 |
-
|
458 |
for category, skills in skill_categories.items():
|
459 |
-
category_skills = []
|
460 |
for skill in skills:
|
461 |
if skill.lower() in text_lower:
|
462 |
-
|
463 |
-
|
464 |
-
if category_skills:
|
465 |
-
found_skills.append(f"{category}: {', '.join(category_skills)}")
|
466 |
-
|
467 |
-
# Work experience extraction - simplified approach
|
468 |
-
work_section = []
|
469 |
-
in_work_section = False
|
470 |
-
|
471 |
-
for idx, line in enumerate(lines):
|
472 |
-
line_lower = line.lower().strip()
|
473 |
-
|
474 |
-
# Start of work section
|
475 |
-
if not in_work_section:
|
476 |
-
if any(header in line_lower for header in work_headers):
|
477 |
-
in_work_section = True
|
478 |
-
continue
|
479 |
-
# End of work section
|
480 |
-
elif in_work_section:
|
481 |
-
if any(header in line_lower for header in next_section_headers):
|
482 |
-
break
|
483 |
-
|
484 |
-
if line.strip():
|
485 |
-
work_section.append(line.strip())
|
486 |
-
|
487 |
-
# Simplified work formatting
|
488 |
-
if not work_section:
|
489 |
-
work_experience = "Work experience not clearly identified"
|
490 |
-
else:
|
491 |
-
# Just take the first 5-7 lines of the work section as a summary
|
492 |
-
work_lines = []
|
493 |
-
company_count = 0
|
494 |
-
current_company = ""
|
495 |
-
|
496 |
-
for line in work_section:
|
497 |
-
# New company entry often has a date
|
498 |
-
if re.search(r'(19|20)\d{2}', line):
|
499 |
-
company_count += 1
|
500 |
-
if company_count <= 3: # Limit to 3 most recent positions
|
501 |
-
current_company = line
|
502 |
-
work_lines.append(f"**{line}**")
|
503 |
-
else:
|
504 |
-
break
|
505 |
-
elif company_count <= 3 and len(work_lines) < 10: # Limit total lines
|
506 |
-
work_lines.append(line)
|
507 |
-
|
508 |
-
work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"
|
509 |
-
|
510 |
-
skills_formatted = "\n• " + "\n• ".join(found_skills) if found_skills else "No specific technical skills clearly identified"
|
511 |
|
512 |
-
return
|
513 |
|
514 |
#####################################
|
515 |
# Function: Summarize Resume Text
|
516 |
#####################################
|
517 |
def summarize_resume_text(resume_text, models):
|
518 |
"""
|
519 |
-
Generates a structured summary of the resume text
|
520 |
"""
|
521 |
start_time = time.time()
|
522 |
|
523 |
-
#
|
524 |
-
base_summary = summarize_text(resume_text, models, max_length=100)
|
525 |
-
|
526 |
-
# Extract name from the beginning of the resume
|
527 |
name = extract_name(resume_text[:500])
|
|
|
|
|
|
|
|
|
528 |
|
529 |
-
#
|
530 |
-
|
531 |
-
|
532 |
-
# Extract education level - simplified approach
|
533 |
-
education_level = "Not specified"
|
534 |
-
education_terms = ["bachelor", "master", "phd", "doctorate", "mba", "degree"]
|
535 |
-
for term in education_terms:
|
536 |
-
if term in resume_text.lower():
|
537 |
-
education_level = "Higher education degree mentioned"
|
538 |
-
break
|
539 |
|
540 |
-
# Format the structured summary
|
541 |
formatted_summary = f"Name: {name}\n\n"
|
542 |
-
formatted_summary += f"
|
543 |
-
formatted_summary += f"
|
544 |
-
formatted_summary += f"
|
545 |
-
formatted_summary += f"
|
|
|
546 |
|
547 |
execution_time = time.time() - start_time
|
548 |
|
@@ -688,9 +730,9 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
|
|
688 |
|
689 |
# Display fit score with label
|
690 |
fit_labels = {
|
691 |
-
0: "NOT FIT
|
692 |
-
1: "POTENTIAL FIT
|
693 |
-
2: "
|
694 |
}
|
695 |
|
696 |
# Show the score prominently
|
@@ -706,7 +748,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
|
|
706 |
|
707 |
if fit_score == 2:
|
708 |
st.markdown("""
|
709 |
-
- Apply for this position as you appear to be a
|
710 |
- Prepare for interviews by focusing on your relevant experience
|
711 |
- Highlight your matching skills in your cover letter
|
712 |
""")
|
|
|
171 |
def evaluate_job_fit(resume_summary, job_requirements, models):
|
172 |
"""
|
173 |
Use the sentiment model to evaluate job fit with multiple analyses
|
|
|
|
|
|
|
174 |
"""
|
175 |
start_time = time.time()
|
176 |
|
|
|
329 |
|
330 |
# Now determine the final score (0, 1, or 2)
|
331 |
if combined_score >= 0.7 and skills_match_percentage >= 70 and experience_match == "sufficient":
|
332 |
+
final_score = 2 # Good fit
|
333 |
elif combined_score >= 0.4 or (skills_match_percentage >= 50 and experience_match == "sufficient"):
|
334 |
final_score = 1 # Potential fit
|
335 |
else:
|
336 |
final_score = 0 # Not fit
|
337 |
|
338 |
+
# Generate concise assessment text based on the score
|
339 |
if final_score == 2:
|
340 |
+
assessment = f"{final_score}: Skills match {skills_match_percentage}%, Experience match {experience_years}/{years_required} yrs. Strong technical alignment with {len(skills_in_resume)}/{len(required_skills)} required skills."
|
341 |
elif final_score == 1:
|
342 |
+
assessment = f"{final_score}: Skills match {skills_match_percentage}%, Experience {experience_match}. Meets some requirements but has gaps in {len(required_skills) - len(skills_in_resume)} skill areas."
|
343 |
else:
|
344 |
+
assessment = f"{final_score}: Skills match only {skills_match_percentage}%, Experience {experience_match}. Significant gaps in critical requirements for this position."
|
345 |
|
346 |
execution_time = time.time() - start_time
|
347 |
|
|
|
398 |
# Functions for Information Extraction
|
399 |
#####################################
|
400 |
|
401 |
+
# Extract age from resume
|
402 |
+
def extract_age(text):
    """Extract the candidate's age from resume text.

    Two kinds of evidence are checked, in this order:

    1. An explicitly stated age ("Age: 29", "35 years old").
    2. A birth year following "DOB:" or "Date of birth:", converted to an
       approximate age using the current calendar year.

    Args:
        text: Raw resume text. May be empty or None.

    Returns:
        The age as a string of digits, or "Not specified" when nothing
        usable is found.
    """
    # Local import keeps this function self-contained regardless of what
    # the module already imports at the top of the file.
    from datetime import date

    if not text:
        return "Not specified"

    text_lower = text.lower()

    # Explicitly stated ages. The leading \b stops "age" from matching
    # inside words such as "manage" or "page"; the digit guards stop a
    # longer number (e.g. a year) from being truncated to two digits.
    stated_age_patterns = (
        r'\bage:?\s*(\d{1,2})(?!\d)',
        r'(?<!\d)(\d{1,2})\s*years\s*old\b',
    )
    for pattern in stated_age_patterns:
        match = re.search(pattern, text_lower)
        if match:
            return match.group(1)

    # Birth years. The lazy, same-line scan picks the FIRST plausible
    # 19xx/20xx year after the label instead of the last 4-digit run on
    # the line (which could be part of a phone number or postcode).
    birth_year_patterns = (
        r'\bdob:[^\n]*?(?<!\d)((?:19|20)\d{2})(?!\d)',
        r'\bdate of birth:[^\n]*?(?<!\d)((?:19|20)\d{2})(?!\d)',
    )
    current_year = date.today().year
    for pattern in birth_year_patterns:
        match = re.search(pattern, text_lower)
        if match:
            age = current_year - int(match.group(1))
            # Ignore future or implausibly old birth years.
            if 0 < age < 120:
                return str(age)

    return "Not specified"
|
427 |
+
|
428 |
+
# Extract industry preference
|
429 |
+
def extract_industry(text):
    """Guess the candidate's target industry from keyword frequency.

    Every industry has a list of keywords; the industry whose keywords
    occur most often in the resume wins. Ties go to the industry listed
    first in the table below.

    Matching rules:
      * Ordinary keywords are matched case-insensitively at the start of
        a word, so "engineer" also counts "engineers" but "brand" does
        not count "rebrand".
      * All-caps acronyms ("IT", "AI") are matched case-sensitively as
        whole words. Plain substring matching on lowercased text would
        count "it"/"ai" inside words like "with", "city", "maintain" or
        "training" and skew almost every resume toward Technology or
        Data Science.

    Args:
        text: Raw resume text. May be empty or None.

    Returns:
        The best-matching industry name, or "Not clearly specified" when
        no keyword occurs at all.
    """
    if not text:
        return "Not clearly specified"

    # Common industry keywords
    industry_keywords = {
        "Technology": ["software", "programming", "developer", "IT", "tech", "computer", "digital"],
        "Finance": ["banking", "financial", "accounting", "finance", "analyst"],
        "Healthcare": ["medical", "health", "hospital", "clinical", "nurse", "doctor", "patient"],
        "Education": ["teaching", "teacher", "professor", "education", "university", "school", "academic"],
        "Marketing": ["marketing", "advertising", "digital marketing", "social media", "brand"],
        "Engineering": ["engineer", "engineering", "mechanical", "civil", "electrical"],
        "Data Science": ["data science", "machine learning", "AI", "analytics", "big data"],
        "Management": ["manager", "management", "leadership", "executive", "director"],
        "Consulting": ["consultant", "consulting", "advisor"],
        "Sales": ["sales", "business development", "account manager", "client relations"]
    }

    text_lower = text.lower()

    def count_keyword(keyword):
        """Count occurrences of one keyword under the rules above."""
        if keyword.isupper():
            # Acronym: whole word, original casing.
            return len(re.findall(r'\b' + re.escape(keyword) + r'\b', text))
        # Regular keyword: word-prefix match on the lowercased text.
        return len(re.findall(r'\b' + re.escape(keyword.lower()), text_lower))

    industry_counts = {}
    for industry, keywords in industry_keywords.items():
        count = sum(count_keyword(keyword) for keyword in keywords)
        if count > 0:
            industry_counts[industry] = count

    if industry_counts:
        # max() keeps the first of several equal maxima, i.e. table order.
        return max(industry_counts, key=industry_counts.get)

    return "Not clearly specified"
|
458 |
+
|
459 |
+
# Extract job position preference
|
460 |
+
def extract_job_position(text):
    """Infer the job position the candidate is aiming for.

    Strategy, in order:

    1. Find an objective/summary-style section (or a "seeking a position
       as ..." phrase) in the lowercased text. For each such section, in
       pattern order:
         * if it mentions a known job-title word, return that title with
           up to two preceding words of context when available, else the
           bare title word;
         * otherwise, if the section is longer than 10 characters, return
           it title-cased, cut to the first 10 words plus "..." when it
           is longer than that.
    2. Fall back to something that looks like a current/most recent job
       title ("Experience: <title> at ...", "<title> (current)",
       "<title> (present)").
    3. Give up with "Not explicitly stated".

    Args:
        text: Raw resume text.

    Returns:
        A title-cased position string, or "Not explicitly stated".
    """
    # Section patterns; the capture runs until a blank line, the next
    # "Label:" line, or the end of the text.
    section_regexes = (
        r'objective:?\s*(.*?)(?=\n\n|\n\w+:|\Z)',
        r'career\s*objective:?\s*(.*?)(?=\n\n|\n\w+:|\Z)',
        r'professional\s*summary:?\s*(.*?)(?=\n\n|\n\w+:|\Z)',
        r'summary:?\s*(.*?)(?=\n\n|\n\w+:|\Z)',
        r'seeking\s*(?:a|an)?\s*(?:position|role|opportunity)\s*(?:as|in)?\s*(?:a|an)?\s*([^.]*)',
    )

    # Title words, checked in this priority order.
    known_titles = (
        "developer", "engineer", "analyst", "manager", "director", "specialist",
        "coordinator", "consultant", "designer", "architect", "administrator",
    )

    # Patterns for the current/most recent job title.
    recent_job_regexes = (
        r'experience:.*?(\w+\s+\w+(?:\s+\w+)?)(?=\s*at|\s*\(|\s*-|\s*,|\s*\d{4}|\n)',
        r'(\w+\s+\w+(?:\s+\w+)?)\s*\(\s*current\s*\)',
        r'(\w+\s+\w+(?:\s+\w+)?)\s*\(\s*present\s*\)',
    )

    lowered = text.lower()

    for section_regex in section_regexes:
        section = re.search(section_regex, lowered, re.IGNORECASE | re.DOTALL)
        if not section:
            continue

        objective = section.group(1).strip()

        # Only the first title word found (in priority order) is considered.
        hit = next((word for word in known_titles if word in objective), None)
        if hit is not None:
            # Try to capture one or two words of context before the title.
            with_context = re.search(
                r'(?:a|an)?\s*(\w+\s+' + hit + r'|\w+\s+\w+\s+' + hit + r')',
                objective,
            )
            if with_context:
                return with_context.group(1).strip().title()
            return hit.title()

        # No title word: use the objective text itself if it is substantial,
        # otherwise move on to the next section pattern.
        if len(objective) <= 10:
            continue
        tokens = objective.split()
        if len(tokens) > 10:
            return " ".join(tokens[:10]).title() + "..."
        return objective.title()

    for job_regex in recent_job_regexes:
        recent = re.search(job_regex, lowered, re.IGNORECASE)
        if recent:
            return recent.group(1).strip().title()

    return "Not explicitly stated"
|
510 |
+
|
511 |
+
# Extract name
|
512 |
@lru_cache(maxsize=32)
|
513 |
def extract_name(text_start):
|
514 |
"""Extract candidate name from the beginning of resume text"""
|
|
|
531 |
|
532 |
return "Unknown (please extract from resume)"
|
533 |
|
534 |
+
# Extract skills
|
535 |
+
def extract_skills(text):
|
536 |
+
"""Extract key skills from the resume"""
|
537 |
# Common skill categories - reduced keyword list for speed
|
538 |
skill_categories = {
|
539 |
"Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go"],
|
|
|
547 |
"Design": ["UX/UI", "User Experience", "Design Thinking", "Adobe"]
|
548 |
}
|
549 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
550 |
# Process everything at once
|
|
|
551 |
text_lower = text.lower()
|
552 |
|
553 |
# Skills extraction
|
554 |
+
all_skills = []
|
555 |
for category, skills in skill_categories.items():
|
|
|
556 |
for skill in skills:
|
557 |
if skill.lower() in text_lower:
|
558 |
+
all_skills.append(skill)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
559 |
|
560 |
+
return all_skills
|
561 |
|
562 |
#####################################
|
563 |
# Function: Summarize Resume Text
|
564 |
#####################################
|
565 |
def summarize_resume_text(resume_text, models):
|
566 |
"""
|
567 |
+
Generates a structured summary of the resume text with the critical information
|
568 |
"""
|
569 |
start_time = time.time()
|
570 |
|
571 |
+
# Extract critical information
|
|
|
|
|
|
|
572 |
name = extract_name(resume_text[:500])
|
573 |
+
age = extract_age(resume_text)
|
574 |
+
industry = extract_industry(resume_text)
|
575 |
+
job_position = extract_job_position(resume_text)
|
576 |
+
skills = extract_skills(resume_text)
|
577 |
|
578 |
+
# Use our summarize_text function for a general summary
|
579 |
+
general_summary = summarize_text(resume_text, models, max_length=100)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
580 |
|
581 |
+
# Format the structured summary with different paragraphs for each critical piece
|
582 |
formatted_summary = f"Name: {name}\n\n"
|
583 |
+
formatted_summary += f"Age: {age}\n\n"
|
584 |
+
formatted_summary += f"Expected Industry: {industry}\n\n"
|
585 |
+
formatted_summary += f"Expected Job Position: {job_position}\n\n"
|
586 |
+
formatted_summary += f"Skills: {', '.join(skills)}\n\n"
|
587 |
+
formatted_summary += f"Summary: {general_summary}"
|
588 |
|
589 |
execution_time = time.time() - start_time
|
590 |
|
|
|
730 |
|
731 |
# Display fit score with label
|
732 |
fit_labels = {
|
733 |
+
0: "NOT FIT",
|
734 |
+
1: "POTENTIAL FIT",
|
735 |
+
2: "GOOD FIT"
|
736 |
}
|
737 |
|
738 |
# Show the score prominently
|
|
|
748 |
|
749 |
if fit_score == 2:
|
750 |
st.markdown("""
|
751 |
+
- Apply for this position as you appear to be a good match
|
752 |
- Prepare for interviews by focusing on your relevant experience
|
753 |
- Highlight your matching skills in your cover letter
|
754 |
""")
|