CR7CAD committed on
Commit
99e5c00
·
verified ·
1 Parent(s): 8e57a3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -98
app.py CHANGED
@@ -171,9 +171,6 @@ def basic_summarize(text, max_length=100):
171
  def evaluate_job_fit(resume_summary, job_requirements, models):
172
  """
173
  Use the sentiment model to evaluate job fit with multiple analyses
174
-
175
- This function deliberately takes time to do a more thorough analysis, creating
176
- multiple perspectives for the sentiment model to evaluate.
177
  """
178
  start_time = time.time()
179
 
@@ -332,19 +329,19 @@ def evaluate_job_fit(resume_summary, job_requirements, models):
332
 
333
  # Now determine the final score (0, 1, or 2)
334
  if combined_score >= 0.7 and skills_match_percentage >= 70 and experience_match == "sufficient":
335
- final_score = 2 # Strong fit
336
  elif combined_score >= 0.4 or (skills_match_percentage >= 50 and experience_match == "sufficient"):
337
  final_score = 1 # Potential fit
338
  else:
339
  final_score = 0 # Not fit
340
 
341
- # Generate assessment text based on the score
342
  if final_score == 2:
343
- assessment = f"{final_score}: The candidate is a strong match for this {job_title} position. They have the required {experience_years} years of experience and demonstrate proficiency in key skills including {', '.join(skills_in_resume[:5])}. Their background aligns well with the job requirements."
344
  elif final_score == 1:
345
- assessment = f"{final_score}: The candidate shows potential for this {job_title} position, but has some skill gaps. They match on {skills_match_percentage}% of required skills including {', '.join(skills_in_resume[:3]) if skills_in_resume else 'minimal required skills'}, and their experience is {experience_match}."
346
  else:
347
- assessment = f"{final_score}: The candidate does not appear to be a good match for this {job_title} position. Their profile shows limited alignment with key requirements, matching only {skills_match_percentage}% of required skills, and their experience level is {experience_match}."
348
 
349
  execution_time = time.time() - start_time
350
 
@@ -401,7 +398,117 @@ def extract_text_from_file(file_obj):
401
  # Functions for Information Extraction
402
  #####################################
403
 
404
- # Cache the extraction functions to avoid reprocessing
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  @lru_cache(maxsize=32)
406
  def extract_name(text_start):
407
  """Extract candidate name from the beginning of resume text"""
@@ -424,8 +531,9 @@ def extract_name(text_start):
424
 
425
  return "Unknown (please extract from resume)"
426
 
427
- def extract_skills_and_work(text):
428
- """Extract both skills and work experience at once to save processing time"""
 
429
  # Common skill categories - reduced keyword list for speed
430
  skill_categories = {
431
  "Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go"],
@@ -439,110 +547,44 @@ def extract_skills_and_work(text):
439
  "Design": ["UX/UI", "User Experience", "Design Thinking", "Adobe"]
440
  }
441
 
442
- # Work experience extraction
443
- work_headers = [
444
- "work experience", "professional experience", "employment history",
445
- "work history", "experience"
446
- ]
447
-
448
- next_section_headers = [
449
- "education", "skills", "certifications", "projects", "achievements"
450
- ]
451
-
452
  # Process everything at once
453
- lines = text.split('\n')
454
  text_lower = text.lower()
455
 
456
  # Skills extraction
457
- found_skills = []
458
  for category, skills in skill_categories.items():
459
- category_skills = []
460
  for skill in skills:
461
  if skill.lower() in text_lower:
462
- category_skills.append(skill)
463
-
464
- if category_skills:
465
- found_skills.append(f"{category}: {', '.join(category_skills)}")
466
-
467
- # Work experience extraction - simplified approach
468
- work_section = []
469
- in_work_section = False
470
-
471
- for idx, line in enumerate(lines):
472
- line_lower = line.lower().strip()
473
-
474
- # Start of work section
475
- if not in_work_section:
476
- if any(header in line_lower for header in work_headers):
477
- in_work_section = True
478
- continue
479
- # End of work section
480
- elif in_work_section:
481
- if any(header in line_lower for header in next_section_headers):
482
- break
483
-
484
- if line.strip():
485
- work_section.append(line.strip())
486
-
487
- # Simplified work formatting
488
- if not work_section:
489
- work_experience = "Work experience not clearly identified"
490
- else:
491
- # Just take the first 5-7 lines of the work section as a summary
492
- work_lines = []
493
- company_count = 0
494
- current_company = ""
495
-
496
- for line in work_section:
497
- # New company entry often has a date
498
- if re.search(r'(19|20)\d{2}', line):
499
- company_count += 1
500
- if company_count <= 3: # Limit to 3 most recent positions
501
- current_company = line
502
- work_lines.append(f"**{line}**")
503
- else:
504
- break
505
- elif company_count <= 3 and len(work_lines) < 10: # Limit total lines
506
- work_lines.append(line)
507
-
508
- work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"
509
-
510
- skills_formatted = "\n• " + "\n• ".join(found_skills) if found_skills else "No specific technical skills clearly identified"
511
 
512
- return skills_formatted, work_experience
513
 
514
  #####################################
515
  # Function: Summarize Resume Text
516
  #####################################
517
  def summarize_resume_text(resume_text, models):
518
  """
519
- Generates a structured summary of the resume text
520
  """
521
  start_time = time.time()
522
 
523
- # Use our summarize_text function which handles both pipeline and non-pipeline cases
524
- base_summary = summarize_text(resume_text, models, max_length=100)
525
-
526
- # Extract name from the beginning of the resume
527
  name = extract_name(resume_text[:500])
 
 
 
 
528
 
529
- # Extract skills and work experience
530
- skills, work_experience = extract_skills_and_work(resume_text)
531
-
532
- # Extract education level - simplified approach
533
- education_level = "Not specified"
534
- education_terms = ["bachelor", "master", "phd", "doctorate", "mba", "degree"]
535
- for term in education_terms:
536
- if term in resume_text.lower():
537
- education_level = "Higher education degree mentioned"
538
- break
539
 
540
- # Format the structured summary
541
  formatted_summary = f"Name: {name}\n\n"
542
- formatted_summary += f"Summary: {base_summary}\n\n"
543
- formatted_summary += f"Previous Work Experience: {work_experience}\n\n"
544
- formatted_summary += f"Skills: {skills}\n\n"
545
- formatted_summary += f"Education: {education_level}"
 
546
 
547
  execution_time = time.time() - start_time
548
 
@@ -688,9 +730,9 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
688
 
689
  # Display fit score with label
690
  fit_labels = {
691
- 0: "NOT FIT",
692
- 1: "POTENTIAL FIT ⚠️",
693
- 2: "STRONG FIT"
694
  }
695
 
696
  # Show the score prominently
@@ -706,7 +748,7 @@ if uploaded_file is not None and job_description and st.button("Analyze Job Fit"
706
 
707
  if fit_score == 2:
708
  st.markdown("""
709
- - Apply for this position as you appear to be a strong match
710
  - Prepare for interviews by focusing on your relevant experience
711
  - Highlight your matching skills in your cover letter
712
  """)
 
171
  def evaluate_job_fit(resume_summary, job_requirements, models):
172
  """
173
  Use the sentiment model to evaluate job fit with multiple analyses
 
 
 
174
  """
175
  start_time = time.time()
176
 
 
329
 
330
  # Now determine the final score (0, 1, or 2)
331
  if combined_score >= 0.7 and skills_match_percentage >= 70 and experience_match == "sufficient":
332
+ final_score = 2 # Good fit
333
  elif combined_score >= 0.4 or (skills_match_percentage >= 50 and experience_match == "sufficient"):
334
  final_score = 1 # Potential fit
335
  else:
336
  final_score = 0 # Not fit
337
 
338
+ # Generate concise assessment text based on the score
339
  if final_score == 2:
340
+ assessment = f"{final_score}: Skills match {skills_match_percentage}%, Experience match {experience_years}/{years_required} yrs. Strong technical alignment with {len(skills_in_resume)}/{len(required_skills)} required skills."
341
  elif final_score == 1:
342
+ assessment = f"{final_score}: Skills match {skills_match_percentage}%, Experience {experience_match}. Meets some requirements but has gaps in {len(required_skills) - len(skills_in_resume)} skill areas."
343
  else:
344
+ assessment = f"{final_score}: Skills match only {skills_match_percentage}%, Experience {experience_match}. Significant gaps in critical requirements for this position."
345
 
346
  execution_time = time.time() - start_time
347
 
 
398
  # Functions for Information Extraction
399
  #####################################
400
 
401
+ # Extract age from resume
402
def extract_age(text):
    """Extract the candidate's age from resume text.

    Two kinds of evidence are checked, in this order:

    1. An explicit age ("Age: 29", "29 years old"); the matched digits are
       returned unchanged.
    2. A year of birth following "dob:" or "date of birth:" on the same
       line; the age is computed against the current calendar year.

    Args:
        text: Raw resume text.

    Returns:
        The age as a string of digits, or "Not specified" when no pattern
        matches or the computed age is implausible.
    """
    # Local import so the block does not depend on a file-level import.
    from datetime import date

    text_lower = text.lower()

    # Word boundaries stop "age" from matching inside words such as
    # "homepage" or "page", and stop a 4-digit number from being cut down
    # to its first two digits.
    direct_age_patterns = (
        r'\bage:?\s*(\d{1,2})\b',
        r'\b(\d{1,2})\s*years\s*old\b',
    )
    for pattern in direct_age_patterns:
        match = re.search(pattern, text_lower)
        if match:
            return match.group(1)

    # Only accept a plausible 19xx/20xx year that is not part of a longer
    # digit run, so trailing numbers on the same line (e.g. a phone number)
    # are not mistaken for the birth year.
    birth_year_patterns = (
        r'\bdob:.*?(?<!\d)((?:19|20)\d{2})(?!\d)',
        r'\bdate of birth:.*?(?<!\d)((?:19|20)\d{2})(?!\d)',
    )
    current_year = date.today().year
    for pattern in birth_year_patterns:
        match = re.search(pattern, text_lower)
        if match:
            age = current_year - int(match.group(1))
            # Reject birth years in the future or absurdly far in the past.
            if 0 <= age <= 120:
                return str(age)

    return "Not specified"
427
+
428
+ # Extract industry preference
429
def extract_industry(text):
    """Guess the industry a resume targets by counting keyword hits.

    Each industry has a list of keywords. Ordinary keywords are counted as
    case-insensitive substrings of the text. Upper-case acronyms ("IT",
    "AI") are counted only as case-sensitive whole words, because their
    lower-case forms occur inside many unrelated words ("with", "position",
    "email", "maintain") and would otherwise dominate the score.

    Args:
        text: Raw resume text.

    Returns:
        The name of the industry with the highest keyword count (the first
        one listed wins a tie), or "Not clearly specified" when no keyword
        occurs at all.
    """
    # Local import so the block does not depend on a file-level import.
    import re

    # Common industry keywords
    industry_keywords = {
        "Technology": ["software", "programming", "developer", "IT", "tech", "computer", "digital"],
        "Finance": ["banking", "financial", "accounting", "finance", "analyst"],
        "Healthcare": ["medical", "health", "hospital", "clinical", "nurse", "doctor", "patient"],
        "Education": ["teaching", "teacher", "professor", "education", "university", "school", "academic"],
        "Marketing": ["marketing", "advertising", "digital marketing", "social media", "brand"],
        "Engineering": ["engineer", "engineering", "mechanical", "civil", "electrical"],
        "Data Science": ["data science", "machine learning", "AI", "analytics", "big data"],
        "Management": ["manager", "management", "leadership", "executive", "director"],
        "Consulting": ["consultant", "consulting", "advisor"],
        "Sales": ["sales", "business development", "account manager", "client relations"]
    }

    text_lower = text.lower()
    industry_counts = {}

    for industry, keywords in industry_keywords.items():
        count = 0
        for keyword in keywords:
            if keyword.isupper():
                # Acronym: whole-word, case-sensitive match on the raw text.
                count += len(re.findall(r'\b' + re.escape(keyword) + r'\b', text))
            else:
                count += text_lower.count(keyword.lower())
        if count > 0:
            industry_counts[industry] = count

    if industry_counts:
        # max() returns the first maximal key in insertion order.
        return max(industry_counts, key=industry_counts.get)

    return "Not clearly specified"
458
+
459
+ # Extract job position preference
460
def extract_job_position(text):
    """Infer the job position a candidate is targeting from resume text.

    The lower-cased text is searched in two stages:

    1. An objective/summary section, or a "seeking a position as ..."
       phrase. If the captured text contains a known job-title word, the
       title is returned together with up to two preceding modifier words
       ("Senior Data Engineer"). Otherwise, if the captured text is longer
       than 10 characters, it is returned title-cased, cut to its first 10
       words followed by "..." when longer.
    2. A current or most recent job title: two or three words after
       "experience:" on the same line, or before "(current)" / "(present)".

    Args:
        text: Raw resume text.

    Returns:
        A title-cased position string, or "Not explicitly stated" when
        nothing matches.
    """
    # Look for objective or summary section
    objective_patterns = [
        r'objective:?\s*(.*?)(?=\n\n|\n\w+:|\Z)',
        r'career\s*objective:?\s*(.*?)(?=\n\n|\n\w+:|\Z)',
        r'professional\s*summary:?\s*(.*?)(?=\n\n|\n\w+:|\Z)',
        r'summary:?\s*(.*?)(?=\n\n|\n\w+:|\Z)',
        # The optional article/preposition must end on a word boundary;
        # otherwise "an analyst" is consumed as "a" and "n analyst" is
        # captured, and "involving" loses its leading "in".
        r'seeking\s*(?:an?\b)?\s*(?:position|role|opportunity)\s*'
        r'(?:(?:as|in)\b)?\s*(?:an?\b)?\s*([^.]*)'
    ]

    job_titles = ["developer", "engineer", "analyst", "manager", "director", "specialist",
                  "coordinator", "consultant", "designer", "architect", "administrator"]

    # Words that may sit directly before a title without being part of it.
    filler_words = {"a", "an", "the", "as", "for", "of", "in", "to", "and"}

    # Whole-word title preceded by at most two words. The lazy quantifier
    # makes the regex stop at the first title reachable from a given start,
    # so the earliest title in the text wins rather than the first one in
    # the job_titles list.
    title_regex = re.compile(
        r'\b((?:\w+\s+){0,2}?)(' + '|'.join(job_titles) + r')\b'
    )

    # Patterns are written in lower case, so no IGNORECASE flag is needed.
    text_lower = text.lower()
    for pattern in objective_patterns:
        match = re.search(pattern, text_lower, re.DOTALL)
        if not match:
            continue

        objective_text = match.group(1).strip()

        title_match = title_regex.search(objective_text)
        if title_match:
            modifiers = title_match.group(1).split()
            # Keep only the words after the last filler word, so that
            # "as a data" becomes "data" and "position as" becomes empty.
            for index in range(len(modifiers) - 1, -1, -1):
                if modifiers[index] in filler_words:
                    modifiers = modifiers[index + 1:]
                    break
            return " ".join(modifiers + [title_match.group(2)]).title()

        # If no specific title found but we have objective text, return a summary
        if len(objective_text) > 10:
            words = objective_text.split()
            if len(words) > 10:
                return " ".join(words[:10]).title() + "..."
            return objective_text.title()

    # Check current/most recent job title
    job_patterns = [
        r'experience:.*?(\w+\s+\w+(?:\s+\w+)?)(?=\s*at|\s*\(|\s*-|\s*,|\s*\d{4}|\n)',
        r'(\w+\s+\w+(?:\s+\w+)?)\s*\(\s*current\s*\)',
        r'(\w+\s+\w+(?:\s+\w+)?)\s*\(\s*present\s*\)'
    ]

    for pattern in job_patterns:
        match = re.search(pattern, text_lower)
        if match:
            return match.group(1).strip().title()

    return "Not explicitly stated"
510
+
511
+ # Extract name
512
  @lru_cache(maxsize=32)
513
  def extract_name(text_start):
514
  """Extract candidate name from the beginning of resume text"""
 
531
 
532
  return "Unknown (please extract from resume)"
533
 
534
+ # Extract skills
535
+ def extract_skills(text):
536
+ """Extract key skills from the resume"""
537
  # Common skill categories - reduced keyword list for speed
538
  skill_categories = {
539
  "Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go"],
 
547
  "Design": ["UX/UI", "User Experience", "Design Thinking", "Adobe"]
548
  }
549
 
 
 
 
 
 
 
 
 
 
 
550
  # Process everything at once
 
551
  text_lower = text.lower()
552
 
553
  # Skills extraction
554
+ all_skills = []
555
  for category, skills in skill_categories.items():
 
556
  for skill in skills:
557
  if skill.lower() in text_lower:
558
+ all_skills.append(skill)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
560
+ return all_skills
561
 
562
  #####################################
563
  # Function: Summarize Resume Text
564
  #####################################
565
  def summarize_resume_text(resume_text, models):
566
  """
567
+ Generates a structured summary of the resume text with the critical information
568
  """
569
  start_time = time.time()
570
 
571
+ # Extract critical information
 
 
 
572
  name = extract_name(resume_text[:500])
573
+ age = extract_age(resume_text)
574
+ industry = extract_industry(resume_text)
575
+ job_position = extract_job_position(resume_text)
576
+ skills = extract_skills(resume_text)
577
 
578
+ # Use our summarize_text function for a general summary
579
+ general_summary = summarize_text(resume_text, models, max_length=100)
 
 
 
 
 
 
 
 
580
 
581
+ # Format the structured summary with different paragraphs for each critical piece
582
  formatted_summary = f"Name: {name}\n\n"
583
+ formatted_summary += f"Age: {age}\n\n"
584
+ formatted_summary += f"Expected Industry: {industry}\n\n"
585
+ formatted_summary += f"Expected Job Position: {job_position}\n\n"
586
+ formatted_summary += f"Skills: {', '.join(skills)}\n\n"
587
+ formatted_summary += f"Summary: {general_summary}"
588
 
589
  execution_time = time.time() - start_time
590
 
 
730
 
731
  # Display fit score with label
732
  fit_labels = {
733
+ 0: "NOT FIT",
734
+ 1: "POTENTIAL FIT",
735
+ 2: "GOOD FIT"
736
  }
737
 
738
  # Show the score prominently
 
748
 
749
  if fit_score == 2:
750
  st.markdown("""
751
+ - Apply for this position as you appear to be a good match
752
  - Prepare for interviews by focusing on your relevant experience
753
  - Highlight your matching skills in your cover letter
754
  """)