CR7CAD committed on
Commit
92e31bf
·
verified ·
1 Parent(s): 0a0fafe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +176 -123
app.py CHANGED
@@ -315,173 +315,187 @@ def summarize_resume_text(resume_text):
315
  #####################################
def _keyword_in_text(keyword, text):
    """Return True when *keyword* occurs in *text* as a whole word or phrase.

    A plain ``keyword in text`` test counts "ai" inside "email" and "go"
    inside "google"; the look-arounds require a non-word character (or the
    string edge) on both sides. An optional trailing "s" keeps simple plurals
    such as "teams" matching.
    """
    pattern = r'(?<!\w)' + re.escape(keyword) + r's?(?!\w)'
    return re.search(pattern, text) is not None


def analyze_google_fit(resume_summary):
    """
    Analyze how well the candidate fits Google's requirements.

    A keyword-based score is computed first; the evaluator model is then asked
    for a natural language assessment, with a templated fallback when the
    model output is unusable or generation raises.

    Args:
        resume_summary: Text summary of the resume. The prompt extraction looks
            for "Skills:" and "Previous Work Experience:" sections in it.

    Returns:
        Tuple ``(assessment, match_percentage, execution_time)`` where
        ``assessment`` is a string, ``match_percentage`` is an int clamped to
        35..92 and ``execution_time`` is elapsed wall-clock seconds.
    """
    start_time = time.time()

    google_keywords = {
        "technical_skills": ["python", "java", "c++", "javascript", "go", "sql", "algorithms", "data structures", "coding"],
        "advanced_tech": ["machine learning", "ai", "artificial intelligence", "cloud", "data science", "big data", "tensorflow", "deep learning"],
        "problem_solving": ["problem solving", "analytical", "critical thinking", "troubleshooting", "debugging", "optimization"],
        "innovation": ["innovation", "creative", "creativity", "design thinking", "research", "novel"],
        "soft_skills": ["team", "leadership", "collaboration", "communication", "agile", "project management"]
    }

    # Relative contribution of each category to the overall score (sums to 1.0).
    category_weights = {
        "technical_skills": 0.35,
        "advanced_tech": 0.25,
        "problem_solving": 0.20,
        "innovation": 0.10,
        "soft_skills": 0.10
    }

    resume_lower = resume_summary.lower()
    category_scores = {}

    for category, keywords in google_keywords.items():
        # Whole-word matching: substring tests inflate the score through
        # accidental hits such as "go" in "google" or "java" in "javascript".
        matches = sum(1 for keyword in keywords if _keyword_in_text(keyword, resume_lower))
        max_matches = min(len(keywords), 5)  # Cap maximum possible matches

        if matches == 0:
            category_scores[category] = 0.0
        else:
            # Logarithmic scaling gives diminishing returns per extra keyword
            # and caps every category at 0.9.
            category_scores[category] = min(0.9, (math.log(matches + 1) / math.log(max_matches + 1)) * 0.9)

    weighted_score = sum(score * category_weights[category] for category, score in category_scores.items())

    # Apply final curve to keep scores in a realistic range
    match_percentage = min(92, max(35, int(weighted_score * 100)))

    strengths = [category.replace("_", " ") for category, score in category_scores.items() if score > 0.5]
    weaknesses = [category.replace("_", " ") for category, score in category_scores.items() if score < 0.4]

    # Extract key parts from resume for better context
    skills_match = re.search(r'Skills:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
    skills_text = skills_match.group(0) if skills_match else ""

    work_match = re.search(r'Previous Work Experience:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
    work_text = work_match.group(0) if work_match else ""

    prompt = f"""
    Resume shows: {skills_text} {work_text}
    Google needs: {GOOGLE_DESCRIPTION[:100]}
    Analyze fit (strengths: {', '.join(strengths)}, areas for improvement: {', '.join(weaknesses)})
    This candidate """

    assessment = None
    try:
        # Generate the assessment
        # NOTE(review): presumably a Hugging Face generation pipeline - confirm
        # against where `models` is built.
        assessment_results = models['evaluator'](
            prompt,
            max_length=250,
            do_sample=True,
            temperature=0.4,
            num_return_sequences=2
        )

        echoed_prompt = prompt.strip()
        for result in assessment_results:
            text = result['generated_text'].strip()

            # If the generator returns prompt + continuation, drop the echoed
            # prompt but keep its trailing "This candidate" lead-in; otherwise
            # the startswith() check below can never succeed.
            if text.startswith(echoed_prompt):
                text = "This candidate" + text[len(echoed_prompt):]

            # Clean up obvious artifacts
            text = text.replace("This candidate This candidate", "This candidate")
            text = re.sub(r'(Resume shows:|Google needs:|Analyze fit|strengths:|areas for improvement:)', '', text)

            # Check if it looks valid
            if text.startswith("This candidate") and len(text) > 40:
                assessment = text
                break
    except Exception as e:
        # Best-effort boundary: any generation failure falls through to the
        # manual assessment below.
        print(f"Error in assessment generation: {e}")

    # If no good response was found, fall back to manual assessment
    if not assessment:
        assessment, _ = generate_manual_assessment(resume_summary, match_percentage)

    # Final cleanup to remove any remaining prompt artifacts
    assessment = re.sub(r'score: \d+%', '', assessment)  # Remove any existing score

    # Add the calculated score if not already present
    if "%" not in assessment:
        assessment += f" Overall, they have approximately a {match_percentage}% match with Google's requirements."

    execution_time = time.time() - start_time

    return assessment, match_percentage, execution_time
424
 
def _keyword_in_text(keyword, text):
    """Return True when *keyword* occurs in *text* as a whole word or phrase.

    A plain ``keyword in text`` test counts "ai" inside "email" and "go"
    inside "google"; the look-arounds require a non-word character (or the
    string edge) on both sides. An optional trailing "s" keeps simple plurals
    such as "teams" matching.
    """
    pattern = r'(?<!\w)' + re.escape(keyword) + r's?(?!\w)'
    return re.search(pattern, text) is not None


def generate_manual_assessment(resume_summary, match_percentage):
    """
    Generate a manual assessment based on keywords in the resume
    as a fallback when the model fails. Uses the pre-calculated match percentage.

    Args:
        resume_summary: Resume text to scan for keywords (case-insensitive).
        match_percentage: Pre-computed score; embedded in the text and
            returned unchanged.

    Returns:
        Tuple ``(assessment, match_percentage)``.
    """
    # Define key Google skill categories
    key_skills = {
        "technical": ["python", "java", "javascript", "c++", "go", "programming", "coding", "software development"],
        "advanced_tech": ["machine learning", "ai", "artificial intelligence", "cloud", "data science", "big data"],
        "problem_solving": ["problem solving", "algorithms", "analytical", "critical thinking", "troubleshooting"],
        "innovation": ["innovation", "creative", "creativity", "design thinking"],
        "teamwork": ["team", "leadership", "collaboration", "communication", "agile"]
    }

    # Phrase used when a category has at least two keyword hits.
    strength_phrases = {
        "technical": "strong technical skills",
        "advanced_tech": "experience with advanced technologies",
        "problem_solving": "problem-solving abilities",
        "innovation": "innovative thinking",
        "teamwork": "teamwork and collaboration skills",
    }

    # Phrase used when a category has no keyword hits at all.
    weakness_phrases = {
        "technical": "technical programming skills",
        "advanced_tech": "knowledge of advanced technologies",
        "problem_solving": "demonstrated problem-solving capabilities",
        "innovation": "innovation mindset",
        "teamwork": "team collaboration experience",
    }

    summary_lower = resume_summary.lower()

    strengths = []
    weaknesses = []

    for category, keywords in key_skills.items():
        matches = sum(1 for keyword in keywords if _keyword_in_text(keyword, summary_lower))

        if matches >= 2:
            strengths.append(strength_phrases[category])
        elif matches == 0:
            weaknesses.append(weakness_phrases[category])
        # Exactly one hit is treated as neutral: neither strength nor weakness.

    # Construct assessment. Each sentence is complete on its own so that an
    # empty strengths list no longer yields "This candidate . " and short
    # lists no longer leave a space before the period.
    if not strengths:
        assessment = "This candidate does not yet show standout strengths in Google's key skill areas. "
    elif len(strengths) > 2:
        assessment = f"This candidate demonstrates {', '.join(strengths[:2])} as well as {strengths[2]}. "
    else:
        assessment = f"This candidate demonstrates {', '.join(strengths[:2])}. "

    if weaknesses:
        assessment += f"However, they could benefit from developing stronger {' and '.join(weaknesses[:2])}. "

    assessment += f"Based on the resume analysis, they appear to be a {match_percentage}% match for Google's requirements."

    return assessment, match_percentage
485
 
486
  #####################################
487
  # Main Streamlit Interface
@@ -528,30 +542,69 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
528
  st.info(f"Summary generated in {summarization_time:.2f} seconds")
529
 
530
  # Step 3: Generate Google fit assessment
531
- status_text.text("Step 3/3: Evaluating Google fit...")
532
- assessment, match_percentage, assessment_time = analyze_google_fit(summary)
533
- progress_bar.progress(100)
534
-
535
- # Clear status messages
536
- status_text.empty()
537
-
538
- # Display Google fit results
539
- st.subheader("Google Fit Assessment")
540
 
541
- # Display match percentage with appropriate color and emoji - with more realistic thresholds
542
- if match_percentage >= 85:
543
- st.success(f"**Overall Google Match Score:** {match_percentage}% 🌟")
544
- elif match_percentage >= 70:
545
- st.success(f"**Overall Google Match Score:** {match_percentage}% βœ…")
546
- elif match_percentage >= 50:
547
- st.warning(f"**Overall Google Match Score:** {match_percentage}% ⚠️")
548
- else:
549
- st.error(f"**Overall Google Match Score:** {match_percentage}% πŸ”")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550
 
551
  # Display assessment
552
  st.markdown("### Expert Assessment")
553
  st.markdown(assessment)
554
-
555
  st.info(f"Assessment completed in {assessment_time:.2f} seconds")
556
 
557
  # Add potential next steps based on the match percentage
 
315
  #####################################
def _keyword_in_text(keyword, text):
    """Return True when *keyword* occurs in *text* as a whole word or phrase.

    A plain ``keyword in text`` test counts "ai" inside "email" and "go"
    inside "google"; the look-arounds require a non-word character (or the
    string edge) on both sides. An optional trailing "s" keeps simple plurals
    such as "teams" matching.
    """
    pattern = r'(?<!\w)' + re.escape(keyword) + r's?(?!\w)'
    return re.search(pattern, text) is not None


def _strip_prompt_echo(text, prompt):
    """Remove any echoed copy of *prompt* (or its known sections) from *text*."""
    text = text.strip()
    echoed_prompt = prompt.strip()
    if text.startswith(echoed_prompt):
        text = text[len(echoed_prompt):]

    # Partial echoes: drop the header section and the trailing instruction.
    text = re.sub(r'Write a detailed assessment.*?Match percentage:.*?%', '', text, flags=re.DOTALL)
    # The instruction itself contains the words "This candidate"; it must be
    # removed in full or it would satisfy the caller's validity check.
    text = re.sub(
        r'Write a detailed 3-5 sentence assessment(?:.*?where relevant\.)?',
        '',
        text,
        flags=re.DOTALL,
    )
    return text.strip()


def analyze_google_fit(resume_summary):
    """
    Analyze how well the candidate fits Google's requirements with detailed category breakdowns.

    Args:
        resume_summary: Text summary of the resume. The prompt extraction looks
            for "Skills:" and "Previous Work Experience:" sections in it.

    Returns:
        Tuple ``(assessment, match_percentage, category_details, execution_time)``:
        ``assessment`` is a string starting with "This candidate";
        ``match_percentage`` is an int clamped to 35..92; ``category_details``
        maps each category key to a dict with ``raw_percentage``,
        ``adjusted_score`` (int percent), ``matching_keywords``,
        ``total_keywords`` and ``matches``; ``execution_time`` is elapsed
        wall-clock seconds.
    """
    start_time = time.time()

    # Define Google's key skill categories with more detailed keywords
    google_keywords = {
        "technical_skills": ["python", "java", "c++", "javascript", "go", "sql", "algorithms", "data structures",
                             "coding", "software development", "git", "programming", "backend", "frontend", "full-stack"],
        "advanced_tech": ["machine learning", "ai", "artificial intelligence", "cloud", "data science", "big data",
                          "tensorflow", "deep learning", "distributed systems", "kubernetes", "microservices"],
        "problem_solving": ["problem solving", "analytical", "critical thinking", "troubleshooting", "debugging",
                            "optimization", "scalability", "system design", "complexity", "efficiency"],
        "innovation": ["innovation", "creative", "creativity", "design thinking", "research", "novel solutions",
                       "patents", "publications", "unique approaches", "cutting-edge"],
        "soft_skills": ["team", "leadership", "collaboration", "communication", "agile", "project management",
                        "mentoring", "cross-functional", "presentation", "stakeholder management"]
    }

    # Category weights with descriptive labels
    category_weights = {
        "technical_skills": {"weight": 0.35, "label": "Technical Programming Skills"},
        "advanced_tech": {"weight": 0.25, "label": "Advanced Technology Knowledge"},
        "problem_solving": {"weight": 0.20, "label": "Problem Solving Abilities"},
        "innovation": {"weight": 0.10, "label": "Innovation Mindset"},
        "soft_skills": {"weight": 0.10, "label": "Collaboration & Leadership"}
    }

    resume_lower = resume_summary.lower()

    # Calculate category scores and store detailed information
    category_scores = {}
    category_details = {}
    found_skills = {}

    for category, keywords in google_keywords.items():
        # Whole-word matching: substring tests inflate the score through
        # accidental hits such as "go" in "google" or "java" in "javascript".
        category_matches = [keyword for keyword in keywords if _keyword_in_text(keyword, resume_lower)]
        found_skills[category] = category_matches

        matches = len(category_matches)
        total_keywords = len(keywords)

        # Calculate raw percentage for this category
        raw_percentage = int((matches / total_keywords) * 100)

        # Apply logarithmic scaling for more realistic scores
        if matches == 0:
            adjusted_score = 0.0
        else:
            # Diminishing returns per extra keyword; the denominator is capped
            # at 8 keywords and the result at 0.95 to prevent perfect scores.
            adjusted_score = min(0.95, (math.log(matches + 1) / math.log(min(total_keywords, 8) + 1)))

        # Store both raw and adjusted scores for feedback
        category_scores[category] = adjusted_score
        category_details[category] = {
            "raw_percentage": raw_percentage,
            "adjusted_score": int(adjusted_score * 100),
            "matching_keywords": category_matches,
            "total_keywords": total_keywords,
            "matches": matches
        }

    # Calculate weighted score
    weighted_score = sum(score * category_weights[category]["weight"] for category, score in category_scores.items())

    # Apply final curve to keep scores in a realistic range
    match_percentage = min(92, max(35, int(weighted_score * 100)))

    # Find top strengths and areas for improvement
    strengths = [(category_weights[cat]["label"], details["adjusted_score"])
                 for cat, details in category_details.items()
                 if details["adjusted_score"] >= 60]

    weaknesses = [(category_weights[cat]["label"], details["adjusted_score"])
                  for cat, details in category_details.items()
                  if details["adjusted_score"] < 50]

    # Strongest strengths first, weakest weaknesses first
    strengths.sort(key=lambda x: x[1], reverse=True)
    weaknesses.sort(key=lambda x: x[1])

    # Create a more detailed prompt for assessment
    strength_text = ", ".join([f"{s[0]}" for s in strengths[:3]]) if strengths else "limited applicable skills"
    weakness_text = ", ".join([f"{w[0]}" for w in weaknesses[:3]]) if weaknesses else "no obvious weaknesses"

    # Extract key resume elements
    skills_match = re.search(r'Skills:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
    skills_text = skills_match.group(0) if skills_match else ""

    work_match = re.search(r'Previous Work Experience:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
    work_text = work_match.group(0) if work_match else ""

    # List specific matching skills for more detailed assessment
    specific_skills = []
    for category, matches in found_skills.items():
        if matches:
            specific_skills.extend(matches[:3])  # Take up to 3 skills from each category

    specific_skills_text = ", ".join(specific_skills[:8]) if specific_skills else "limited identifiable skills"

    prompt = f"""
    Write a detailed assessment of a job candidate for Google.
    Resume highlights: Skills in {specific_skills_text}. {work_text[:200]}
    Strengths: {strength_text}
    Areas for improvement: {weakness_text}
    Match percentage: {match_percentage}%

    Write a detailed 3-5 sentence assessment beginning with "This candidate". Be specific about skills, experiences,
    strengths, weaknesses, and how they align with Google. Mention specific technical skills where relevant.
    """

    assessment = None
    try:
        # Generate the assessment
        # NOTE(review): presumably a Hugging Face generation pipeline - confirm
        # against where `models` is built.
        assessment_results = models['evaluator'](
            prompt,
            max_length=350,  # Longer assessment
            do_sample=True,
            temperature=0.7,  # Higher temperature for more detailed output
            num_return_sequences=3
        )

        # Find the best response
        for result in assessment_results:
            text = _strip_prompt_echo(result['generated_text'], prompt)

            # Check if it looks valid
            if "this candidate" in text.lower() and len(text) > 100:
                assessment = text
                break
    except Exception as e:
        # Best-effort boundary: any generation failure falls through to the
        # manual assessment below.
        print(f"Error in assessment generation: {e}")

    # If no good response was found, fall back to manual assessment
    if not assessment:
        assessment = generate_detailed_manual_assessment(resume_summary, strengths, weaknesses, specific_skills, match_percentage)

    # Final cleanup
    assessment = assessment.strip()
    if assessment.lower().startswith("this candidate"):
        # Only normalise the capitalisation; prepending here would produce
        # "This candidate this candidate ...".
        assessment = "This candidate" + assessment[len("This candidate"):]
    else:
        assessment = f"This candidate {assessment}"

    execution_time = time.time() - start_time

    return assessment, match_percentage, category_details, execution_time
471
 
def generate_detailed_manual_assessment(resume_summary, strengths, weaknesses, specific_skills, match_percentage):
    """
    Generate a detailed manual assessment when the model fails.

    Args:
        resume_summary: Accepted for interface compatibility; not read here.
        strengths: Sequence of ``(label, score)`` pairs; only the labels of
            the first two entries are used.
        weaknesses: Sequence of ``(label, score)`` pairs; only the labels of
            the first two entries are used.
        specific_skills: Sequence of skill names to quote in the text.
        match_percentage: Numeric score; selects the closing sentence
            (>= 70, >= 50, otherwise) and is embedded in it.

    Returns:
        The assessment as a single string of space-separated sentences.
    """
    sentences = []

    # Start with strengths
    if strengths:
        # Labels are lowercased and joined with "and" so they read naturally
        # mid-sentence, matching how weaknesses are rendered below.
        strength_labels = " and ".join(entry[0].lower() for entry in strengths[:2])
        sentences.append(f"This candidate demonstrates proficiency in {strength_labels}.")
        if specific_skills:
            sentences.append(
                f"Their experience with {', '.join(specific_skills[:4])} aligns with Google's technical requirements."
            )
    else:
        sentences.append(
            "This candidate has limited alignment with Google's key requirements based on the resume provided."
        )
        if specific_skills:
            sentences.append(
                f"While they have some experience with {', '.join(specific_skills[:3])}, these skills alone may not be sufficient."
            )

    # Add weaknesses
    if weaknesses:
        weakness_labels = " and ".join(entry[0].lower() for entry in weaknesses[:2])
        sentences.append(
            f"To improve their candidacy for Google, they should focus on developing stronger {weakness_labels}."
        )

    # Add conclusion with match percentage
    if match_percentage >= 70:
        sentences.append(
            f"Overall, they show good potential for certain roles at Google with a {match_percentage}% match to requirements."
        )
    elif match_percentage >= 50:
        sentences.append(
            f"With targeted skill development, they may become a stronger candidate for Google, currently showing a {match_percentage}% match."
        )
    else:
        sentences.append(
            f"Significant skill development would be needed before they could be considered a strong Google candidate, with a current match of {match_percentage}%."
        )

    return " ".join(sentences)
499
 
500
  #####################################
501
  # Main Streamlit Interface
 
542
  st.info(f"Summary generated in {summarization_time:.2f} seconds")
543
 
544
  # Step 3: Generate Google fit assessment
545
+ status_text.text("Step 3/3: Evaluating Google fit...")
546
+ assessment, match_percentage, category_details, assessment_time = analyze_google_fit(summary)
547
+ progress_bar.progress(100)
 
 
 
 
 
 
548
 
549
+ # Clear status messages
550
+ status_text.empty()
551
+
552
+ # Display Google fit results
553
+ st.subheader("Google Fit Assessment")
554
+
555
+ # Display match percentage with appropriate color and emoji - with more realistic thresholds
556
+ if match_percentage >= 85:
557
+ st.success(f"**Overall Google Match Score:** {match_percentage}% 🌟")
558
+ elif match_percentage >= 70:
559
+ st.success(f"**Overall Google Match Score:** {match_percentage}% βœ…")
560
+ elif match_percentage >= 50:
561
+ st.warning(f"**Overall Google Match Score:** {match_percentage}% ⚠️")
562
+ else:
563
+ st.error(f"**Overall Google Match Score:** {match_percentage}% πŸ”")
564
+
565
+ # NEW ADDITION: Add detailed score breakdown
566
+ st.markdown("### Score Breakdown")
567
+
568
+ # Create a neat table with category scores
569
+ breakdown_data = []
570
+ for category, details in category_details.items():
571
+ label = {"technical_skills": "Technical Programming Skills",
572
+ "advanced_tech": "Advanced Technology Knowledge",
573
+ "problem_solving": "Problem Solving Abilities",
574
+ "innovation": "Innovation Mindset",
575
+ "soft_skills": "Collaboration & Leadership"}[category]
576
+
577
+ # Create a visual indicator for the score
578
+ score = details["adjusted_score"]
579
+
580
+ # Add formatted breakdown row
581
+ breakdown_data.append({
582
+ "Category": label,
583
+ "Score": f"{score}%",
584
+ "Matching Skills": ", ".join(details["matching_keywords"][:3]) if details["matching_keywords"] else "None detected"
585
+ })
586
+
587
+ # Convert to DataFrame and display
588
+ import pandas as pd
589
+ breakdown_df = pd.DataFrame(breakdown_data)
590
+ st.table(breakdown_df)
591
+
592
+ # Show a note about how scores are calculated
593
+ with st.expander("How are these scores calculated?"):
594
+ st.markdown("""
595
+ - **Technical Programming Skills** (35% of total): Evaluates coding languages, software development tools, and core programming concepts
596
+ - **Advanced Technology Knowledge** (25% of total): Assesses experience with cutting-edge technologies like AI, ML, cloud systems
597
+ - **Problem Solving Abilities** (20% of total): Measures analytical thinking, algorithm design, and optimization skills
598
+ - **Innovation Mindset** (10% of total): Looks for creativity, research orientation, and novel approaches
599
+ - **Collaboration & Leadership** (10% of total): Evaluates team skills, communication, and project management
600
+
601
+ Scores are calculated based on keyword matches in your resume, with diminishing returns applied (first few skills matter more than later ones).
602
+ """)
603
 
604
  # Display assessment
605
  st.markdown("### Expert Assessment")
606
  st.markdown(assessment)
607
+
608
  st.info(f"Assessment completed in {assessment_time:.2f} seconds")
609
 
610
  # Add potential next steps based on the match percentage