Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,7 @@ import docx2txt
|
|
6 |
import tempfile
|
7 |
import time
|
8 |
import re
|
|
|
9 |
import concurrent.futures
|
10 |
from functools import lru_cache
|
11 |
from transformers import pipeline
|
@@ -315,71 +316,173 @@ def summarize_resume_text(resume_text):
|
|
315 |
def analyze_google_fit(resume_summary):
|
316 |
"""
|
317 |
Analyze how well the candidate fits Google's requirements.
|
318 |
-
This uses the
|
319 |
"""
|
320 |
start_time = time.time()
|
321 |
|
322 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
323 |
prompt = f"""
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
Evaluate this candidate for Google. Consider technical skills, experience, and culture fit.
|
330 |
-
Write a paragraph starting with "This candidate" describing their fit for Google.
|
331 |
-
Include strengths, weaknesses, and a match percentage (0-100%).
|
332 |
-
"""
|
333 |
|
334 |
try:
|
335 |
# Generate the assessment
|
336 |
-
|
337 |
prompt,
|
338 |
-
max_length=
|
339 |
do_sample=True,
|
340 |
-
temperature=0.
|
|
|
341 |
)
|
342 |
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
if not assessment.strip().startswith("This candidate"):
|
357 |
-
assessment = "This candidate " + assessment.strip()
|
358 |
-
|
359 |
-
# Extract match percentage if present
|
360 |
-
match_percentage = None
|
361 |
-
percentage_pattern = r'(\d{1,3})%'
|
362 |
-
match = re.search(percentage_pattern, assessment)
|
363 |
-
if match:
|
364 |
-
match_percentage = int(match.group(1))
|
365 |
-
# Ensure it's in valid range
|
366 |
-
match_percentage = min(100, max(0, match_percentage))
|
367 |
-
|
368 |
-
# If no percentage was found in the text, default to 50%
|
369 |
-
if match_percentage is None:
|
370 |
-
match_percentage = 50
|
371 |
-
# Add a percentage to the end of assessment
|
372 |
-
assessment += f" Overall match: {match_percentage}%."
|
373 |
|
|
|
|
|
|
|
|
|
374 |
except Exception as e:
|
375 |
-
# Fallback
|
376 |
-
assessment
|
377 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
378 |
|
379 |
execution_time = time.time() - start_time
|
380 |
|
381 |
return assessment, match_percentage, execution_time
|
382 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
383 |
#####################################
|
384 |
# Main Streamlit Interface
|
385 |
#####################################
|
@@ -434,8 +537,8 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
|
|
434 |
|
435 |
# Display Google fit results
|
436 |
st.subheader("Google Fit Assessment")
|
437 |
-
|
438 |
-
# Display match percentage with appropriate color and emoji
|
439 |
if match_percentage >= 85:
|
440 |
st.success(f"**Overall Google Match Score:** {match_percentage}% 🌟")
|
441 |
elif match_percentage >= 70:
|
@@ -444,10 +547,10 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
|
|
444 |
st.warning(f"**Overall Google Match Score:** {match_percentage}% ⚠️")
|
445 |
else:
|
446 |
st.error(f"**Overall Google Match Score:** {match_percentage}% 🔍")
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
|
452 |
st.info(f"Assessment completed in {assessment_time:.2f} seconds")
|
453 |
|
|
|
6 |
import tempfile
|
7 |
import time
|
8 |
import re
|
9 |
+
import math
|
10 |
import concurrent.futures
|
11 |
from functools import lru_cache
|
12 |
from transformers import pipeline
|
|
|
316 |
def _count_keyword_hits(text, keywords):
    """Count how many of *keywords* occur in *text* as whole words or phrases."""
    return sum(
        1
        for keyword in keywords
        # Lookarounds rather than \b so terms ending in punctuation ("c++")
        # still match; the optional "s" tolerates simple plurals ("teams").
        # Plain substring tests would let "ai" hit "maintain" and "go" hit
        # "google", inflating nearly every score.
        if re.search(rf"(?<!\w){re.escape(keyword)}s?(?!\w)", text)
    )


def analyze_google_fit(resume_summary):
    """
    Analyze how well the candidate fits Google's requirements.

    The match score is computed deterministically from weighted keyword
    categories; the text model is only used to phrase the assessment. When the
    model call fails or yields no usable text, the assessment comes from
    generate_manual_assessment() instead.

    Args:
        resume_summary: Resume summary text. None is treated as empty.

    Returns:
        A tuple (assessment, match_percentage, execution_time): the assessment
        text, the integer score clamped to 35-92, and the elapsed seconds.
    """
    start_time = time.time()

    # A missing summary would otherwise crash on .lower() before the
    # try/except fallback below gets a chance to run.
    resume_summary = resume_summary or ""

    # First, calculate a realistic score based on keyword matching and balanced criteria
    google_keywords = {
        "technical_skills": ["python", "java", "c++", "javascript", "go", "sql", "algorithms", "data structures", "coding"],
        "advanced_tech": ["machine learning", "ai", "artificial intelligence", "cloud", "data science", "big data", "tensorflow", "deep learning"],
        "problem_solving": ["problem solving", "analytical", "critical thinking", "troubleshooting", "debugging", "optimization"],
        "innovation": ["innovation", "creative", "creativity", "design thinking", "research", "novel"],
        "soft_skills": ["team", "leadership", "collaboration", "communication", "agile", "project management"]
    }

    # Category weights sum to 1.0
    category_weights = {
        "technical_skills": 0.35,
        "advanced_tech": 0.25,
        "problem_solving": 0.20,
        "innovation": 0.10,
        "soft_skills": 0.10
    }

    resume_lower = resume_summary.lower()
    category_scores = {}

    for category, keywords in google_keywords.items():
        matches = _count_keyword_hits(resume_lower, keywords)
        max_matches = min(len(keywords), 5)  # Cap maximum possible matches

        if matches == 0:
            category_scores[category] = 0.0
        else:
            # Logarithmic scaling gives diminishing returns: the first few
            # matches matter more than later ones, and no category exceeds 0.9.
            category_scores[category] = min(0.9, (math.log(matches + 1) / math.log(max_matches + 1)) * 0.9)

    # Every category is capped at 0.9 and the weights sum to 1.0, so the
    # weighted score cannot exceed 0.9 (i.e. 90%).
    weighted_score = sum(score * category_weights[category] for category, score in category_scores.items())

    # Clamp to a floor of 35 and a ceiling of 92
    match_percentage = min(92, max(35, int(weighted_score * 100)))

    # Now create a focused prompt for generating the assessment
    strengths = [category.replace("_", " ") for category, score in category_scores.items() if score > 0.5]
    weaknesses = [category.replace("_", " ") for category, score in category_scores.items() if score < 0.4]

    # Extract key parts from resume for better context
    skills_match = re.search(r'Skills:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
    skills_text = skills_match.group(0) if skills_match else ""

    work_match = re.search(r'Previous Work Experience:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
    work_text = work_match.group(0) if work_match else ""

    prompt = f"""
    Resume shows: {skills_text} {work_text}
    Google needs: {GOOGLE_DESCRIPTION[:100]}
    Analyze fit (strengths: {', '.join(strengths)}, areas for improvement: {', '.join(weaknesses)})
    This candidate """

    try:
        # Generate the assessment
        assessment_results = models['evaluator'](
            prompt,
            max_length=250,
            do_sample=True,
            temperature=0.4,
            num_return_sequences=2
        )

        # Find a good response
        assessment = None
        for result in assessment_results:
            text = result['generated_text']

            # NOTE(review): some pipelines return the prompt followed by the
            # continuation. If that happened, keep only the continuation so the
            # "This candidate" check below can succeed; otherwise this is a no-op.
            if text.startswith(prompt):
                text = "This candidate " + text[len(prompt):].lstrip()
            text = text.strip()

            # Clean up obvious artifacts
            text = text.replace("This candidate This candidate", "This candidate")
            text = re.sub(r'(Resume shows:|Google needs:|Analyze fit|strengths:|areas for improvement:)', '', text)

            # Check if it looks valid
            if text.startswith("This candidate") and len(text) > 40:
                assessment = text
                break

        # If no good response was found, fall back to manual assessment
        if not assessment:
            assessment, _ = generate_manual_assessment(resume_summary, match_percentage)

    except Exception as e:
        # Deliberate best-effort boundary: any model failure degrades to the
        # keyword-based assessment with the already calculated score.
        assessment, _ = generate_manual_assessment(resume_summary, match_percentage)
        print(f"Error in assessment generation: {e}")

    # Final cleanup to remove any remaining prompt artifacts
    assessment = re.sub(r'score: \d+%', '', assessment)  # Remove any existing score

    # Add the calculated score if not already present
    if "%" not in assessment:
        assessment += f" Overall, they have approximately a {match_percentage}% match with Google's requirements."

    execution_time = time.time() - start_time

    return assessment, match_percentage, execution_time
|
424 |
|
425 |
+
def _mentions_keyword(text, keyword):
    """Return True if *keyword* occurs in *text* as a whole word or phrase."""
    # Lookarounds rather than \b so terms ending in punctuation ("c++") still
    # match; the optional "s" tolerates simple plurals ("teams"). A plain
    # substring test would let "ai" hit "maintain" and "go" hit "google".
    return re.search(rf"(?<!\w){re.escape(keyword)}s?(?!\w)", text) is not None


def generate_manual_assessment(resume_summary, match_percentage):
    """
    Generate a manual assessment based on keywords in the resume
    as a fallback when the model fails. Uses the pre-calculated match percentage.

    A category with two or more keyword hits is reported as a strength, a
    category with none as a weakness; a single hit is reported as neither.

    Args:
        resume_summary: Resume summary text. None is treated as empty.
        match_percentage: Pre-calculated score, embedded in the text and
            returned unchanged.

    Returns:
        A tuple (assessment, match_percentage).
    """
    # Define key Google skill categories
    key_skills = {
        "technical": ["python", "java", "javascript", "c++", "go", "programming", "coding", "software development"],
        "advanced_tech": ["machine learning", "ai", "artificial intelligence", "cloud", "data science", "big data"],
        "problem_solving": ["problem solving", "algorithms", "analytical", "critical thinking", "troubleshooting"],
        "innovation": ["innovation", "creative", "creativity", "design thinking"],
        "teamwork": ["team", "leadership", "collaboration", "communication", "agile"]
    }

    # Per category: (phrase used as a strength, phrase used as a weakness)
    category_phrases = {
        "technical": ("strong technical skills", "technical programming skills"),
        "advanced_tech": ("experience with advanced technologies", "knowledge of advanced technologies"),
        "problem_solving": ("problem-solving abilities", "demonstrated problem-solving capabilities"),
        "innovation": ("innovative thinking", "innovation mindset"),
        "teamwork": ("teamwork and collaboration skills", "team collaboration experience")
    }

    summary_lower = (resume_summary or "").lower()

    # Count matches in each category
    strengths = []
    weaknesses = []

    for category, keywords in key_skills.items():
        matches = sum(1 for keyword in keywords if _mentions_keyword(summary_lower, keyword))
        strength_phrase, weakness_phrase = category_phrases[category]

        if matches >= 2:
            strengths.append(strength_phrase)
        elif matches == 0:
            weaknesses.append(weakness_phrase)

    # Construct assessment. Each branch ends its own sentence so the text never
    # contains a dangling " ." or an empty "This candidate ." sentence.
    leading_strengths = ', '.join(strengths[:2])
    if len(strengths) > 2:
        assessment = f"This candidate demonstrates {leading_strengths} as well as {strengths[2]}. "
    elif strengths:
        assessment = f"This candidate demonstrates {leading_strengths}. "
    else:
        assessment = "This candidate does not show standout strengths in the assessed skill areas. "

    if weaknesses:
        assessment += f"However, they could benefit from developing stronger {' and '.join(weaknesses[:2])}. "

    assessment += f"Based on the resume analysis, they appear to be a {match_percentage}% match for Google's requirements."

    return assessment, match_percentage
|
485 |
+
|
486 |
#####################################
|
487 |
# Main Streamlit Interface
|
488 |
#####################################
|
|
|
537 |
|
538 |
# Display Google fit results
|
539 |
st.subheader("Google Fit Assessment")
|
540 |
+
|
541 |
+
# Display match percentage with appropriate color and emoji - with more realistic thresholds
|
542 |
if match_percentage >= 85:
|
543 |
st.success(f"**Overall Google Match Score:** {match_percentage}% 🌟")
|
544 |
elif match_percentage >= 70:
|
|
|
547 |
st.warning(f"**Overall Google Match Score:** {match_percentage}% ⚠️")
|
548 |
else:
|
549 |
st.error(f"**Overall Google Match Score:** {match_percentage}% 🔍")
|
550 |
+
|
551 |
+
# Display assessment
|
552 |
+
st.markdown("### Expert Assessment")
|
553 |
+
st.markdown(assessment)
|
554 |
|
555 |
st.info(f"Assessment completed in {assessment_time:.2f} seconds")
|
556 |
|