Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -42,7 +42,16 @@ def load_models():
|
|
42 |
max_length=100,
|
43 |
truncation=True
|
44 |
)
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
return models
|
47 |
|
48 |
# Preload models immediately when app starts
|
@@ -152,7 +161,7 @@ def extract_industry(text, base_summary):
|
|
152 |
"information systems": ["information systems", "ERP", "systems management"]
|
153 |
}
|
154 |
|
155 |
-
#
|
156 |
combined_text = base_summary.lower()
|
157 |
|
158 |
counts = {}
|
@@ -210,6 +219,7 @@ def extract_skills_and_work(text):
|
|
210 |
for skill in skills:
|
211 |
if skill.lower() in text_lower:
|
212 |
category_skills.append(skill)
|
|
|
213 |
if category_skills:
|
214 |
found_skills.append(f"{category}: {', '.join(category_skills)}")
|
215 |
|
@@ -219,6 +229,7 @@ def extract_skills_and_work(text):
|
|
219 |
|
220 |
for idx, line in enumerate(lines):
|
221 |
line_lower = line.lower().strip()
|
|
|
222 |
# Start of work section
|
223 |
if not in_work_section:
|
224 |
if any(header in line_lower for header in work_headers):
|
@@ -228,6 +239,7 @@ def extract_skills_and_work(text):
|
|
228 |
elif in_work_section:
|
229 |
if any(header in line_lower for header in next_section_headers):
|
230 |
break
|
|
|
231 |
if line.strip():
|
232 |
work_section.append(line.strip())
|
233 |
|
@@ -235,16 +247,21 @@ def extract_skills_and_work(text):
|
|
235 |
if not work_section:
|
236 |
work_experience = "Work experience not clearly identified"
|
237 |
else:
|
|
|
238 |
work_lines = []
|
239 |
company_count = 0
|
|
|
|
|
240 |
for line in work_section:
|
|
|
241 |
if re.search(r'(19|20)\d{2}', line):
|
242 |
company_count += 1
|
243 |
if company_count <= 3: # Limit to 3 most recent positions
|
|
|
244 |
work_lines.append(f"**{line}**")
|
245 |
else:
|
246 |
break
|
247 |
-
elif company_count <= 3 and len(work_lines) < 10:
|
248 |
work_lines.append(line)
|
249 |
|
250 |
work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"
|
@@ -262,16 +279,17 @@ def summarize_resume_text(resume_text):
|
|
262 |
"""
|
263 |
start_time = time.time()
|
264 |
|
265 |
-
# First, generate a quick summary using
|
266 |
max_input_length = 1024 # Model limit
|
267 |
-
|
268 |
-
|
269 |
-
|
|
|
270 |
|
271 |
# Extract information in parallel where possible
|
272 |
-
|
273 |
-
|
274 |
-
name_future = executor.submit(extract_name, resume_text[:500]) # Only use
|
275 |
age_future = executor.submit(extract_age, resume_text)
|
276 |
industry_future = executor.submit(extract_industry, resume_text, base_summary)
|
277 |
skills_work_future = executor.submit(extract_skills_and_work, resume_text)
|
@@ -290,6 +308,7 @@ def summarize_resume_text(resume_text):
|
|
290 |
formatted_summary += f"Skills: {skills}"
|
291 |
|
292 |
execution_time = time.time() - start_time
|
|
|
293 |
return formatted_summary, execution_time
|
294 |
|
295 |
#####################################
|
@@ -299,6 +318,7 @@ def calculate_google_match_score(candidate_summary):
|
|
299 |
"""
|
300 |
Calculate a detailed match score breakdown based on skills and experience in the candidate summary
|
301 |
compared with what Google requires.
|
|
|
302 |
Returns:
|
303 |
- overall_score: A normalized score between 0 and 1
|
304 |
- category_scores: A dictionary with scores for each category
|
@@ -340,19 +360,26 @@ def calculate_google_match_score(candidate_summary):
|
|
340 |
category_scores = {}
|
341 |
for category, details in google_categories.items():
|
342 |
keywords = details["keywords"]
|
343 |
-
max_possible = len(keywords)
|
|
|
|
|
344 |
matches = sum(1 for keyword in keywords if keyword in summary_lower)
|
345 |
|
|
|
346 |
if max_possible > 0:
|
347 |
raw_score = matches / max_possible
|
|
|
348 |
category_scores[category] = min(1.0, raw_score * 1.5)
|
349 |
else:
|
350 |
category_scores[category] = 0
|
351 |
|
|
|
352 |
overall_score = sum(
|
353 |
score * google_categories[category]["weight"]
|
354 |
for category, score in category_scores.items()
|
355 |
)
|
|
|
|
|
356 |
overall_score = min(1.0, max(0.0, overall_score))
|
357 |
|
358 |
# Create score breakdown explanation
|
@@ -370,15 +397,16 @@ def calculate_google_match_score(candidate_summary):
|
|
370 |
#####################################
|
371 |
def generate_template_feedback(category_scores):
|
372 |
"""
|
373 |
-
Generate comprehensive template-based feedback without using ML model for speed
|
374 |
"""
|
375 |
start_time = time.time()
|
376 |
-
import random
|
377 |
|
|
|
378 |
sorted_categories = sorted(category_scores.items(), key=lambda x: x[1], reverse=True)
|
379 |
top_categories = sorted_categories[:2]
|
380 |
-
bottom_categories =
|
381 |
|
|
|
382 |
top_feedback_templates = {
|
383 |
"Technical Skills": [
|
384 |
"demonstrates strong technical skills with proficiency in programming languages and technical tools that Google values.",
|
@@ -407,6 +435,7 @@ def generate_template_feedback(category_scores):
|
|
407 |
]
|
408 |
}
|
409 |
|
|
|
410 |
bottom_feedback_templates = {
|
411 |
"Technical Skills": [
|
412 |
"should strengthen their technical skills, particularly in programming languages commonly used at Google such as Python, Java, or C++.",
|
@@ -435,21 +464,32 @@ def generate_template_feedback(category_scores):
|
|
435 |
]
|
436 |
}
|
437 |
|
|
|
|
|
|
|
|
|
438 |
top_category = top_categories[0][0]
|
|
|
439 |
top_feedback = random.choice(top_feedback_templates.get(top_category, ["shows notable skills"]))
|
440 |
|
|
|
441 |
bottom_category = bottom_categories[0][0]
|
|
|
442 |
bottom_feedback = random.choice(bottom_feedback_templates.get(bottom_category, ["could improve their skills"]))
|
443 |
|
|
|
444 |
feedback = f"This candidate {top_feedback} "
|
445 |
|
|
|
446 |
if top_categories[1][1] >= 0.6:
|
447 |
second_top = top_categories[1][0]
|
448 |
second_top_feedback = random.choice(top_feedback_templates.get(second_top, ["has good abilities"]))
|
449 |
feedback += f"The candidate also {second_top_feedback} "
|
450 |
|
|
|
451 |
feedback += f"However, the candidate {bottom_feedback} "
|
452 |
|
|
|
453 |
overall_score = sum(score * weight for (category, score), weight in
|
454 |
zip(category_scores.items(), [0.35, 0.25, 0.20, 0.10, 0.10]))
|
455 |
|
@@ -461,6 +501,54 @@ def generate_template_feedback(category_scores):
|
|
461 |
feedback += "The candidate would need significant development to meet Google's standards."
|
462 |
|
463 |
execution_time = time.time() - start_time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
464 |
return feedback, execution_time
|
465 |
|
466 |
#####################################
|
@@ -483,8 +571,13 @@ with st.expander("Google's Requirements", expanded=False):
|
|
483 |
# File uploader
|
484 |
uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
|
485 |
|
|
|
|
|
|
|
|
|
486 |
# Process button with optimized flow
|
487 |
if uploaded_file is not None and st.button("Analyze My Google Fit"):
|
|
|
488 |
progress_bar = st.progress(0)
|
489 |
status_text = st.empty()
|
490 |
|
@@ -501,6 +594,7 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
|
|
501 |
summary, summarization_time = summarize_resume_text(resume_text)
|
502 |
progress_bar.progress(50)
|
503 |
|
|
|
504 |
st.subheader("Your Resume Summary")
|
505 |
st.markdown(summary)
|
506 |
st.info(f"Summary generated in {summarization_time:.2f} seconds")
|
@@ -508,12 +602,24 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
|
|
508 |
# Step 3: Calculate scores and generate feedback
|
509 |
status_text.text("Step 3/3: Calculating Google fit scores...")
|
510 |
overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
|
511 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
512 |
|
513 |
progress_bar.progress(100)
|
|
|
|
|
514 |
status_text.empty()
|
515 |
|
|
|
516 |
st.subheader("Google Fit Assessment")
|
|
|
|
|
517 |
score_percent = int(overall_score * 100)
|
518 |
if overall_score >= 0.85:
|
519 |
st.success(f"**Overall Google Match Score:** {score_percent}% 🌟")
|
@@ -524,15 +630,20 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
|
|
524 |
else:
|
525 |
st.error(f"**Overall Google Match Score:** {score_percent}% 🔍")
|
526 |
|
|
|
527 |
st.markdown("### Score Calculation")
|
528 |
st.markdown(score_breakdown)
|
529 |
|
|
|
530 |
st.markdown("### Expert Assessment")
|
531 |
st.markdown(feedback)
|
532 |
|
533 |
st.info(f"Assessment completed in {feedback_time:.2f} seconds")
|
534 |
|
|
|
535 |
st.subheader("Recommended Next Steps")
|
|
|
|
|
536 |
weakest_categories = sorted(category_scores.items(), key=lambda x: x[1])[:2]
|
537 |
|
538 |
if overall_score >= 0.80:
|
|
|
42 |
max_length=100,
|
43 |
truncation=True
|
44 |
)
|
45 |
+
|
46 |
+
# Load the OPT-1.3B text-generation model for evaluation with optimized settings
|
47 |
+
models['evaluator'] = pipeline(
|
48 |
+
"text-generation",
|
49 |
+
model="facebook/opt-1.3b",
|
50 |
+
max_length=200,
|
51 |
+
num_beams=2,
|
52 |
+
early_stopping=True
|
53 |
+
)
|
54 |
+
|
55 |
return models
|
56 |
|
57 |
# Preload models immediately when app starts
|
|
|
161 |
"information systems": ["information systems", "ERP", "systems management"]
|
162 |
}
|
163 |
|
164 |
+
# Count occurrences of industry keywords - using the summary to speed up
|
165 |
combined_text = base_summary.lower()
|
166 |
|
167 |
counts = {}
|
|
|
219 |
for skill in skills:
|
220 |
if skill.lower() in text_lower:
|
221 |
category_skills.append(skill)
|
222 |
+
|
223 |
if category_skills:
|
224 |
found_skills.append(f"{category}: {', '.join(category_skills)}")
|
225 |
|
|
|
229 |
|
230 |
for idx, line in enumerate(lines):
|
231 |
line_lower = line.lower().strip()
|
232 |
+
|
233 |
# Start of work section
|
234 |
if not in_work_section:
|
235 |
if any(header in line_lower for header in work_headers):
|
|
|
239 |
elif in_work_section:
|
240 |
if any(header in line_lower for header in next_section_headers):
|
241 |
break
|
242 |
+
|
243 |
if line.strip():
|
244 |
work_section.append(line.strip())
|
245 |
|
|
|
247 |
if not work_section:
|
248 |
work_experience = "Work experience not clearly identified"
|
249 |
else:
|
250 |
+
# Summarize the work section: keep up to 3 dated positions and at most 7 lines overall
|
251 |
work_lines = []
|
252 |
company_count = 0
|
253 |
+
current_company = ""
|
254 |
+
|
255 |
for line in work_section:
|
256 |
+
# New company entry often has a date
|
257 |
if re.search(r'(19|20)\d{2}', line):
|
258 |
company_count += 1
|
259 |
if company_count <= 3: # Limit to 3 most recent positions
|
260 |
+
current_company = line
|
261 |
work_lines.append(f"**{line}**")
|
262 |
else:
|
263 |
break
|
264 |
+
elif company_count <= 3 and len(work_lines) < 10: # Limit total lines
|
265 |
work_lines.append(line)
|
266 |
|
267 |
work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"
|
|
|
279 |
"""
|
280 |
start_time = time.time()
|
281 |
|
282 |
+
# First, generate a quick summary using pre-loaded model
|
283 |
max_input_length = 1024 # Model limit
|
284 |
+
|
285 |
+
# Only summarize the first portion of text for speed
|
286 |
+
text_to_summarize = resume_text[:min(len(resume_text), max_input_length)]
|
287 |
+
base_summary = models['summarizer'](text_to_summarize)[0]['summary_text']
|
288 |
|
289 |
# Extract information in parallel where possible
|
290 |
+
with concurrent.futures.ThreadPoolExecutor() as executor:
|
291 |
+
# These can run in parallel
|
292 |
+
name_future = executor.submit(extract_name, resume_text[:500]) # Only use start of text
|
293 |
age_future = executor.submit(extract_age, resume_text)
|
294 |
industry_future = executor.submit(extract_industry, resume_text, base_summary)
|
295 |
skills_work_future = executor.submit(extract_skills_and_work, resume_text)
|
|
|
308 |
formatted_summary += f"Skills: {skills}"
|
309 |
|
310 |
execution_time = time.time() - start_time
|
311 |
+
|
312 |
return formatted_summary, execution_time
|
313 |
|
314 |
#####################################
|
|
|
318 |
"""
|
319 |
Calculate a detailed match score breakdown based on skills and experience in the candidate summary
|
320 |
compared with what Google requires.
|
321 |
+
|
322 |
Returns:
|
323 |
- overall_score: A normalized score between 0 and 1
|
324 |
- category_scores: A dictionary with scores for each category
|
|
|
360 |
category_scores = {}
|
361 |
for category, details in google_categories.items():
|
362 |
keywords = details["keywords"]
|
363 |
+
max_possible = len(keywords) # Maximum possible matches
|
364 |
+
|
365 |
+
# Count matches (unique keywords found)
|
366 |
matches = sum(1 for keyword in keywords if keyword in summary_lower)
|
367 |
|
368 |
+
# Calculate category score (0-1 range)
|
369 |
if max_possible > 0:
|
370 |
raw_score = matches / max_possible
|
371 |
+
# Apply a curve to reward having more matches
|
372 |
category_scores[category] = min(1.0, raw_score * 1.5)
|
373 |
else:
|
374 |
category_scores[category] = 0
|
375 |
|
376 |
+
# Calculate weighted overall score
|
377 |
overall_score = sum(
|
378 |
score * google_categories[category]["weight"]
|
379 |
for category, score in category_scores.items()
|
380 |
)
|
381 |
+
|
382 |
+
# Ensure overall score is in 0-1 range
|
383 |
overall_score = min(1.0, max(0.0, overall_score))
|
384 |
|
385 |
# Create score breakdown explanation
|
|
|
397 |
#####################################
|
398 |
def generate_template_feedback(category_scores):
|
399 |
"""
|
400 |
+
Generate comprehensive template-based feedback without using ML model for speed.
|
401 |
"""
|
402 |
start_time = time.time()
|
|
|
403 |
|
404 |
+
# Sort categories by score
|
405 |
sorted_categories = sorted(category_scores.items(), key=lambda x: x[1], reverse=True)
|
406 |
top_categories = sorted_categories[:2]
|
407 |
+
bottom_categories = sorted_categories[-2:]
|
408 |
|
409 |
+
# More detailed template-based feedback for top category
|
410 |
top_feedback_templates = {
|
411 |
"Technical Skills": [
|
412 |
"demonstrates strong technical skills with proficiency in programming languages and technical tools that Google values.",
|
|
|
435 |
]
|
436 |
}
|
437 |
|
438 |
+
# More detailed template-based feedback for bottom categories
|
439 |
bottom_feedback_templates = {
|
440 |
"Technical Skills": [
|
441 |
"should strengthen their technical skills, particularly in programming languages commonly used at Google such as Python, Java, or C++.",
|
|
|
464 |
]
|
465 |
}
|
466 |
|
467 |
+
# Generate feedback with more detailed templates
|
468 |
+
import random
|
469 |
+
|
470 |
+
# Get top strength feedback
|
471 |
top_category = top_categories[0][0]
|
472 |
+
top_score = top_categories[0][1]
|
473 |
top_feedback = random.choice(top_feedback_templates.get(top_category, ["shows notable skills"]))
|
474 |
|
475 |
+
# Get improvement area feedback
|
476 |
bottom_category = bottom_categories[0][0]
|
477 |
+
bottom_score = bottom_categories[0][1]
|
478 |
bottom_feedback = random.choice(bottom_feedback_templates.get(bottom_category, ["could improve their skills"]))
|
479 |
|
480 |
+
# Construct full feedback
|
481 |
feedback = f"This candidate {top_feedback} "
|
482 |
|
483 |
+
# Add second strength if it's good
|
484 |
if top_categories[1][1] >= 0.6:
|
485 |
second_top = top_categories[1][0]
|
486 |
second_top_feedback = random.choice(top_feedback_templates.get(second_top, ["has good abilities"]))
|
487 |
feedback += f"The candidate also {second_top_feedback} "
|
488 |
|
489 |
+
# Add improvement feedback
|
490 |
feedback += f"However, the candidate {bottom_feedback} "
|
491 |
|
492 |
+
# Add conclusion based on overall score
|
493 |
overall_score = sum(score * weight for (category, score), weight in
|
494 |
zip(category_scores.items(), [0.35, 0.25, 0.20, 0.10, 0.10]))
|
495 |
|
|
|
501 |
feedback += "The candidate would need significant development to meet Google's standards."
|
502 |
|
503 |
execution_time = time.time() - start_time
|
504 |
+
|
505 |
+
return feedback, execution_time
|
506 |
+
|
507 |
+
#####################################
|
508 |
+
# Function: Generate Aspect-Based Feedback with the evaluator model - Enhanced with Fallback
|
509 |
+
#####################################
|
510 |
+
@st.cache_data(show_spinner=False)
|
511 |
+
def generate_aspect_feedback(candidate_summary, category_scores, _evaluator=None):
|
512 |
+
"""
|
513 |
+
Use the evaluator text-generation model to generate feedback, with a robust fallback to template-based feedback.
|
514 |
+
"""
|
515 |
+
start_time = time.time()
|
516 |
+
|
517 |
+
evaluator = _evaluator or models['evaluator']
|
518 |
+
|
519 |
+
# Sort categories by score
|
520 |
+
sorted_categories = sorted(category_scores.items(), key=lambda x: x[1], reverse=True)
|
521 |
+
top_categories = sorted_categories[:2]
|
522 |
+
bottom_categories = sorted_categories[-2:]
|
523 |
+
|
524 |
+
# Create a more explicit prompt for the evaluator model
|
525 |
+
prompt = f"""
|
526 |
+
Generate a complete paragraph evaluating a job candidate for Google.
|
527 |
+
The candidate is strong in: {', '.join([cat for cat, _ in top_categories])}.
|
528 |
+
The candidate needs improvement in: {', '.join([cat for cat, _ in bottom_categories])}.
|
529 |
+
Start with 'This candidate' and write at least 3 sentences about their fit for Google.
|
530 |
+
"""
|
531 |
+
|
532 |
+
# Generate focused feedback with error handling
|
533 |
+
try:
|
534 |
+
feedback_result = evaluator(prompt, max_length=200, do_sample=False)
|
535 |
+
feedback = feedback_result[0]['generated_text']
|
536 |
+
|
537 |
+
# Validate the response - ensure it's not empty or too short
|
538 |
+
if len(feedback.strip()) < 20 or feedback.strip() == "This candidate" or feedback.strip() == "This candidate.":
|
539 |
+
# Fall back to template-based feedback if the model output is too short
|
540 |
+
return generate_template_feedback(category_scores)
|
541 |
+
|
542 |
+
# Ensure third-person tone
|
543 |
+
if not any(feedback.lower().startswith(start) for start in ["the candidate", "this candidate"]):
|
544 |
+
feedback = f"This candidate {feedback}"
|
545 |
+
except Exception as e:
|
546 |
+
# Fall back to template if there's an error
|
547 |
+
print(f"Error generating T5 feedback: {e}")
|
548 |
+
return generate_template_feedback(category_scores)
|
549 |
+
|
550 |
+
execution_time = time.time() - start_time
|
551 |
+
|
552 |
return feedback, execution_time
|
553 |
|
554 |
#####################################
|
|
|
571 |
# File uploader
|
572 |
uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
|
573 |
|
574 |
+
# Add a checkbox for template-based feedback (faster)
|
575 |
+
use_template_feedback = st.checkbox("Use faster template-based feedback (no ML)", value=False,
|
576 |
+
help="Generate feedback using pre-defined templates instead of T5 model")
|
577 |
+
|
578 |
# Process button with optimized flow
|
579 |
if uploaded_file is not None and st.button("Analyze My Google Fit"):
|
580 |
+
# Create a placeholder for the progress bar
|
581 |
progress_bar = st.progress(0)
|
582 |
status_text = st.empty()
|
583 |
|
|
|
594 |
summary, summarization_time = summarize_resume_text(resume_text)
|
595 |
progress_bar.progress(50)
|
596 |
|
597 |
+
# Display summary
|
598 |
st.subheader("Your Resume Summary")
|
599 |
st.markdown(summary)
|
600 |
st.info(f"Summary generated in {summarization_time:.2f} seconds")
|
|
|
602 |
# Step 3: Calculate scores and generate feedback
|
603 |
status_text.text("Step 3/3: Calculating Google fit scores...")
|
604 |
overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
|
605 |
+
|
606 |
+
# Choose feedback generation method based on checkbox
|
607 |
+
if use_template_feedback:
|
608 |
+
feedback, feedback_time = generate_template_feedback(category_scores)
|
609 |
+
else:
|
610 |
+
feedback, feedback_time = generate_aspect_feedback(
|
611 |
+
summary, category_scores, _evaluator=models['evaluator']
|
612 |
+
)
|
613 |
|
614 |
progress_bar.progress(100)
|
615 |
+
|
616 |
+
# Clear status messages
|
617 |
status_text.empty()
|
618 |
|
619 |
+
# Display Google fit results
|
620 |
st.subheader("Google Fit Assessment")
|
621 |
+
|
622 |
+
# Display overall score with appropriate color and emoji
|
623 |
score_percent = int(overall_score * 100)
|
624 |
if overall_score >= 0.85:
|
625 |
st.success(f"**Overall Google Match Score:** {score_percent}% 🌟")
|
|
|
630 |
else:
|
631 |
st.error(f"**Overall Google Match Score:** {score_percent}% 🔍")
|
632 |
|
633 |
+
# Display score breakdown
|
634 |
st.markdown("### Score Calculation")
|
635 |
st.markdown(score_breakdown)
|
636 |
|
637 |
+
# Display focused feedback
|
638 |
st.markdown("### Expert Assessment")
|
639 |
st.markdown(feedback)
|
640 |
|
641 |
st.info(f"Assessment completed in {feedback_time:.2f} seconds")
|
642 |
|
643 |
+
# Add potential next steps based on the score
|
644 |
st.subheader("Recommended Next Steps")
|
645 |
+
|
646 |
+
# Find the weakest categories
|
647 |
weakest_categories = sorted(category_scores.items(), key=lambda x: x[1])[:2]
|
648 |
|
649 |
if overall_score >= 0.80:
|