Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ import time
|
|
8 |
import re
|
9 |
import concurrent.futures
|
10 |
from functools import lru_cache
|
11 |
-
from transformers import pipeline
|
12 |
|
13 |
# Set page title and hide sidebar
|
14 |
st.set_page_config(
|
@@ -35,14 +35,21 @@ def load_models():
|
|
35 |
"""Load models at startup - using smaller/faster models"""
|
36 |
with st.spinner("Loading AI models... This may take a minute on first run."):
|
37 |
models = {}
|
38 |
-
#
|
39 |
-
models['summarizer'] = pipeline(
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
-
# Load T5-small model for evaluation
|
42 |
models['evaluator'] = pipeline(
|
43 |
"text2text-generation",
|
44 |
model="google-t5/t5-small",
|
45 |
-
max_length=200
|
|
|
|
|
46 |
)
|
47 |
|
48 |
return models
|
@@ -93,7 +100,9 @@ def extract_text_from_file(file_obj):
|
|
93 |
text = f"Error processing TXT file: {e}"
|
94 |
else:
|
95 |
text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."
|
96 |
-
|
|
|
|
|
97 |
|
98 |
#####################################
|
99 |
# Functions for Information Extraction - Optimized
|
@@ -384,13 +393,124 @@ def calculate_google_match_score(candidate_summary):
|
|
384 |
return overall_score, category_scores, score_breakdown
|
385 |
|
386 |
#####################################
|
387 |
-
# Function: Generate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
388 |
#####################################
|
389 |
@st.cache_data(show_spinner=False)
|
390 |
def generate_aspect_feedback(candidate_summary, category_scores, _evaluator=None):
|
391 |
"""
|
392 |
-
Use T5-small model to generate feedback
|
393 |
-
for Google, based on the category scores.
|
394 |
"""
|
395 |
start_time = time.time()
|
396 |
|
@@ -401,19 +521,31 @@ def generate_aspect_feedback(candidate_summary, category_scores, _evaluator=None
|
|
401 |
top_categories = sorted_categories[:2]
|
402 |
bottom_categories = sorted_categories[-2:]
|
403 |
|
404 |
-
# Create a prompt for T5
|
405 |
prompt = f"""
|
406 |
-
Generate
|
407 |
-
|
408 |
-
|
|
|
409 |
"""
|
410 |
|
411 |
-
# Generate focused feedback
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
417 |
|
418 |
execution_time = time.time() - start_time
|
419 |
|
@@ -439,6 +571,10 @@ with st.expander("Google's Requirements", expanded=False):
|
|
439 |
# File uploader
|
440 |
uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
|
441 |
|
|
|
|
|
|
|
|
|
442 |
# Process button with optimized flow
|
443 |
if uploaded_file is not None and st.button("Analyze My Google Fit"):
|
444 |
# Create a placeholder for the progress bar
|
@@ -466,9 +602,15 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
|
|
466 |
# Step 3: Calculate scores and generate feedback
|
467 |
status_text.text("Step 3/3: Calculating Google fit scores...")
|
468 |
overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
|
469 |
-
|
470 |
-
|
471 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
472 |
progress_bar.progress(100)
|
473 |
|
474 |
# Clear status messages
|
|
|
8 |
import re
|
9 |
import concurrent.futures
|
10 |
from functools import lru_cache
|
11 |
+
from transformers import pipeline
|
12 |
|
13 |
# Set page title and hide sidebar
|
14 |
st.set_page_config(
|
|
|
35 |
"""Load models at startup - using smaller/faster models"""
|
36 |
with st.spinner("Loading AI models... This may take a minute on first run."):
|
37 |
models = {}
|
38 |
+
# Use bart-base instead of bart-large-cnn for faster processing
|
39 |
+
models['summarizer'] = pipeline(
|
40 |
+
"summarization",
|
41 |
+
model="facebook/bart-base",
|
42 |
+
max_length=100,
|
43 |
+
truncation=True
|
44 |
+
)
|
45 |
|
46 |
+
# Load T5-small model for evaluation with optimized settings
|
47 |
models['evaluator'] = pipeline(
|
48 |
"text2text-generation",
|
49 |
model="google-t5/t5-small",
|
50 |
+
max_length=200,
|
51 |
+
num_beams=2,
|
52 |
+
early_stopping=True
|
53 |
)
|
54 |
|
55 |
return models
|
|
|
100 |
text = f"Error processing TXT file: {e}"
|
101 |
else:
|
102 |
text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."
|
103 |
+
|
104 |
+
# Limit text size for faster processing
|
105 |
+
return text[:15000] if text else text
|
106 |
|
107 |
#####################################
|
108 |
# Functions for Information Extraction - Optimized
|
|
|
393 |
return overall_score, category_scores, score_breakdown
|
394 |
|
395 |
#####################################
|
396 |
+
# Function: Generate Robust Feedback - Template-Based
|
397 |
+
#####################################
|
398 |
+
def generate_template_feedback(category_scores):
    """
    Generate comprehensive template-based feedback without using ML model for speed.

    The feedback paragraph is assembled from canned sentences: one for the
    highest-scoring category, optionally one for the runner-up (only when its
    score is at least 0.6), one for the lowest-scoring category, and a closing
    sentence chosen from a weighted overall score.

    Args:
        category_scores: Mapping of category name to numeric score. Categories
            without a template entry fall back to a generic sentence.

    Returns:
        A ``(feedback, execution_time)`` tuple: the feedback paragraph and the
        wall-clock seconds spent building it.
    """
    # Local import keeps this block self-contained regardless of file-level imports.
    import random

    start_time = time.time()

    # Sort categories by score, strongest first
    sorted_categories = sorted(category_scores.items(), key=lambda x: x[1], reverse=True)

    # Nothing to rank: return a neutral message instead of raising IndexError.
    if not sorted_categories:
        return "No category scores were available to generate feedback.", time.time() - start_time

    # More detailed template-based feedback for top category
    top_feedback_templates = {
        "Technical Skills": [
            "demonstrates strong technical skills with proficiency in programming languages and technical tools that Google values.",
            "shows excellent technical capabilities that align well with Google's engineering requirements.",
            "possesses the technical expertise needed for Google's development environment.",
        ],
        "Advanced Technologies": [
            "has valuable experience with cutting-edge technologies that Google prioritizes in its innovation efforts.",
            "demonstrates knowledge in advanced technological areas that align with Google's future direction.",
            "shows proficiency in modern technologies that Google uses in its products and services.",
        ],
        "Problem Solving": [
            "exhibits strong problem-solving abilities which are fundamental to Google's engineering culture.",
            "demonstrates analytical thinking and problem-solving skills that Google seeks in candidates.",
            "shows the problem-solving aptitude that would be valuable in Google's collaborative environment.",
        ],
        "Innovation & Creativity": [
            "shows the creative thinking and innovation mindset that Google values in its workforce.",
            "demonstrates the innovative approach that would fit well with Google's creative culture.",
            "exhibits creativity that could contribute to Google's product development process.",
        ],
        "Teamwork & Leadership": [
            "demonstrates leadership qualities and teamwork skills that Google looks for in potential employees.",
            "shows collaborative abilities that would integrate well with Google's team-based structure.",
            "exhibits the interpersonal skills needed to thrive in Google's collaborative environment.",
        ],
    }

    # More detailed template-based feedback for bottom categories
    bottom_feedback_templates = {
        "Technical Skills": [
            "should strengthen their technical skills, particularly in programming languages commonly used at Google such as Python, Java, or C++.",
            "would benefit from developing more depth in technical tools and programming capabilities to meet Google's standards.",
            "needs to enhance their technical expertise to better align with Google's engineering requirements.",
        ],
        "Advanced Technologies": [
            "would benefit from gaining more experience with AI, machine learning, or cloud technologies that Google prioritizes.",
            "should develop more expertise in advanced technologies like machine learning or data science to increase their value to Google.",
            "needs more exposure to the cutting-edge technologies that drive Google's innovation.",
        ],
        "Problem Solving": [
            "should strengthen their problem-solving abilities, particularly with algorithms and data structures that are crucial for Google interviews.",
            "would benefit from developing stronger analytical and problem-solving skills to match Google's expectations.",
            "needs to improve their approach to complex problem-solving to meet Google's standards.",
        ],
        "Innovation & Creativity": [
            "could develop a more innovative mindset to better align with Google's creative culture.",
            "should work on demonstrating more creative thinking in their approach to match Google's innovation focus.",
            "would benefit from cultivating more creativity and out-of-the-box thinking valued at Google.",
        ],
        "Teamwork & Leadership": [
            "should focus on developing stronger leadership and teamwork skills to thrive in Google's collaborative environment.",
            "would benefit from more experience in collaborative settings to match Google's team-oriented culture.",
            "needs to strengthen their interpersonal and leadership capabilities to align with Google's expectations.",
        ],
    }

    # Get top strength feedback
    top_category = sorted_categories[0][0]
    top_feedback = random.choice(top_feedback_templates.get(top_category, ["shows notable skills"]))

    # Construct full feedback
    feedback = f"This candidate {top_feedback} "

    # The remaining sentences need at least two ranked categories.
    if len(sorted_categories) > 1:
        # Add second strength if it's good
        second_top, second_score = sorted_categories[1]
        if second_score >= 0.6:
            second_top_feedback = random.choice(top_feedback_templates.get(second_top, ["has good abilities"]))
            feedback += f"The candidate also {second_top_feedback} "

        # Add improvement feedback for the weakest category, i.e. the LAST
        # entry of the descending sort (not the second-to-last).
        bottom_category = sorted_categories[-1][0]
        bottom_feedback = random.choice(bottom_feedback_templates.get(bottom_category, ["could improve their skills"]))
        feedback += f"However, the candidate {bottom_feedback} "

    # Add conclusion based on overall score.
    # NOTE(review): weights are applied positionally, in the dict's iteration
    # order; this presumably expects five categories in a fixed order - confirm
    # against the function that builds category_scores. zip() silently stops at
    # the shorter of the two sequences.
    overall_score = sum(
        score * weight
        for (_category, score), weight in zip(category_scores.items(), [0.35, 0.25, 0.20, 0.10, 0.10])
    )

    if overall_score >= 0.75:
        feedback += "Overall, this candidate shows strong potential for success at Google."
    elif overall_score >= 0.6:
        feedback += "With these improvements, the candidate could be a good fit for Google."
    else:
        feedback += "The candidate would need significant development to meet Google's standards."

    execution_time = time.time() - start_time

    return feedback, execution_time
|
506 |
+
|
507 |
+
#####################################
|
508 |
+
# Function: Generate Aspect-Based Feedback with T5 - Enhanced with Fallback
|
509 |
#####################################
|
510 |
@st.cache_data(show_spinner=False)
|
511 |
def generate_aspect_feedback(candidate_summary, category_scores, _evaluator=None):
|
512 |
"""
|
513 |
+
Use T5-small model to generate feedback with robust fallback to template-based feedback.
|
|
|
514 |
"""
|
515 |
start_time = time.time()
|
516 |
|
|
|
521 |
top_categories = sorted_categories[:2]
|
522 |
bottom_categories = sorted_categories[-2:]
|
523 |
|
524 |
+
# Create a more explicit prompt for T5
|
525 |
prompt = f"""
|
526 |
+
Generate a complete paragraph evaluating a job candidate for Google.
|
527 |
+
The candidate is strong in: {', '.join([cat for cat, _ in top_categories])}.
|
528 |
+
The candidate needs improvement in: {', '.join([cat for cat, _ in bottom_categories])}.
|
529 |
+
Start with 'This candidate' and write at least 3 sentences about their fit for Google.
|
530 |
"""
|
531 |
|
532 |
+
# Generate focused feedback with error handling
|
533 |
+
try:
|
534 |
+
feedback_result = evaluator(prompt, max_length=200, do_sample=False)
|
535 |
+
feedback = feedback_result[0]['generated_text']
|
536 |
+
|
537 |
+
# Validate the response - ensure it's not empty or too short
|
538 |
+
if len(feedback.strip()) < 20 or feedback.strip() == "This candidate" or feedback.strip() == "This candidate.":
|
539 |
+
# Fall back to template-based if T5 output is too short
|
540 |
+
return generate_template_feedback(category_scores)
|
541 |
+
|
542 |
+
# Ensure third-person tone
|
543 |
+
if not any(feedback.lower().startswith(start) for start in ["the candidate", "this candidate"]):
|
544 |
+
feedback = f"This candidate {feedback}"
|
545 |
+
except Exception as e:
|
546 |
+
# Fall back to template if there's an error
|
547 |
+
print(f"Error generating T5 feedback: {e}")
|
548 |
+
return generate_template_feedback(category_scores)
|
549 |
|
550 |
execution_time = time.time() - start_time
|
551 |
|
|
|
571 |
# File uploader
|
572 |
uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
|
573 |
|
574 |
+
# Add a checkbox for template-based feedback (faster)
|
575 |
+
use_template_feedback = st.checkbox("Use faster template-based feedback (no ML)", value=False,
|
576 |
+
help="Generate feedback using pre-defined templates instead of T5 model")
|
577 |
+
|
578 |
# Process button with optimized flow
|
579 |
if uploaded_file is not None and st.button("Analyze My Google Fit"):
|
580 |
# Create a placeholder for the progress bar
|
|
|
602 |
# Step 3: Calculate scores and generate feedback
|
603 |
status_text.text("Step 3/3: Calculating Google fit scores...")
|
604 |
overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
|
605 |
+
|
606 |
+
# Choose feedback generation method based on checkbox
|
607 |
+
if use_template_feedback:
|
608 |
+
feedback, feedback_time = generate_template_feedback(category_scores)
|
609 |
+
else:
|
610 |
+
feedback, feedback_time = generate_aspect_feedback(
|
611 |
+
summary, category_scores, _evaluator=models['evaluator']
|
612 |
+
)
|
613 |
+
|
614 |
progress_bar.progress(100)
|
615 |
|
616 |
# Clear status messages
|