CR7CAD committed on
Commit
ce7c5e8
·
verified ·
1 Parent(s): 8e90008

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -22
app.py CHANGED
@@ -8,7 +8,7 @@ import time
8
  import re
9
  import concurrent.futures
10
  from functools import lru_cache
11
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
12
 
13
  # Set page title and hide sidebar
14
  st.set_page_config(
@@ -35,14 +35,21 @@ def load_models():
35
  """Load models at startup - using smaller/faster models"""
36
  with st.spinner("Loading AI models... This may take a minute on first run."):
37
  models = {}
38
- # Load smaller summarization model for speed
39
- models['summarizer'] = pipeline("summarization", model="facebook/bart-large-cnn", max_length=130)
 
 
 
 
 
40
 
41
- # Load T5-small model for evaluation
42
  models['evaluator'] = pipeline(
43
  "text2text-generation",
44
  model="google-t5/t5-small",
45
- max_length=200
 
 
46
  )
47
 
48
  return models
@@ -93,7 +100,9 @@ def extract_text_from_file(file_obj):
93
  text = f"Error processing TXT file: {e}"
94
  else:
95
  text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."
96
- return text
 
 
97
 
98
  #####################################
99
  # Functions for Information Extraction - Optimized
@@ -384,13 +393,124 @@ def calculate_google_match_score(candidate_summary):
384
  return overall_score, category_scores, score_breakdown
385
 
386
  #####################################
387
- # Function: Generate Aspect-Based Feedback with T5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
  #####################################
389
  @st.cache_data(show_spinner=False)
390
  def generate_aspect_feedback(candidate_summary, category_scores, _evaluator=None):
391
  """
392
- Use T5-small model to generate feedback on the candidate's strongest and weakest areas
393
- for Google, based on the category scores.
394
  """
395
  start_time = time.time()
396
 
@@ -401,19 +521,31 @@ def generate_aspect_feedback(candidate_summary, category_scores, _evaluator=None
401
  top_categories = sorted_categories[:2]
402
  bottom_categories = sorted_categories[-2:]
403
 
404
- # Create a prompt for T5
405
  prompt = f"""
406
- Generate specific third-person feedback on the candidate's fit for Google.
407
- Focus on these strengths: {', '.join([cat for cat, _ in top_categories])}.
408
- And these improvement areas: {', '.join([cat for cat, _ in bottom_categories])}.
 
409
  """
410
 
411
- # Generate focused feedback
412
- feedback = evaluator(prompt)[0]['generated_text']
413
-
414
- # Ensure third-person tone
415
- if not any(feedback.lower().startswith(start) for start in ["the candidate", "this candidate"]):
416
- feedback = f"This candidate {feedback}"
 
 
 
 
 
 
 
 
 
 
 
417
 
418
  execution_time = time.time() - start_time
419
 
@@ -439,6 +571,10 @@ with st.expander("Google's Requirements", expanded=False):
439
  # File uploader
440
  uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
441
 
 
 
 
 
442
  # Process button with optimized flow
443
  if uploaded_file is not None and st.button("Analyze My Google Fit"):
444
  # Create a placeholder for the progress bar
@@ -466,9 +602,15 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
466
  # Step 3: Calculate scores and generate feedback
467
  status_text.text("Step 3/3: Calculating Google fit scores...")
468
  overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
469
- feedback, feedback_time = generate_aspect_feedback(
470
- summary, category_scores, _evaluator=models['evaluator']
471
- )
 
 
 
 
 
 
472
  progress_bar.progress(100)
473
 
474
  # Clear status messages
 
8
  import re
9
  import concurrent.futures
10
  from functools import lru_cache
11
+ from transformers import pipeline
12
 
13
  # Set page title and hide sidebar
14
  st.set_page_config(
 
35
  """Load models at startup - using smaller/faster models"""
36
  with st.spinner("Loading AI models... This may take a minute on first run."):
37
  models = {}
38
+ # Use bart-base instead of bart-large-cnn for faster processing
39
+ models['summarizer'] = pipeline(
40
+ "summarization",
41
+ model="facebook/bart-base",
42
+ max_length=100,
43
+ truncation=True
44
+ )
45
 
46
+ # Load T5-small model for evaluation with optimized settings
47
  models['evaluator'] = pipeline(
48
  "text2text-generation",
49
  model="google-t5/t5-small",
50
+ max_length=200,
51
+ num_beams=2,
52
+ early_stopping=True
53
  )
54
 
55
  return models
 
100
  text = f"Error processing TXT file: {e}"
101
  else:
102
  text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."
103
+
104
+ # Limit text size for faster processing
105
+ return text[:15000] if text else text
106
 
107
  #####################################
108
  # Functions for Information Extraction - Optimized
 
393
  return overall_score, category_scores, score_breakdown
394
 
395
  #####################################
396
+ # Function: Generate Robust Feedback - Template-Based
397
+ #####################################
398
# Sentence fragments used when a category is one of the candidate's strengths.
# Each fragment is appended after "This candidate " / "The candidate also ".
_TOP_FEEDBACK_TEMPLATES = {
    "Technical Skills": [
        "demonstrates strong technical skills with proficiency in programming languages and technical tools that Google values.",
        "shows excellent technical capabilities that align well with Google's engineering requirements.",
        "possesses the technical expertise needed for Google's development environment.",
    ],
    "Advanced Technologies": [
        "has valuable experience with cutting-edge technologies that Google prioritizes in its innovation efforts.",
        "demonstrates knowledge in advanced technological areas that align with Google's future direction.",
        "shows proficiency in modern technologies that Google uses in its products and services.",
    ],
    "Problem Solving": [
        "exhibits strong problem-solving abilities which are fundamental to Google's engineering culture.",
        "demonstrates analytical thinking and problem-solving skills that Google seeks in candidates.",
        "shows the problem-solving aptitude that would be valuable in Google's collaborative environment.",
    ],
    "Innovation & Creativity": [
        "shows the creative thinking and innovation mindset that Google values in its workforce.",
        "demonstrates the innovative approach that would fit well with Google's creative culture.",
        "exhibits creativity that could contribute to Google's product development process.",
    ],
    "Teamwork & Leadership": [
        "demonstrates leadership qualities and teamwork skills that Google looks for in potential employees.",
        "shows collaborative abilities that would integrate well with Google's team-based structure.",
        "exhibits the interpersonal skills needed to thrive in Google's collaborative environment.",
    ],
}

# Sentence fragments used when a category is the candidate's weakest area.
# Each fragment is appended after "However, the candidate ".
_BOTTOM_FEEDBACK_TEMPLATES = {
    "Technical Skills": [
        "should strengthen their technical skills, particularly in programming languages commonly used at Google such as Python, Java, or C++.",
        "would benefit from developing more depth in technical tools and programming capabilities to meet Google's standards.",
        "needs to enhance their technical expertise to better align with Google's engineering requirements.",
    ],
    "Advanced Technologies": [
        "would benefit from gaining more experience with AI, machine learning, or cloud technologies that Google prioritizes.",
        "should develop more expertise in advanced technologies like machine learning or data science to increase their value to Google.",
        "needs more exposure to the cutting-edge technologies that drive Google's innovation.",
    ],
    "Problem Solving": [
        "should strengthen their problem-solving abilities, particularly with algorithms and data structures that are crucial for Google interviews.",
        "would benefit from developing stronger analytical and problem-solving skills to match Google's expectations.",
        "needs to improve their approach to complex problem-solving to meet Google's standards.",
    ],
    "Innovation & Creativity": [
        "could develop a more innovative mindset to better align with Google's creative culture.",
        "should work on demonstrating more creative thinking in their approach to match Google's innovation focus.",
        "would benefit from cultivating more creativity and out-of-the-box thinking valued at Google.",
    ],
    "Teamwork & Leadership": [
        "should focus on developing stronger leadership and teamwork skills to thrive in Google's collaborative environment.",
        "would benefit from more experience in collaborative settings to match Google's team-oriented culture.",
        "needs to strengthen their interpersonal and leadership capabilities to align with Google's expectations.",
    ],
}

# Weights used to derive the overall score that selects the closing sentence.
# NOTE(review): weights are paired with the scores by dict insertion order,
# exactly as before; confirm that order matches the category order produced
# by calculate_google_match_score. Extra categories beyond five are ignored
# and missing ones contribute nothing, because zip() stops at the shorter input.
_TEMPLATE_CATEGORY_WEIGHTS = (0.35, 0.25, 0.20, 0.10, 0.10)

# A second strength is only mentioned when its score reaches this value.
_SECOND_STRENGTH_THRESHOLD = 0.6


def generate_template_feedback(category_scores):
    """
    Generate template-based feedback without using an ML model, for speed.

    The feedback names the highest-scoring category as a strength, optionally
    a second strength, the lowest-scoring category as the improvement area,
    and a closing sentence chosen from the weighted overall score.

    Args:
        category_scores: Mapping of category name to numeric score. Category
            names without a template fall back to a generic phrase.

    Returns:
        A ``(feedback, execution_time)`` tuple: the feedback text and the
        elapsed wall-clock time in seconds.
    """
    # Kept as a function-scope import, as in the original, so this block does
    # not depend on the file's top-level import list.
    import random

    start_time = time.time()

    # Guard: with no scores there is nothing to rank; previously this raised
    # IndexError when indexing the empty ranking.
    if not category_scores:
        return (
            "No category scores were available to generate feedback for this candidate.",
            time.time() - start_time,
        )

    # Rank categories from strongest to weakest (sorted() is stable, so ties
    # keep their insertion order).
    ranked = sorted(category_scores.items(), key=lambda item: item[1], reverse=True)

    top_category = ranked[0][0]
    top_feedback = random.choice(
        _TOP_FEEDBACK_TEMPLATES.get(top_category, ["shows notable skills"])
    )
    parts = [f"This candidate {top_feedback} "]

    # Mention a second strength only if it scores well and is not also the
    # weakest category (which would contradict the improvement sentence).
    if len(ranked) > 2 and ranked[1][1] >= _SECOND_STRENGTH_THRESHOLD:
        second_top_feedback = random.choice(
            _TOP_FEEDBACK_TEMPLATES.get(ranked[1][0], ["has good abilities"])
        )
        parts.append(f"The candidate also {second_top_feedback} ")

    # The improvement area is the lowest-scoring category. The previous code
    # used the second-lowest one, which for two categories was the top one.
    # With a single category there is no distinct weak area, so skip it.
    if len(ranked) > 1:
        bottom_feedback = random.choice(
            _BOTTOM_FEEDBACK_TEMPLATES.get(ranked[-1][0], ["could improve their skills"])
        )
        parts.append(f"However, the candidate {bottom_feedback} ")

    # Weighted overall score, used only to pick the closing sentence.
    overall_score = sum(
        score * weight
        for score, weight in zip(category_scores.values(), _TEMPLATE_CATEGORY_WEIGHTS)
    )

    if overall_score >= 0.75:
        parts.append("Overall, this candidate shows strong potential for success at Google.")
    elif overall_score >= 0.6:
        parts.append("With these improvements, the candidate could be a good fit for Google.")
    else:
        parts.append("The candidate would need significant development to meet Google's standards.")

    feedback = "".join(parts)
    execution_time = time.time() - start_time

    return feedback, execution_time
506
+
507
+ #####################################
508
+ # Function: Generate Aspect-Based Feedback with T5 - Enhanced with Fallback
509
  #####################################
510
  @st.cache_data(show_spinner=False)
511
  def generate_aspect_feedback(candidate_summary, category_scores, _evaluator=None):
512
  """
513
+ Use T5-small model to generate feedback with robust fallback to template-based feedback.
 
514
  """
515
  start_time = time.time()
516
 
 
521
  top_categories = sorted_categories[:2]
522
  bottom_categories = sorted_categories[-2:]
523
 
524
+ # Create a more explicit prompt for T5
525
  prompt = f"""
526
+ Generate a complete paragraph evaluating a job candidate for Google.
527
+ The candidate is strong in: {', '.join([cat for cat, _ in top_categories])}.
528
+ The candidate needs improvement in: {', '.join([cat for cat, _ in bottom_categories])}.
529
+ Start with 'This candidate' and write at least 3 sentences about their fit for Google.
530
  """
531
 
532
+ # Generate focused feedback with error handling
533
+ try:
534
+ feedback_result = evaluator(prompt, max_length=200, do_sample=False)
535
+ feedback = feedback_result[0]['generated_text']
536
+
537
+ # Validate the response - ensure it's not empty or too short
538
+ if len(feedback.strip()) < 20 or feedback.strip() == "This candidate" or feedback.strip() == "This candidate.":
539
+ # Fall back to template-based if T5 output is too short
540
+ return generate_template_feedback(category_scores)
541
+
542
+ # Ensure third-person tone
543
+ if not any(feedback.lower().startswith(start) for start in ["the candidate", "this candidate"]):
544
+ feedback = f"This candidate {feedback}"
545
+ except Exception as e:
546
+ # Fall back to template if there's an error
547
+ print(f"Error generating T5 feedback: {e}")
548
+ return generate_template_feedback(category_scores)
549
 
550
  execution_time = time.time() - start_time
551
 
 
571
  # File uploader
572
  uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
573
 
574
+ # Add a checkbox for template-based feedback (faster)
575
+ use_template_feedback = st.checkbox("Use faster template-based feedback (no ML)", value=False,
576
+ help="Generate feedback using pre-defined templates instead of T5 model")
577
+
578
  # Process button with optimized flow
579
  if uploaded_file is not None and st.button("Analyze My Google Fit"):
580
  # Create a placeholder for the progress bar
 
602
  # Step 3: Calculate scores and generate feedback
603
  status_text.text("Step 3/3: Calculating Google fit scores...")
604
  overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
605
+
606
+ # Choose feedback generation method based on checkbox
607
+ if use_template_feedback:
608
+ feedback, feedback_time = generate_template_feedback(category_scores)
609
+ else:
610
+ feedback, feedback_time = generate_aspect_feedback(
611
+ summary, category_scores, _evaluator=models['evaluator']
612
+ )
613
+
614
  progress_bar.progress(100)
615
 
616
  # Clear status messages