CR7CAD committed on
Commit
cda9adf
·
verified ·
1 Parent(s): d3c5eab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -15
app.py CHANGED
@@ -42,7 +42,16 @@ def load_models():
42
  max_length=100,
43
  truncation=True
44
  )
45
- # We don't need T5 model anymore since we're using template-based feedback
 
 
 
 
 
 
 
 
 
46
  return models
47
 
48
  # Preload models immediately when app starts
@@ -152,7 +161,7 @@ def extract_industry(text, base_summary):
152
  "information systems": ["information systems", "ERP", "systems management"]
153
  }
154
 
155
- # Use the base summary (already lowercased) to speed up matching
156
  combined_text = base_summary.lower()
157
 
158
  counts = {}
@@ -210,6 +219,7 @@ def extract_skills_and_work(text):
210
  for skill in skills:
211
  if skill.lower() in text_lower:
212
  category_skills.append(skill)
 
213
  if category_skills:
214
  found_skills.append(f"{category}: {', '.join(category_skills)}")
215
 
@@ -219,6 +229,7 @@ def extract_skills_and_work(text):
219
 
220
  for idx, line in enumerate(lines):
221
  line_lower = line.lower().strip()
 
222
  # Start of work section
223
  if not in_work_section:
224
  if any(header in line_lower for header in work_headers):
@@ -228,6 +239,7 @@ def extract_skills_and_work(text):
228
  elif in_work_section:
229
  if any(header in line_lower for header in next_section_headers):
230
  break
 
231
  if line.strip():
232
  work_section.append(line.strip())
233
 
@@ -235,16 +247,21 @@ def extract_skills_and_work(text):
235
  if not work_section:
236
  work_experience = "Work experience not clearly identified"
237
  else:
 
238
  work_lines = []
239
  company_count = 0
 
 
240
  for line in work_section:
 
241
  if re.search(r'(19|20)\d{2}', line):
242
  company_count += 1
243
  if company_count <= 3: # Limit to 3 most recent positions
 
244
  work_lines.append(f"**{line}**")
245
  else:
246
  break
247
- elif company_count <= 3 and len(work_lines) < 10:
248
  work_lines.append(line)
249
 
250
  work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"
@@ -262,16 +279,17 @@ def summarize_resume_text(resume_text):
262
  """
263
  start_time = time.time()
264
 
265
- # First, generate a quick summary using the preloaded model
266
  max_input_length = 1024 # Model limit
267
- # Only summarize the first 1024 characters for speed
268
- text_to_summarize = resume_text[:max_input_length]
269
- base_summary = models['summarizer'](text_to_summarize, truncation=True)[0]['summary_text']
 
270
 
271
  # Extract information in parallel where possible
272
- # Limit the number of workers to reduce overhead
273
- with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
274
- name_future = executor.submit(extract_name, resume_text[:500]) # Only use the start of text
275
  age_future = executor.submit(extract_age, resume_text)
276
  industry_future = executor.submit(extract_industry, resume_text, base_summary)
277
  skills_work_future = executor.submit(extract_skills_and_work, resume_text)
@@ -290,6 +308,7 @@ def summarize_resume_text(resume_text):
290
  formatted_summary += f"Skills: {skills}"
291
 
292
  execution_time = time.time() - start_time
 
293
  return formatted_summary, execution_time
294
 
295
  #####################################
@@ -299,6 +318,7 @@ def calculate_google_match_score(candidate_summary):
299
  """
300
  Calculate a detailed match score breakdown based on skills and experience in the candidate summary
301
  compared with what Google requires.
 
302
  Returns:
303
  - overall_score: A normalized score between 0 and 1
304
  - category_scores: A dictionary with scores for each category
@@ -340,19 +360,26 @@ def calculate_google_match_score(candidate_summary):
340
  category_scores = {}
341
  for category, details in google_categories.items():
342
  keywords = details["keywords"]
343
- max_possible = len(keywords)
 
 
344
  matches = sum(1 for keyword in keywords if keyword in summary_lower)
345
 
 
346
  if max_possible > 0:
347
  raw_score = matches / max_possible
 
348
  category_scores[category] = min(1.0, raw_score * 1.5)
349
  else:
350
  category_scores[category] = 0
351
 
 
352
  overall_score = sum(
353
  score * google_categories[category]["weight"]
354
  for category, score in category_scores.items()
355
  )
 
 
356
  overall_score = min(1.0, max(0.0, overall_score))
357
 
358
  # Create score breakdown explanation
@@ -370,15 +397,16 @@ def calculate_google_match_score(candidate_summary):
370
  #####################################
371
  def generate_template_feedback(category_scores):
372
  """
373
- Generate comprehensive template-based feedback without using ML model for speed and reliability.
374
  """
375
  start_time = time.time()
376
- import random
377
 
 
378
  sorted_categories = sorted(category_scores.items(), key=lambda x: x[1], reverse=True)
379
  top_categories = sorted_categories[:2]
380
- bottom_categories = sorted(category_scores.items(), key=lambda x: x[1])[:2]
381
 
 
382
  top_feedback_templates = {
383
  "Technical Skills": [
384
  "demonstrates strong technical skills with proficiency in programming languages and technical tools that Google values.",
@@ -407,6 +435,7 @@ def generate_template_feedback(category_scores):
407
  ]
408
  }
409
 
 
410
  bottom_feedback_templates = {
411
  "Technical Skills": [
412
  "should strengthen their technical skills, particularly in programming languages commonly used at Google such as Python, Java, or C++.",
@@ -435,21 +464,32 @@ def generate_template_feedback(category_scores):
435
  ]
436
  }
437
 
 
 
 
 
438
  top_category = top_categories[0][0]
 
439
  top_feedback = random.choice(top_feedback_templates.get(top_category, ["shows notable skills"]))
440
 
 
441
  bottom_category = bottom_categories[0][0]
 
442
  bottom_feedback = random.choice(bottom_feedback_templates.get(bottom_category, ["could improve their skills"]))
443
 
 
444
  feedback = f"This candidate {top_feedback} "
445
 
 
446
  if top_categories[1][1] >= 0.6:
447
  second_top = top_categories[1][0]
448
  second_top_feedback = random.choice(top_feedback_templates.get(second_top, ["has good abilities"]))
449
  feedback += f"The candidate also {second_top_feedback} "
450
 
 
451
  feedback += f"However, the candidate {bottom_feedback} "
452
 
 
453
  overall_score = sum(score * weight for (category, score), weight in
454
  zip(category_scores.items(), [0.35, 0.25, 0.20, 0.10, 0.10]))
455
 
@@ -461,6 +501,54 @@ def generate_template_feedback(category_scores):
461
  feedback += "The candidate would need significant development to meet Google's standards."
462
 
463
  execution_time = time.time() - start_time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
  return feedback, execution_time
465
 
466
  #####################################
@@ -483,8 +571,13 @@ with st.expander("Google's Requirements", expanded=False):
483
  # File uploader
484
  uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
485
 
 
 
 
 
486
  # Process button with optimized flow
487
  if uploaded_file is not None and st.button("Analyze My Google Fit"):
 
488
  progress_bar = st.progress(0)
489
  status_text = st.empty()
490
 
@@ -501,6 +594,7 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
501
  summary, summarization_time = summarize_resume_text(resume_text)
502
  progress_bar.progress(50)
503
 
 
504
  st.subheader("Your Resume Summary")
505
  st.markdown(summary)
506
  st.info(f"Summary generated in {summarization_time:.2f} seconds")
@@ -508,12 +602,24 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
508
  # Step 3: Calculate scores and generate feedback
509
  status_text.text("Step 3/3: Calculating Google fit scores...")
510
  overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
511
- feedback, feedback_time = generate_template_feedback(category_scores)
 
 
 
 
 
 
 
512
 
513
  progress_bar.progress(100)
 
 
514
  status_text.empty()
515
 
 
516
  st.subheader("Google Fit Assessment")
 
 
517
  score_percent = int(overall_score * 100)
518
  if overall_score >= 0.85:
519
  st.success(f"**Overall Google Match Score:** {score_percent}% 🌟")
@@ -524,15 +630,20 @@ if uploaded_file is not None and st.button("Analyze My Google Fit"):
524
  else:
525
  st.error(f"**Overall Google Match Score:** {score_percent}% 🔍")
526
 
 
527
  st.markdown("### Score Calculation")
528
  st.markdown(score_breakdown)
529
 
 
530
  st.markdown("### Expert Assessment")
531
  st.markdown(feedback)
532
 
533
  st.info(f"Assessment completed in {feedback_time:.2f} seconds")
534
 
 
535
  st.subheader("Recommended Next Steps")
 
 
536
  weakest_categories = sorted(category_scores.items(), key=lambda x: x[1])[:2]
537
 
538
  if overall_score >= 0.80:
 
42
  max_length=100,
43
  truncation=True
44
  )
45
+
46
+ # Load the facebook/opt-1.3b text-generation model for evaluation with optimized settings
47
+ models['evaluator'] = pipeline(
48
+ "text-generation",
49
+ model="facebook/opt-1.3b",
50
+ max_length=200,
51
+ num_beams=2,
52
+ early_stopping=True
53
+ )
54
+
55
  return models
56
 
57
  # Preload models immediately when app starts
 
161
  "information systems": ["information systems", "ERP", "systems management"]
162
  }
163
 
164
+ # Count occurrences of industry keywords - using the summary to speed up
165
  combined_text = base_summary.lower()
166
 
167
  counts = {}
 
219
  for skill in skills:
220
  if skill.lower() in text_lower:
221
  category_skills.append(skill)
222
+
223
  if category_skills:
224
  found_skills.append(f"{category}: {', '.join(category_skills)}")
225
 
 
229
 
230
  for idx, line in enumerate(lines):
231
  line_lower = line.lower().strip()
232
+
233
  # Start of work section
234
  if not in_work_section:
235
  if any(header in line_lower for header in work_headers):
 
239
  elif in_work_section:
240
  if any(header in line_lower for header in next_section_headers):
241
  break
242
+
243
  if line.strip():
244
  work_section.append(line.strip())
245
 
 
247
  if not work_section:
248
  work_experience = "Work experience not clearly identified"
249
  else:
250
+ # Just take the first 5-7 lines of the work section as a summary
251
  work_lines = []
252
  company_count = 0
253
+ current_company = ""
254
+
255
  for line in work_section:
256
+ # New company entry often has a date
257
  if re.search(r'(19|20)\d{2}', line):
258
  company_count += 1
259
  if company_count <= 3: # Limit to 3 most recent positions
260
+ current_company = line
261
  work_lines.append(f"**{line}**")
262
  else:
263
  break
264
+ elif company_count <= 3 and len(work_lines) < 10: # Limit total lines
265
  work_lines.append(line)
266
 
267
  work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"
 
279
  """
280
  start_time = time.time()
281
 
282
+ # First, generate a quick summary using pre-loaded model
283
  max_input_length = 1024 # Model limit
284
+
285
+ # Only summarize the first portion of text for speed
286
+ text_to_summarize = resume_text[:min(len(resume_text), max_input_length)]
287
+ base_summary = models['summarizer'](text_to_summarize)[0]['summary_text']
288
 
289
  # Extract information in parallel where possible
290
+ with concurrent.futures.ThreadPoolExecutor() as executor:
291
+ # These can run in parallel
292
+ name_future = executor.submit(extract_name, resume_text[:500]) # Only use start of text
293
  age_future = executor.submit(extract_age, resume_text)
294
  industry_future = executor.submit(extract_industry, resume_text, base_summary)
295
  skills_work_future = executor.submit(extract_skills_and_work, resume_text)
 
308
  formatted_summary += f"Skills: {skills}"
309
 
310
  execution_time = time.time() - start_time
311
+
312
  return formatted_summary, execution_time
313
 
314
  #####################################
 
318
  """
319
  Calculate a detailed match score breakdown based on skills and experience in the candidate summary
320
  compared with what Google requires.
321
+
322
  Returns:
323
  - overall_score: A normalized score between 0 and 1
324
  - category_scores: A dictionary with scores for each category
 
360
  category_scores = {}
361
  for category, details in google_categories.items():
362
  keywords = details["keywords"]
363
+ max_possible = len(keywords) # Maximum possible matches
364
+
365
+ # Count matches (unique keywords found)
366
  matches = sum(1 for keyword in keywords if keyword in summary_lower)
367
 
368
+ # Calculate category score (0-1 range)
369
  if max_possible > 0:
370
  raw_score = matches / max_possible
371
+ # Apply a curve to reward having more matches
372
  category_scores[category] = min(1.0, raw_score * 1.5)
373
  else:
374
  category_scores[category] = 0
375
 
376
+ # Calculate weighted overall score
377
  overall_score = sum(
378
  score * google_categories[category]["weight"]
379
  for category, score in category_scores.items()
380
  )
381
+
382
+ # Ensure overall score is in 0-1 range
383
  overall_score = min(1.0, max(0.0, overall_score))
384
 
385
  # Create score breakdown explanation
 
397
  #####################################
398
  def generate_template_feedback(category_scores):
399
  """
400
+ Generate comprehensive template-based feedback without using ML model for speed.
401
  """
402
  start_time = time.time()
 
403
 
404
+ # Sort categories by score
405
  sorted_categories = sorted(category_scores.items(), key=lambda x: x[1], reverse=True)
406
  top_categories = sorted_categories[:2]
407
+ bottom_categories = sorted_categories[-2:]
408
 
409
+ # More detailed template-based feedback for top category
410
  top_feedback_templates = {
411
  "Technical Skills": [
412
  "demonstrates strong technical skills with proficiency in programming languages and technical tools that Google values.",
 
435
  ]
436
  }
437
 
438
+ # More detailed template-based feedback for bottom categories
439
  bottom_feedback_templates = {
440
  "Technical Skills": [
441
  "should strengthen their technical skills, particularly in programming languages commonly used at Google such as Python, Java, or C++.",
 
464
  ]
465
  }
466
 
467
+ # Generate feedback with more detailed templates
468
+ import random
469
+
470
+ # Get top strength feedback
471
  top_category = top_categories[0][0]
472
+ top_score = top_categories[0][1]
473
  top_feedback = random.choice(top_feedback_templates.get(top_category, ["shows notable skills"]))
474
 
475
+ # Get improvement area feedback
476
  bottom_category = bottom_categories[0][0]
477
+ bottom_score = bottom_categories[0][1]
478
  bottom_feedback = random.choice(bottom_feedback_templates.get(bottom_category, ["could improve their skills"]))
479
 
480
+ # Construct full feedback
481
  feedback = f"This candidate {top_feedback} "
482
 
483
+ # Add second strength if it's good
484
  if top_categories[1][1] >= 0.6:
485
  second_top = top_categories[1][0]
486
  second_top_feedback = random.choice(top_feedback_templates.get(second_top, ["has good abilities"]))
487
  feedback += f"The candidate also {second_top_feedback} "
488
 
489
+ # Add improvement feedback
490
  feedback += f"However, the candidate {bottom_feedback} "
491
 
492
+ # Add conclusion based on overall score
493
  overall_score = sum(score * weight for (category, score), weight in
494
  zip(category_scores.items(), [0.35, 0.25, 0.20, 0.10, 0.10]))
495
 
 
501
  feedback += "The candidate would need significant development to meet Google's standards."
502
 
503
  execution_time = time.time() - start_time
504
+
505
+ return feedback, execution_time
506
+
507
+ #####################################
508
+ # Function: Generate Aspect-Based Feedback with the OPT-1.3B text-generation model - Enhanced with Template Fallback
509
+ #####################################
510
+ @st.cache_data(show_spinner=False)
511
+ def generate_aspect_feedback(candidate_summary, category_scores, _evaluator=None):
512
+ """
513
+ Use T5-small model to generate feedback with robust fallback to template-based feedback.
514
+ """
515
+ start_time = time.time()
516
+
517
+ evaluator = _evaluator or models['evaluator']
518
+
519
+ # Sort categories by score
520
+ sorted_categories = sorted(category_scores.items(), key=lambda x: x[1], reverse=True)
521
+ top_categories = sorted_categories[:2]
522
+ bottom_categories = sorted_categories[-2:]
523
+
524
+ # Create a more explicit prompt for T5
525
+ prompt = f"""
526
+ Generate a complete paragraph evaluating a job candidate for Google.
527
+ The candidate is strong in: {', '.join([cat for cat, _ in top_categories])}.
528
+ The candidate needs improvement in: {', '.join([cat for cat, _ in bottom_categories])}.
529
+ Start with 'This candidate' and write at least 3 sentences about their fit for Google.
530
+ """
531
+
532
+ # Generate focused feedback with error handling
533
+ try:
534
+ feedback_result = evaluator(prompt, max_length=200, do_sample=False)
535
+ feedback = feedback_result[0]['generated_text']
536
+
537
+ # Validate the response - ensure it's not empty or too short
538
+ if len(feedback.strip()) < 20 or feedback.strip() == "This candidate" or feedback.strip() == "This candidate.":
539
+ # Fall back to template-based if T5 output is too short
540
+ return generate_template_feedback(category_scores)
541
+
542
+ # Ensure third-person tone
543
+ if not any(feedback.lower().startswith(start) for start in ["the candidate", "this candidate"]):
544
+ feedback = f"This candidate {feedback}"
545
+ except Exception as e:
546
+ # Fall back to template if there's an error
547
+ print(f"Error generating T5 feedback: {e}")
548
+ return generate_template_feedback(category_scores)
549
+
550
+ execution_time = time.time() - start_time
551
+
552
  return feedback, execution_time
553
 
554
  #####################################
 
571
  # File uploader
572
  uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
573
 
574
+ # Add a checkbox for template-based feedback (faster)
575
+ use_template_feedback = st.checkbox("Use faster template-based feedback (no ML)", value=False,
576
+ help="Generate feedback using pre-defined templates instead of T5 model")
577
+
578
  # Process button with optimized flow
579
  if uploaded_file is not None and st.button("Analyze My Google Fit"):
580
+ # Create a placeholder for the progress bar
581
  progress_bar = st.progress(0)
582
  status_text = st.empty()
583
 
 
594
  summary, summarization_time = summarize_resume_text(resume_text)
595
  progress_bar.progress(50)
596
 
597
+ # Display summary
598
  st.subheader("Your Resume Summary")
599
  st.markdown(summary)
600
  st.info(f"Summary generated in {summarization_time:.2f} seconds")
 
602
  # Step 3: Calculate scores and generate feedback
603
  status_text.text("Step 3/3: Calculating Google fit scores...")
604
  overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)
605
+
606
+ # Choose feedback generation method based on checkbox
607
+ if use_template_feedback:
608
+ feedback, feedback_time = generate_template_feedback(category_scores)
609
+ else:
610
+ feedback, feedback_time = generate_aspect_feedback(
611
+ summary, category_scores, _evaluator=models['evaluator']
612
+ )
613
 
614
  progress_bar.progress(100)
615
+
616
+ # Clear status messages
617
  status_text.empty()
618
 
619
+ # Display Google fit results
620
  st.subheader("Google Fit Assessment")
621
+
622
+ # Display overall score with appropriate color and emoji
623
  score_percent = int(overall_score * 100)
624
  if overall_score >= 0.85:
625
  st.success(f"**Overall Google Match Score:** {score_percent}% 🌟")
 
630
  else:
631
  st.error(f"**Overall Google Match Score:** {score_percent}% 🔍")
632
 
633
+ # Display score breakdown
634
  st.markdown("### Score Calculation")
635
  st.markdown(score_breakdown)
636
 
637
+ # Display focused feedback
638
  st.markdown("### Expert Assessment")
639
  st.markdown(feedback)
640
 
641
  st.info(f"Assessment completed in {feedback_time:.2f} seconds")
642
 
643
+ # Add potential next steps based on the score
644
  st.subheader("Recommended Next Steps")
645
+
646
+ # Find the weakest categories
647
  weakest_categories = sorted(category_scores.items(), key=lambda x: x[1])[:2]
648
 
649
  if overall_score >= 0.80: