CR7CAD commited on
Commit
848089c
·
verified ·
1 Parent(s): 63bc584

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +397 -684
app.py CHANGED
@@ -1,747 +1,460 @@
1
- import os
2
- import io
3
  import streamlit as st
4
- import docx
5
- import docx2txt
6
- import tempfile
7
- import time
8
- import re
9
- import math
10
- import concurrent.futures
11
  import pandas as pd
12
- from functools import lru_cache
13
- from transformers import pipeline
 
 
 
 
 
 
14
 
15
- # Set page title and hide sidebar
16
  st.set_page_config(
17
- page_title="Resume-Google Job Match Analyzer",
18
- initial_sidebar_state="collapsed"
 
 
19
  )
20
 
21
- # Hide sidebar completely with custom CSS
22
- st.markdown("""
23
- <style>
24
- [data-testid="collapsedControl"] {display: none;}
25
- section[data-testid="stSidebar"] {display: none;}
26
- </style>
27
- """, unsafe_allow_html=True)
 
 
 
28
 
29
- # Pre-defined company description for Google
30
- GOOGLE_DESCRIPTION = """Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong problem-solving skills, adaptability, and collaboration abilities. Technical roles require proficiency in programming languages such as Python, Java, C++, Go, or JavaScript, with expertise in data structures, algorithms, and system design. Additionally, skills in AI, cybersecurity, UX/UI design, and digital marketing are highly valued. Google fosters a culture of innovation, expecting candidates to demonstrate creativity, analytical thinking, and a passion for cutting-edge technology."""
31
 
32
- #####################################
33
- # Preload Models
34
- #####################################
35
- @st.cache_resource(show_spinner=True)
36
  def load_models():
37
- """Load models at startup"""
38
- with st.spinner("Loading AI models... This may take a minute on first run."):
39
- models = {}
40
- # Use bart-base for summarization
41
- models['summarizer'] = pipeline(
42
- "summarization",
43
- model="facebook/bart-base",
44
- max_length=100,
45
- truncation=True
46
- )
47
-
48
- # Load model for evaluation
49
- models['evaluator'] = pipeline(
50
- "text2text-generation",
51
- model="Qwen/Qwen2.5-0.5B-Instruct",
52
- max_length=300
53
- )
54
-
55
- return models
56
-
57
- # Preload models immediately when app starts
58
- models = load_models()
59
-
60
- #####################################
61
- # Function: Extract Text from File
62
- #####################################
63
- @st.cache_data(show_spinner=False)
64
- def extract_text_from_file(file_obj):
65
- """
66
- Extract text from .docx and .doc files.
67
- Returns the extracted text or an error message if extraction fails.
68
- """
69
- filename = file_obj.name
70
- ext = os.path.splitext(filename)[1].lower()
71
- text = ""
72
-
73
- if ext == ".docx":
74
- try:
75
- document = docx.Document(file_obj)
76
- text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
77
- except Exception as e:
78
- text = f"Error processing DOCX file: {e}"
79
- elif ext == ".doc":
80
- try:
81
- # For .doc files, we need to save to a temp file
82
- with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
83
- temp_file.write(file_obj.getvalue())
84
- temp_path = temp_file.name
85
-
86
- # Use docx2txt which is generally faster
87
- try:
88
- text = docx2txt.process(temp_path)
89
- except Exception:
90
- text = "Could not process .doc file. Please convert to .docx format."
91
-
92
- # Clean up temp file
93
- os.unlink(temp_path)
94
- except Exception as e:
95
- text = f"Error processing DOC file: {e}"
96
- elif ext == ".txt":
97
- try:
98
- text = file_obj.getvalue().decode("utf-8")
99
- except Exception as e:
100
- text = f"Error processing TXT file: {e}"
101
- else:
102
- text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."
103
 
104
- # Limit text size for faster processing
105
- return text[:15000] if text else text
106
-
107
- #####################################
108
- # Functions for Information Extraction
109
- #####################################
110
-
111
- # Cache the extraction functions to avoid reprocessing
112
- @lru_cache(maxsize=32)
113
- def extract_name(text_start):
114
- """Extract candidate name from the beginning of resume text"""
115
- # Only use the first 500 characters to speed up processing
116
- lines = text_start.split('\n')
117
-
118
- # Check first few non-empty lines for potential names
119
- potential_name_lines = [line.strip() for line in lines[:5] if line.strip()]
120
-
121
- if potential_name_lines:
122
- # First line is often the name if it's short and doesn't contain common headers
123
- first_line = potential_name_lines[0]
124
- if 5 <= len(first_line) <= 40 and not any(x in first_line.lower() for x in ["resume", "cv", "curriculum", "vitae", "profile"]):
125
- return first_line
126
-
127
- # Look for lines that might contain a name
128
- for line in potential_name_lines[:3]:
129
- if len(line.split()) <= 4 and not any(x in line.lower() for x in ["address", "phone", "email", "resume", "cv"]):
130
- return line
131
-
132
- return "Unknown (please extract from resume)"
133
-
134
- def extract_age(text):
135
- """Extract candidate age from resume text"""
136
- # Simplified: just check a few common patterns
137
- age_patterns = [
138
- r'age:?\s*(\d{1,2})',
139
- r'(\d{1,2})\s*years\s*old',
140
- ]
141
 
142
- text_lower = text.lower()
143
- for pattern in age_patterns:
144
- matches = re.search(pattern, text_lower)
145
- if matches:
146
- return matches.group(1)
147
 
148
- return "Not specified"
149
 
150
- def extract_industry(text, base_summary):
151
- """Extract expected job industry from resume"""
152
- # Simplified industry keywords focused on the most common ones
153
- industry_keywords = {
154
- "technology": ["software", "programming", "developer", "IT", "tech", "computer"],
155
- "finance": ["banking", "financial", "accounting", "finance", "analyst"],
156
- "healthcare": ["medical", "health", "hospital", "clinical", "nurse", "doctor"],
157
- "education": ["teaching", "teacher", "professor", "education", "university"],
158
- "marketing": ["marketing", "advertising", "digital marketing", "social media"],
159
- "engineering": ["engineer", "engineering"],
160
- "data science": ["data science", "machine learning", "AI", "analytics"],
161
- "information systems": ["information systems", "ERP", "systems management"]
162
- }
163
 
164
- # Count occurrences of industry keywords - using the summary to speed up
165
- combined_text = base_summary.lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
- counts = {}
168
- for industry, keywords in industry_keywords.items():
169
- counts[industry] = sum(combined_text.count(keyword.lower()) for keyword in keywords)
 
170
 
171
- # Get the industry with the highest count
172
- if counts:
173
- likely_industry = max(counts.items(), key=lambda x: x[1])
174
- if likely_industry[1] > 0:
175
- return likely_industry[0].capitalize()
176
 
177
- # Check for educational background that might indicate industry
178
- degrees = ["computer science", "business", "engineering", "medicine", "education", "finance", "marketing"]
 
 
179
 
180
- for degree in degrees:
181
- if degree in combined_text:
182
- return f"{degree.capitalize()}-related field"
 
183
 
184
- return "Not clearly specified"
185
-
186
- def extract_skills_and_work(text):
187
- """Extract both skills and work experience at once to save processing time"""
188
- # Common skill categories - reduced keyword list for speed
189
- skill_categories = {
190
- "Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go"],
191
- "Data Science": ["Machine Learning", "Data Analysis", "Statistics", "TensorFlow", "PyTorch", "AI", "Algorithms"],
192
- "Database": ["SQL", "MySQL", "MongoDB", "Database", "NoSQL", "PostgreSQL"],
193
- "Web Development": ["React", "Angular", "Node.js", "Frontend", "Backend", "Full-Stack"],
194
- "Software Development": ["Agile", "Scrum", "Git", "DevOps", "Docker", "System Design"],
195
- "Cloud": ["AWS", "Azure", "Google Cloud", "Cloud Computing"],
196
- "Security": ["Cybersecurity", "Network Security", "Encryption", "Security"],
197
- "Business": ["Project Management", "Business Analysis", "Leadership", "Teamwork"],
198
- "Design": ["UX/UI", "User Experience", "Design Thinking", "Adobe"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  }
200
 
201
- # Work experience extraction
202
- work_headers = [
203
- "work experience", "professional experience", "employment history",
204
- "work history", "experience"
 
 
 
 
 
 
 
 
 
 
 
205
  ]
206
 
207
- next_section_headers = [
208
- "education", "skills", "certifications", "projects", "achievements"
 
209
  ]
210
 
211
- # Process everything at once
212
- lines = text.split('\n')
213
- text_lower = text.lower()
214
-
215
- # Skills extraction
216
- found_skills = []
217
- for category, skills in skill_categories.items():
218
- category_skills = []
219
- for skill in skills:
220
- if skill.lower() in text_lower:
221
- category_skills.append(skill)
222
-
223
- if category_skills:
224
- found_skills.append(f"{category}: {', '.join(category_skills)}")
225
 
226
- # Work experience extraction - simplified approach
227
- work_section = []
228
- in_work_section = False
229
-
230
- for idx, line in enumerate(lines):
231
- line_lower = line.lower().strip()
232
-
233
- # Start of work section
234
- if not in_work_section:
235
- if any(header in line_lower for header in work_headers):
236
- in_work_section = True
237
- continue
238
- # End of work section
239
- elif in_work_section:
240
- if any(header in line_lower for header in next_section_headers):
241
- break
242
-
243
- if line.strip():
244
- work_section.append(line.strip())
245
-
246
- # Simplified work formatting
247
- if not work_section:
248
- work_experience = "Work experience not clearly identified"
249
- else:
250
- # Just take the first 5-7 lines of the work section as a summary
251
- work_lines = []
252
- company_count = 0
253
- current_company = ""
254
-
255
- for line in work_section:
256
- # New company entry often has a date
257
- if re.search(r'(19|20)\d{2}', line):
258
- company_count += 1
259
- if company_count <= 3: # Limit to 3 most recent positions
260
- current_company = line
261
- work_lines.append(f"**{line}**")
262
- else:
263
- break
264
- elif company_count <= 3 and len(work_lines) < 10: # Limit total lines
265
- work_lines.append(line)
266
-
267
- work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"
268
 
269
- skills_formatted = "\n• " + "\n• ".join(found_skills) if found_skills else "No specific technical skills clearly identified"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
- return skills_formatted, work_experience
272
-
273
- #####################################
274
- # Function: Summarize Resume Text
275
- #####################################
276
- def summarize_resume_text(resume_text):
277
- """
278
- Generates a structured summary of the resume text
279
- """
280
- start_time = time.time()
281
-
282
- # First, generate a quick summary using pre-loaded model
283
- max_input_length = 1024 # Model limit
284
-
285
- # Only summarize the first portion of text for speed
286
- text_to_summarize = resume_text[:min(len(resume_text), max_input_length)]
287
- base_summary = models['summarizer'](text_to_summarize)[0]['summary_text']
288
-
289
- # Extract information in parallel where possible
290
- with concurrent.futures.ThreadPoolExecutor() as executor:
291
- # These can run in parallel
292
- name_future = executor.submit(extract_name, resume_text[:500]) # Only use start of text
293
- age_future = executor.submit(extract_age, resume_text)
294
- industry_future = executor.submit(extract_industry, resume_text, base_summary)
295
- skills_work_future = executor.submit(extract_skills_and_work, resume_text)
296
-
297
- # Get results
298
- name = name_future.result()
299
- age = age_future.result()
300
- industry = industry_future.result()
301
- skills, work_experience = skills_work_future.result()
302
-
303
- # Format the structured summary
304
- formatted_summary = f"Name: {name}\n"
305
- formatted_summary += f"Age: {age}\n"
306
- formatted_summary += f"Expected Job Industry: {industry}\n\n"
307
- formatted_summary += f"Previous Work Experience: {work_experience}\n\n"
308
- formatted_summary += f"Skills: {skills}"
309
-
310
- execution_time = time.time() - start_time
311
-
312
- return formatted_summary, execution_time
313
 
314
- #####################################
315
- # Function: Analyze Google Fit
316
- #####################################
317
- def analyze_google_fit(resume_summary):
318
- """
319
- Analyze how well the candidate fits Google's requirements with detailed category breakdowns.
320
- """
321
- start_time = time.time()
322
-
323
- # Define Google's key skill categories with more detailed keywords
324
- google_keywords = {
325
- "technical_skills": ["python", "java", "c++", "javascript", "go", "sql", "algorithms", "data structures",
326
- "coding", "software development", "git", "programming", "backend", "frontend", "full-stack"],
327
- "advanced_tech": ["machine learning", "ai", "artificial intelligence", "cloud", "data science", "big data",
328
- "tensorflow", "deep learning", "distributed systems", "kubernetes", "microservices"],
329
- "problem_solving": ["problem solving", "analytical", "critical thinking", "troubleshooting", "debugging",
330
- "optimization", "scalability", "system design", "complexity", "efficiency"],
331
- "innovation": ["innovation", "creative", "creativity", "design thinking", "research", "novel solutions",
332
- "patents", "publications", "unique approaches", "cutting-edge"],
333
- "soft_skills": ["team", "leadership", "collaboration", "communication", "agile", "project management",
334
- "mentoring", "cross-functional", "presentation", "stakeholder management"]
335
- }
336
 
337
- # Category weights with descriptive labels
338
- category_weights = {
339
- "technical_skills": {"weight": 0.35, "label": "Technical Programming Skills"},
340
- "advanced_tech": {"weight": 0.25, "label": "Advanced Technology Knowledge"},
341
- "problem_solving": {"weight": 0.20, "label": "Problem Solving Abilities"},
342
- "innovation": {"weight": 0.10, "label": "Innovation Mindset"},
343
- "soft_skills": {"weight": 0.10, "label": "Collaboration & Leadership"}
344
- }
345
 
346
- resume_lower = resume_summary.lower()
 
347
 
348
- # Calculate category scores and store detailed information
349
- category_scores = {}
350
- category_details = {}
351
- found_skills = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
 
353
- for category, keywords in google_keywords.items():
354
- # Find the specific matching keywords for feedback
355
- category_matches = [keyword for keyword in keywords if keyword in resume_lower]
356
- found_skills[category] = category_matches
357
-
358
- # Count matches but cap at a reasonable level
359
- matches = len(category_matches)
360
- total_keywords = len(keywords)
361
-
362
- # Calculate raw percentage for this category
363
- raw_percentage = int((matches / total_keywords) * 100)
364
-
365
- # Apply logarithmic scaling for more realistic scores
366
- if matches == 0:
367
- adjusted_score = 0.0
368
  else:
369
- # Logarithmic scaling to prevent perfect scores
370
- adjusted_score = min(0.95, (math.log(matches + 1) / math.log(min(total_keywords, 8) + 1)))
371
 
372
- # Store both raw and adjusted scores for feedback
373
- category_scores[category] = adjusted_score
374
- category_details[category] = {
375
- "raw_percentage": raw_percentage,
376
- "adjusted_score": int(adjusted_score * 100),
377
- "matching_keywords": category_matches,
378
- "total_keywords": total_keywords,
379
- "matches": matches
 
 
380
  }
381
 
382
- # Calculate weighted score
383
- weighted_score = sum(score * category_weights[category]["weight"] for category, score in category_scores.items())
 
 
384
 
385
- # Apply final curve to keep scores in a realistic range
386
- match_percentage = min(92, max(35, int(weighted_score * 100)))
 
 
 
387
 
388
- # Get more specific information for a better prompt
389
- # Get top skills across all categories (up to 5 total)
390
- all_matching_skills = []
391
- for category, matches in found_skills.items():
392
- if matches:
393
- all_matching_skills.extend(matches)
394
 
395
- top_skills = list(set(all_matching_skills))[:5] # Remove duplicates and take top 5
396
- skills_text = ", ".join(top_skills) if top_skills else "limited relevant skills"
 
 
 
 
 
 
 
 
 
397
 
398
- # Get strongest and weakest categories for more specific feedback
399
- categories_sorted = sorted(category_details.items(), key=lambda x: x[1]["adjusted_score"], reverse=True)
400
- top_category = category_weights[categories_sorted[0][0]]["label"]
401
- weak_category = category_weights[categories_sorted[-1][0]]["label"]
402
 
403
- # Extract work experience highlights
404
- experience_match = re.search(r'Previous Work Experience:.*?(?=\n\n|$)', resume_summary, re.DOTALL)
405
- experience_text = experience_match.group(0) if experience_match else ""
 
 
406
 
407
- # Extract just 1-2 key experiences
408
- experiences = re.findall(r'([A-Z][^.]*?company|[A-Z][^.]*?engineer|[A-Z][^.]*?developer|[A-Z][^.]*?Google|[A-Z][^.]*?Microsoft|[A-Z][^.]*?Amazon)', experience_text)
409
- experience_highlights = ", ".join(experiences[:2]) if experiences else "work experience"
410
 
411
- # Create a more specific prompt for T5 that focuses on detailed assessment
412
- prompt = f"""
413
- Generate a professional expert assessment for a Google job candidate.
414
- Skills detected: {skills_text}.
415
- Strongest area: {top_category} ({categories_sorted[0][1]["adjusted_score"]}%).
416
- Weakest area: {weak_category} ({categories_sorted[-1][1]["adjusted_score"]}%).
417
- Overall match: {match_percentage}%.
418
-
419
- Write an evaluative assessment that analyzes the candidate's fit for Google.
420
- Start with "This candidate" and provide an expert evaluation of their Google fit.
421
-
422
- This candidate"""
423
-
424
- try:
425
- # Generate the assessment using T5
426
- assessment_results = models['evaluator'](
427
- prompt,
428
- max_length=300,
429
- do_sample=True,
430
- temperature=0.75,
431
- num_return_sequences=3
432
- )
433
 
434
- # Find the best response with much more thorough cleaning
435
- best_assessment = None
436
- for result in assessment_results:
437
- # Get the raw text
438
- raw_text = result['generated_text'].strip()
439
-
440
- # Extract just the part that starts with "This candidate"
441
- if "This candidate" in raw_text:
442
- # Find the start of the actual assessment
443
- start_idx = raw_text.find("This candidate")
444
- text = raw_text[start_idx:]
445
-
446
- # Check if it's actually an assessment (not just instructions)
447
- if len(text) > 50 and not any(x in text.lower() for x in [
448
- "actionable advice",
449
- "include specific",
450
- "make an assessment",
451
- "evaluate their",
452
- "assess their",
453
- "provide specific areas"
454
- ]):
455
- best_assessment = text
456
- break
457
-
458
- # Use the best response or generate a fallback if none were ideal
459
- if best_assessment:
460
- assessment = best_assessment
461
- else:
462
- # Generate a completely manual assessment since T5 responses contain too many instructions
463
- assessment = f"""This candidate demonstrates solid {top_category} with proficiency in {skills_text}.
464
- However, they would need to strengthen their {weak_category} to meet Google's high standards.
465
- To become more competitive, they should develop advanced problem-solving skills through algorithmic
466
- challenges and contribute to open-source projects. Overall, at {match_percentage}% match,
467
- they show potential but require targeted skill development before being ready for Google."""
468
-
469
- except Exception as e:
470
- # Fallback to a completely manual assessment
471
- print(f"Error in T5 assessment generation: {e}")
472
- assessment = f"""This candidate demonstrates solid {top_category} with proficiency in {skills_text}.
473
- However, they would need to strengthen their {weak_category} to meet Google's high standards.
474
- To become more competitive, they should develop advanced problem-solving skills through algorithmic
475
- challenges and contribute to open-source projects. Overall, at {match_percentage}% match,
476
- they show potential but require targeted skill development before being ready for Google."""
477
-
478
- # Final cleanup - more aggressive to remove any remaining instructions
479
- assessment = re.sub(r'include specific actionable advice.*?improvement\.', '', assessment, flags=re.DOTALL|re.IGNORECASE)
480
- assessment = re.sub(r'make an assessment.*?resume\.', '', assessment, flags=re.DOTALL|re.IGNORECASE)
481
- assessment = re.sub(r'evaluate their technical skills.*?google\.', '', assessment, flags=re.DOTALL|re.IGNORECASE)
482
- assessment = re.sub(r'assess their strengths.*?contributions', '', assessment, flags=re.DOTALL|re.IGNORECASE)
483
- assessment = re.sub(r'provide specific areas.*?needed', '', assessment, flags=re.DOTALL|re.IGNORECASE)
484
- assessment = re.sub(r'give an overall.*?google', '', assessment, flags=re.DOTALL|re.IGNORECASE)
485
-
486
- # Clean up any double spaces, newlines, etc.
487
- assessment = re.sub(r'\s+', ' ', assessment)
488
- assessment = assessment.strip()
489
-
490
- # If cleaning removed too much text, use the fallback
491
- if len(assessment) < 50 or not assessment.startswith("This candidate"):
492
- assessment = f"""This candidate demonstrates solid {top_category} with proficiency in {skills_text}.
493
- However, they would need to strengthen their {weak_category} to meet Google's high standards.
494
- To become more competitive, they should develop advanced problem-solving skills through algorithmic
495
- challenges and contribute to open-source projects. Overall, at {match_percentage}% match,
496
- they show potential but require targeted skill development before being ready for Google."""
497
 
498
- # Make sure percentages are consistent
499
- assessment = re.sub(r'\b\d{1,2}%\b', f"{match_percentage}%", assessment)
500
 
501
- execution_time = time.time() - start_time
 
502
 
503
- return assessment, match_percentage, category_details, execution_time
504
-
505
- def generate_expert_assessment(resume_summary, match_percentage, category_details, found_skills):
506
- """
507
- Generate a comprehensive expert assessment based on the resume analysis.
508
- This is a specialized function to create high-quality, specific assessments.
509
  """
510
- # Sort categories by score to identify top strengths and weaknesses
511
- categories = list(category_details.keys())
512
- categories.sort(key=lambda cat: category_details[cat]["adjusted_score"], reverse=True)
513
-
514
- # Identify top strengths (top 2 categories)
515
- top_strengths = categories[:2]
516
-
517
- # Identify main weaknesses (bottom 2 categories, but only if score is below 50%)
518
- weaknesses = [cat for cat in categories if category_details[cat]["adjusted_score"] < 50]
519
-
520
- # Extract relevant skills for top strengths (up to 3 skills per strength)
521
- strength_skills = []
522
- for category in top_strengths:
523
- matches = found_skills[category][:3] if found_skills[category] else []
524
- strength_skills.extend(matches)
525
-
526
- # Extract experience snippets from resume
527
- experience_match = re.search(r'Previous Work Experience:(.*?)(?=\n\n|$)', resume_summary, re.DOTALL)
528
- experience_text = experience_match.group(1) if experience_match else ""
529
-
530
- # Find relevant company names or roles that might be impressive
531
- company_pattern = r'\b(Google|Microsoft|Amazon|Apple|Facebook|Meta|Twitter|LinkedIn|Uber|Airbnb|Netflix|Oracle|IBM|Intel|Adobe|Salesforce)\b'
532
- companies = re.findall(company_pattern, experience_text, re.IGNORECASE)
533
-
534
- # Determine the expertise level based on score
535
- if match_percentage >= 75:
536
- expertise_level = "strong"
537
- elif match_percentage >= 60:
538
- expertise_level = "solid"
539
- elif match_percentage >= 45:
540
- expertise_level = "moderate"
541
- else:
542
- expertise_level = "limited"
543
-
544
- # Start building assessment
545
- assessment = f"This candidate demonstrates {expertise_level} potential for Google, with particular strengths in "
546
-
547
- # Add strengths with specific skills
548
- if top_strengths:
549
- strength_labels = []
550
- for strength in top_strengths:
551
- label = {"technical_skills": "technical programming",
552
- "advanced_tech": "advanced technology",
553
- "problem_solving": "problem-solving",
554
- "innovation": "innovation",
555
- "soft_skills": "collaboration and leadership"}[strength]
556
- strength_labels.append(label)
557
 
558
- assessment += f"{' and '.join(strength_labels)}. "
 
 
 
 
 
 
 
559
 
560
- # Add specific skills if available
561
- if strength_skills:
562
- assessment += f"Their experience with {', '.join(strength_skills[:4])} "
563
-
564
- # Add relevance to Google
565
- if any(skill in ['machine learning', 'ai', 'python', 'java', 'c++', 'cloud'] for skill in strength_skills):
566
- assessment += "directly aligns with Google's technical requirements. "
567
- else:
568
- assessment += "is relevant to Google's technology stack. "
569
- else:
570
- assessment += "few areas that align closely with Google's requirements. "
571
-
572
- # Add context from work experience if relevant companies found
573
- if companies:
574
- unique_companies = list(set([c.lower() for c in companies]))
575
- if len(unique_companies) > 1:
576
- assessment += f"Their experience at companies like {', '.join(unique_companies[:2])} provides valuable industry context. "
577
- else:
578
- assessment += f"Their experience at {unique_companies[0]} provides relevant industry context. "
579
-
580
- # Add weaknesses and improvement suggestions
581
- if weaknesses:
582
- assessment += "However, to improve their candidacy, they should strengthen their "
583
 
584
- weakness_labels = []
585
- for weakness in weaknesses[:2]: # Only mention top 2 weaknesses
586
- label = {"technical_skills": "technical programming skills",
587
- "advanced_tech": "knowledge of advanced technologies",
588
- "problem_solving": "problem-solving capabilities",
589
- "innovation": "innovation mindset",
590
- "soft_skills": "teamwork and collaboration abilities"}[weakness]
591
- weakness_labels.append(label)
592
 
593
- assessment += f"{' and '.join(weakness_labels)}, "
 
 
 
 
 
 
 
 
 
594
 
595
- # Add specific improvement suggestion
596
- if "technical_skills" in weaknesses:
597
- assessment += "particularly by building projects with modern languages like Python, Java, or Go. "
598
- elif "advanced_tech" in weaknesses:
599
- assessment += "ideally by gaining exposure to machine learning, cloud systems, or distributed computing. "
600
- elif "problem_solving" in weaknesses:
601
- assessment += "by practicing algorithmic problems and system design challenges. "
602
- elif "innovation" in weaknesses:
603
- assessment += "through projects that demonstrate creative thinking and novel solutions. "
604
- elif "soft_skills" in weaknesses:
605
- assessment += "by highlighting collaborative projects and leadership experiences. "
606
-
607
- # Add final evaluation with match percentage
608
- if match_percentage >= 70:
609
- assessment += f"Overall, this candidate shows good alignment with Google's culture of innovation and technical excellence, with a {match_percentage}% match to the company's requirements."
610
- elif match_percentage >= 50:
611
- assessment += f"With these improvements, the candidate could become more competitive for Google positions, currently showing a {match_percentage}% match to the company's requirements."
612
  else:
613
- assessment += f"Significant development in these areas would be needed before they could be considered a strong Google candidate, with a current match of {match_percentage}% to requirements."
 
614
 
615
  return assessment
616
 
617
- #####################################
618
- # Main Streamlit Interface
619
- #####################################
620
- st.title("Google Resume Match Analyzer")
621
- st.markdown(
622
- """
623
- Upload your resume file in **.docx**, **.doc**, or **.txt** format to see how well you match with Google's hiring requirements. The app performs the following tasks:
624
- 1. Extracts text from your resume.
625
- 2. Uses AI to generate a structured candidate summary.
626
- 3. Analyzes how well your profile fits Google's requirements.
627
- """
628
- )
629
 
630
- # Display Google's requirements
631
- with st.expander("Google's Requirements", expanded=False):
632
- st.write(GOOGLE_DESCRIPTION)
633
 
634
- # File uploader
635
- uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
 
 
 
636
 
637
- # Process button with optimized flow
638
- if uploaded_file is not None and st.button("Analyze My Google Fit"):
639
- # Create a placeholder for the progress bar
640
- progress_bar = st.progress(0)
641
- status_text = st.empty()
642
-
643
- # Step 1: Extract text
644
- status_text.text("Step 1/3: Extracting text from resume...")
645
- resume_text = extract_text_from_file(uploaded_file)
646
- progress_bar.progress(25)
647
-
648
- if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx, .doc, or .txt file.":
649
- st.error(resume_text)
650
- else:
651
- # Step 2: Generate summary
652
- status_text.text("Step 2/3: Analyzing resume and generating summary...")
653
- summary, summarization_time = summarize_resume_text(resume_text)
654
- progress_bar.progress(50)
655
-
656
- # Display summary
657
- st.subheader("Your Resume Summary")
658
- st.markdown(summary)
659
- st.info(f"Summary generated in {summarization_time:.2f} seconds")
660
-
661
- # Step 3: Generate Google fit assessment
662
- status_text.text("Step 3/3: Evaluating Google fit...")
663
- assessment, match_percentage, category_details, assessment_time = analyze_google_fit(summary)
664
- progress_bar.progress(100)
665
-
666
- # Clear status messages
667
- status_text.empty()
668
-
669
- # Display Google fit results
670
- st.subheader("Google Fit Assessment")
671
-
672
- # Display match percentage with appropriate color and emoji - with more realistic thresholds
673
- if match_percentage >= 85:
674
- st.success(f"**Overall Google Match Score:** {match_percentage}% 🌟")
675
- elif match_percentage >= 70:
676
- st.success(f"**Overall Google Match Score:** {match_percentage}% ✅")
677
- elif match_percentage >= 50:
678
- st.warning(f"**Overall Google Match Score:** {match_percentage}% ⚠️")
679
- else:
680
- st.error(f"**Overall Google Match Score:** {match_percentage}% 🔍")
681
-
682
- # NEW ADDITION: Add detailed score breakdown
683
- st.markdown("### Score Breakdown")
684
-
685
- # Create a neat table with category scores
686
- breakdown_data = []
687
- for category, details in category_details.items():
688
- label = {"technical_skills": "Technical Programming Skills",
689
- "advanced_tech": "Advanced Technology Knowledge",
690
- "problem_solving": "Problem Solving Abilities",
691
- "innovation": "Innovation Mindset",
692
- "soft_skills": "Collaboration & Leadership"}[category]
693
-
694
- # Create a visual indicator for the score
695
- score = details["adjusted_score"]
696
-
697
- # Add formatted breakdown row
698
- breakdown_data.append({
699
- "Category": label,
700
- "Score": f"{score}%",
701
- "Matching Skills": ", ".join(details["matching_keywords"][:3]) if details["matching_keywords"] else "None detected"
702
- })
703
-
704
- # Convert to DataFrame and display
705
- breakdown_df = pd.DataFrame(breakdown_data)
706
- # Remove the index column entirely
707
- st.table(breakdown_df.set_index('Category').reset_index()) # This removes the numerical index
708
-
709
- # Show a note about how scores are calculated
710
- with st.expander("How are these scores calculated?"):
711
- st.markdown("""
712
- - **Technical Programming Skills** (35% of total): Evaluates coding languages, software development tools, and core programming concepts
713
- - **Advanced Technology Knowledge** (25% of total): Assesses experience with cutting-edge technologies like AI, ML, cloud systems
714
- - **Problem Solving Abilities** (20% of total): Measures analytical thinking, algorithm design, and optimization skills
715
- - **Innovation Mindset** (10% of total): Looks for creativity, research orientation, and novel approaches
716
- - **Collaboration & Leadership** (10% of total): Evaluates team skills, communication, and project management
717
-
718
- Scores are calculated based on keyword matches in your resume, with diminishing returns applied (first few skills matter more than later ones).
719
- """)
720
 
721
- # Display assessment
722
- st.markdown("### Expert Assessment")
723
- st.markdown(assessment)
724
 
725
- st.info(f"Assessment completed in {assessment_time:.2f} seconds")
726
-
727
- # Add potential next steps based on the match percentage
728
- st.subheader("Recommended Next Steps")
729
-
730
- if match_percentage >= 80:
731
- st.markdown("""
732
- - Consider applying for positions at Google that match your experience
733
- - Prepare for technical interviews by practicing algorithms and system design
734
- - Review Google's interview process and STAR method for behavioral questions
735
- """)
736
- elif match_percentage >= 60:
737
- st.markdown("""
738
- - Focus on strengthening your technical skills and advanced technology knowledge
739
- - Work on projects that demonstrate your skills in Google's key technology areas
740
- - Consider taking additional courses in algorithms, system design, or other Google focus areas
741
- """)
742
- else:
743
- st.markdown("""
744
- - Build more relevant experience in software development or technical areas
745
- - Develop projects showcasing problem-solving abilities and technical skills
746
- - Consider gaining more experience before applying, or target specific Google roles that better match your profile
747
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
 
 
 
 
 
 
 
2
  import pandas as pd
3
+ import re
4
+ import json
5
+ import nltk
6
+ from nltk.corpus import stopwords
7
+ from nltk.tokenize import word_tokenize
8
+ import torch
9
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
10
+ import time
11
 
12
# Configure the Streamlit page; must run before any other UI call.
_PAGE_CONFIG = {
    "page_title": "Resume-Job Fit Analyzer",
    "page_icon": "📊",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_CONFIG)
19
 
20
# Download NLTK resources if needed
@st.cache_resource
def download_nltk_resources():
    """Ensure the required NLTK data is present and return English stopwords.

    Each resource is checked (and downloaded) independently.  The previous
    version wrapped both lookups in a single try block, so a missing
    stopwords corpus re-downloaded punkt as well, and a missing punkt
    skipped the stopwords check entirely.

    Returns:
        list[str]: English stopword list from nltk.corpus.stopwords.
    """
    for lookup_path, package in (
        ('tokenizers/punkt', 'punkt'),
        ('corpora/stopwords', 'stopwords'),
    ):
        try:
            nltk.data.find(lookup_path)
        except LookupError:
            nltk.download(package)
    return stopwords.words('english')

stop_words = download_nltk_resources()
 
32
 
33
# Load models
@st.cache_resource
def load_models():
    """Load and cache the NLP models used by the app.

    Cached by Streamlit so the heavyweight downloads happen once per process.

    Returns:
        dict with keys:
          * 'parser'              – BART text2text pipeline for resume parsing
          * 'evaluator'           – Qwen causal LM that writes the assessment
          * 'evaluator_tokenizer' – tokenizer paired with the evaluator
    """
    # Prefer GPU when available; pipeline() expects -1 for CPU.
    device = 0 if torch.cuda.is_available() else -1

    # Use BART for resume parsing
    parser = pipeline(
        "text2text-generation",
        model="facebook/bart-base",  # This would be the fine-tuned model in production
        device=device,
    )

    # Use Qwen for evaluation
    evaluator = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
    evaluator_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

    return {
        'parser': parser,
        'evaluator': evaluator,
        'evaluator_tokenizer': evaluator_tokenizer,
    }
51
 
52
# Extract skills from text
def extract_skills(text, skill_keywords):
    """Extract skills from text based on a predefined list of skills.

    Args:
        text: Free-form text to scan (resume or job description).
        skill_keywords: Iterable of skill names to look for.

    Returns:
        De-duplicated list of matched skills, case-insensitive and
        whole-token matched, in the order they appear in ``skill_keywords``.

    Fixes over the previous version:
      * ``\\b`` word boundaries never matched skills that end in a non-word
        character (e.g. "C++" followed by a space has no boundary after
        "+"); explicit ``(?<!\\w)``/``(?!\\w)`` lookarounds behave like
        ``\\b`` for alphanumeric skills but also handle "C++", "CI/CD", etc.
      * ``list(set(...))`` made the result order depend on string hashing,
        i.e. nondeterministic across runs; ``dict.fromkeys`` keeps
        first-seen order while still de-duplicating.
    """
    found_skills = []
    text_lower = text.lower()

    for skill in skill_keywords:
        # Whole-token match so e.g. "Go" does not match inside "Google".
        pattern = r'(?<!\w)' + re.escape(skill.lower()) + r'(?!\w)'
        if re.search(pattern, text_lower):
            found_skills.append(skill)

    return list(dict.fromkeys(found_skills))
65
+
66
# Parse resume
def parse_resume(resume_text, models):
    """Extract structured information from resume text.

    In production this would use the fine-tuned BART model; for now it is a
    simple rule-based parser.

    Args:
        resume_text: Raw resume text.
        models: Loaded model dict (currently unused by this rule-based
            parser; kept for interface compatibility).

    Returns:
        Dict with "skills" (technical/soft lists), "experience"
        (estimated years + truncated summary) and "education" sections.
    """
    from datetime import date

    # Collapse all whitespace so section regexes see one continuous line.
    clean_text = re.sub(r'\s+', ' ', resume_text).strip()

    # Common skill keywords (this would be a more extensive list in production)
    tech_skills = [
        "Python", "Java", "C++", "JavaScript", "TypeScript", "Go", "Rust", "SQL",
        "React", "Angular", "Vue", "Node.js", "Django", "Flask", "Spring",
        "TensorFlow", "PyTorch", "Scikit-learn", "Machine Learning", "Deep Learning", "NLP",
        "AWS", "Azure", "GCP", "Docker", "Kubernetes", "CI/CD", "Jenkins", "GitHub Actions",
        "REST API", "GraphQL", "Microservices", "Serverless"
    ]

    soft_skills = [
        "Leadership", "Communication", "Teamwork", "Problem-solving", "Critical thinking",
        "Time management", "Adaptability", "Creativity", "Collaboration", "Presentation"
    ]

    # Extract skills
    found_tech_skills = extract_skills(clean_text, tech_skills)
    found_soft_skills = extract_skills(clean_text, soft_skills)

    # Extract experience section using a header-to-header regex (simplified).
    experience_pattern = r'(?:Experience|EXPERIENCE|Work Experience|WORK EXPERIENCE).*?(?:Education|EDUCATION|Skills|SKILLS|$)'
    experience_match = re.search(experience_pattern, clean_text, re.DOTALL)
    experience_text = experience_match.group(0) if experience_match else ""

    # Extract education section using the same approach (simplified).
    education_pattern = r'(?:Education|EDUCATION).*?(?:Skills|SKILLS|Experience|EXPERIENCE|$)'
    education_match = re.search(education_pattern, clean_text, re.DOTALL)
    education_text = education_match.group(0) if education_match else ""

    # Estimate years of experience from "YYYY - YYYY" / "YYYY - present"
    # date ranges.  A single combined pattern fixes two defects of the old
    # two-pattern version: the current year was hard-coded (2025), and a
    # range such as "2020 - 2023" matched BOTH patterns and was counted
    # twice.
    current_year = date.today().year
    years_exp = 0
    range_pattern = r'(\d{4})\s*-\s*(\d{4}|present|current|now)'
    for start, end in re.findall(range_pattern, clean_text, re.IGNORECASE):
        start_year = int(start)
        end_year = int(end) if end.isdigit() else current_year
        if end_year >= start_year:  # ignore malformed / reversed ranges
            years_exp += end_year - start_year

    # Cap reasonable years
    years_exp = min(years_exp, 30)

    # Create structured data
    structured_data = {
        "skills": {
            "technical": found_tech_skills,
            "soft": found_soft_skills
        },
        "experience": {
            "years": years_exp,
            "summary": experience_text[:300] + "..." if len(experience_text) > 300 else experience_text
        },
        "education": education_text[:300] + "..." if len(education_text) > 300 else education_text
    }

    return structured_data
138
+
139
# Parse job description
def parse_job_description(job_text):
    """Extract key requirements from job description.

    Args:
        job_text: Raw job-posting text; the first non-empty line is treated
            as the job title.

    Returns:
        Dict with "title", "requirements" (technical_skills, soft_skills,
        years_experience) and the original "full_text".
    """
    # Collapse whitespace for skill/experience matching.
    clean_text = re.sub(r'\s+', ' ', job_text).strip()

    # Extract common skill keywords (same as resume parser)
    tech_skills = [
        "Python", "Java", "C++", "JavaScript", "TypeScript", "Go", "Rust", "SQL",
        "React", "Angular", "Vue", "Node.js", "Django", "Flask", "Spring",
        "TensorFlow", "PyTorch", "Scikit-learn", "Machine Learning", "Deep Learning", "NLP",
        "AWS", "Azure", "GCP", "Docker", "Kubernetes", "CI/CD", "Jenkins", "GitHub Actions",
        "REST API", "GraphQL", "Microservices", "Serverless"
    ]

    soft_skills = [
        "Leadership", "Communication", "Teamwork", "Problem-solving", "Critical thinking",
        "Time management", "Adaptability", "Creativity", "Collaboration", "Presentation"
    ]

    # Extract skills
    required_tech_skills = extract_skills(clean_text, tech_skills)
    required_soft_skills = extract_skills(clean_text, soft_skills)

    # Extract years of experience requirement (simplified)
    exp_patterns = [
        r'(\d+)\+?\s*(?:years|yrs|yr)(?:\s*of)?\s*(?:experience|exp)',
        r'(?:experience|exp)(?:\s*of)?\s*(\d+)\+?\s*(?:years|yrs|yr)'
    ]

    required_years = 0
    for pattern in exp_patterns:
        matches = re.findall(pattern, clean_text, re.IGNORECASE)
        if matches:
            # Take the highest mentioned years
            required_years = max([int(y) for y in matches if y.isdigit()] + [required_years])

    # Extract job title: first non-empty line of the ORIGINAL text.
    # (The previous version searched the whitespace-collapsed text, which
    # contains no newlines, so `^(.*?)(?:\n|$)` expanded to the entire
    # description and the whole posting became the "title".)
    job_title = "Not specified"
    for line in job_text.splitlines():
        line = line.strip()
        if line:
            job_title = line
            break

    # Create structured data
    structured_data = {
        "title": job_title,
        "requirements": {
            "technical_skills": required_tech_skills,
            "soft_skills": required_soft_skills,
            "years_experience": required_years
        },
        "full_text": job_text
    }

    return structured_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
# Calculate match score
def calculate_match_score(resume_data, job_data):
    """Calculate how well the resume matches the job description.

    Returns a dict with per-category breakdowns ("technical_skills",
    "soft_skills", "experience") plus a weighted integer "overall"
    percentage (technical 60%, soft 20%, experience 20%).
    """
    def _skill_breakdown(required, offered):
        # Percentage of required skills the candidate covers, plus the
        # matched / missing lists used by the UI.  No requirements -> 0.
        required_set, offered_set = set(required), set(offered)
        if not required_set:
            return {"score": 0, "matched": [], "missing": []}
        overlap = offered_set & required_set
        return {
            "score": int(len(overlap) / len(required_set) * 100),
            "matched": list(overlap),
            "missing": list(required_set - offered_set),
        }

    reqs = job_data["requirements"]
    scores = {
        "technical_skills": _skill_breakdown(
            reqs["technical_skills"], resume_data["skills"]["technical"]
        ),
        "soft_skills": _skill_breakdown(
            reqs["soft_skills"], resume_data["skills"]["soft"]
        ),
    }

    # Experience: full credit at/above the requirement, proportional below.
    needed_years = reqs["years_experience"]
    have_years = resume_data["experience"]["years"]
    if needed_years > 0:
        if have_years >= needed_years:
            exp_score = 100
        else:
            exp_score = int((have_years / needed_years) * 100)
        scores["experience"] = {
            "score": exp_score,
            "candidate_years": have_years,
            "required_years": needed_years,
        }
    else:
        # No stated requirement: any experience counts, none is neutral.
        scores["experience"] = {
            "score": 100 if have_years > 0 else 50,
            "candidate_years": have_years,
            "required_years": "Not specified",
        }

    # Weighted overall score; technical skills dominate.
    weights = (("technical_skills", 0.6), ("soft_skills", 0.2), ("experience", 0.2))
    scores["overall"] = int(sum(scores[name]["score"] * w for name, w in weights))

    return scores
262
+
263
# Generate expert assessment using Qwen
def generate_assessment(resume_data, job_data, match_scores, models):
    """Generate an expert assessment using Qwen model.

    Builds a ChatML-style prompt from the pre-computed match scores, samples
    a response from the Qwen evaluator, and falls back to a template-based
    assessment when generation fails or produces fewer than 50 characters.

    Args:
        resume_data: Parsed resume dict from parse_resume().
        job_data: Parsed job dict from parse_job_description().
        match_scores: Score breakdown from calculate_match_score().
        models: Dict holding 'evaluator' (causal LM) and 'evaluator_tokenizer'.

    Returns:
        Tuple (assessment_text, fit_status); fit_status is "FIT" when the
        overall score is >= 70, otherwise "NOT FIT".
    """
    # Prepare context
    job_title = job_data["title"]
    matched_skills = match_scores["technical_skills"]["matched"]
    missing_skills = match_scores["technical_skills"]["missing"]
    experience_match = match_scores["experience"]
    overall_score = match_scores["overall"]

    # Determine fit classification before the try block so the except path
    # can still pass it to the fallback generator.
    fit_status = "FIT" if overall_score >= 70 else "NOT FIT"

    # Create prompt for Qwen (ChatML markers <|im_start|>/<|im_end|>).
    prompt = f"""
<|im_start|>system
You are an expert resume evaluator. Analyze how well a candidate fits a job posting and provide professional feedback.
<|im_end|>

<|im_start|>user
Evaluate this candidate for a {job_title} position.

Overall match score: {overall_score}%
Technical skills match: {match_scores["technical_skills"]["score"]}%
Soft skills match: {match_scores["soft_skills"]["score"]}%
Experience match: {experience_match["score"]}%

Candidate has: {experience_match["candidate_years"]} years of experience
Position requires: {experience_match["required_years"]} years of experience

Matched technical skills: {", ".join(matched_skills) if matched_skills else "None"}
Missing technical skills: {", ".join(missing_skills) if missing_skills else "None"}

Create a professional assessment of this candidate. First state whether they are a FIT or NOT FIT for the position, then explain why with specific strengths and development areas.
<|im_end|>

<|im_start|>assistant
"""

    try:
        # Generate the assessment using Qwen
        tokenizer = models['evaluator_tokenizer']
        qwen_model = models['evaluator']

        # Sampling generation (do_sample=True), so output varies per run.
        # NOTE(review): generate() is called without attention_mask /
        # pad_token_id — confirm the resulting tokenizer warnings are
        # acceptable for this model.
        inputs = tokenizer(prompt, return_tensors="pt")
        outputs = qwen_model.generate(
            inputs.input_ids,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )

        # decode() returns prompt + completion; the split below isolates
        # the completion.
        assessment = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract the assistant's response
        if "<|im_start|>assistant" in assessment:
            assessment = assessment.split("<|im_start|>assistant")[-1]

        # Clean up any remaining markers
        assessment = re.sub(r'<\|im_(start|end)\|>', '', assessment)
        assessment = assessment.strip()

        # If no assessment was generated, create a fallback
        if not assessment or len(assessment) < 50:
            assessment = generate_fallback_assessment(resume_data, job_data, match_scores, fit_status)
    except Exception as e:
        # Any model/tokenizer failure degrades gracefully to the template.
        st.error(f"Error generating assessment: {str(e)}")
        assessment = generate_fallback_assessment(resume_data, job_data, match_scores, fit_status)

    return assessment, fit_status
334
+
335
# Generate fallback assessment
def generate_fallback_assessment(resume_data, job_data, match_scores, fit_status):
    """Build a deterministic, template-based assessment.

    Used when the LLM output is unavailable or too short.  Mirrors the
    expected format: a FIT/NOT FIT verdict followed by strengths and gaps.
    ``resume_data`` is accepted for interface symmetry but not read here.
    """
    job_title = job_data["title"]
    tech = match_scores["technical_skills"]
    matched_skills, missing_skills = tech["matched"], tech["missing"]
    overall_score = match_scores["overall"]

    if fit_status == "FIT":
        strengths = ', '.join(matched_skills) if matched_skills else 'relevant skills'
        gaps = ', '.join(missing_skills) if missing_skills else 'additional specialized areas relevant to this role'
        assessment = f"""FIT: This candidate demonstrates a strong alignment with the {job_title} position, achieving an overall match score of {overall_score}%. Their proficiency in {strengths} positions them well to contribute effectively from the start. The candidate's experience level is suitable for the role's requirements. To maximize their success, they could consider developing expertise in {gaps}.
        """
    else:
        strengths = ', '.join(matched_skills) if matched_skills else 'a few areas'
        gaps = ', '.join(missing_skills) if missing_skills else 'key technical requirements for this position'
        assessment = f"""NOT FIT: This candidate currently shows limited alignment with the {job_title} position, with an overall match score of {overall_score}%. While they demonstrate some relevant capabilities in {strengths}, they would need to develop expertise in critical areas such as {gaps}. The candidate may become more competitive for this role by focusing on these skill gaps and gaining more relevant experience.
        """

    return assessment
351
 
352
# ---- Page header and input widgets ----
st.title("Resume-Job Fit Analyzer")
st.markdown("### Evaluate how well a resume matches a job description")

# Two side-by-side text areas: resume on the left, job posting on the right.
resume_col, job_col = st.columns(2)

with resume_col:
    st.subheader("Resume")
    resume_text = st.text_area(
        "Paste resume text here",
        height=300,
        placeholder="Paste the candidate's resume text here...",
    )

with job_col:
    st.subheader("Job Description")
    job_description = st.text_area(
        "Paste job description here",
        height=300,
        placeholder="Paste the job description here...",
    )

# Full-width primary action that triggers the analysis below.
analyze_button = st.button("Analyze Match", type="primary", use_container_width=True)
 
373
 
374
# Main analysis logic
if analyze_button:
    if not resume_text or not job_description:
        st.error("Please provide both a resume and a job description.")
    else:
        with st.spinner("Analyzing resume and job match..."):
            start_time = time.time()

            # Models are cached by @st.cache_resource, so this loads once.
            models = load_models()

            # Parse both documents into structured dicts.
            resume_data = parse_resume(resume_text, models)
            job_data = parse_job_description(job_description)

            # Score the match, then produce the written assessment.
            match_scores = calculate_match_score(resume_data, job_data)
            assessment, fit_status = generate_assessment(
                resume_data, job_data, match_scores, models
            )

            elapsed = time.time() - start_time

            # ---- Results ----
            st.success(f"Analysis complete in {elapsed:.2f} seconds")

            # Verdict first, most prominent.
            st.markdown(f"## Overall Result: {fit_status}")

            # Headline metrics.
            st.subheader("Match Score")
            metric_cols = st.columns(3)
            with metric_cols[0]:
                st.metric("Overall Match", f"{match_scores['overall']}%")
            with metric_cols[1]:
                st.metric("Technical Skills", f"{match_scores['technical_skills']['score']}%")
            with metric_cols[2]:
                st.metric("Experience Match", f"{match_scores['experience']['score']}%")

            # Matched vs missing technical skills.
            st.subheader("Skills Breakdown")
            matched_col, missing_col = st.columns(2)
            with matched_col:
                st.markdown("##### Matched Skills")
                matched = match_scores["technical_skills"]["matched"]
                if matched:
                    for skill in matched:
                        st.markdown(f"✅ {skill}")
                else:
                    st.markdown("No matched skills found")
            with missing_col:
                st.markdown("##### Missing Skills")
                missing = match_scores["technical_skills"]["missing"]
                if missing:
                    for skill in missing:
                        st.markdown(f"❌ {skill}")
                else:
                    st.markdown("No missing skills detected")

            # Required vs actual years of experience.
            st.subheader("Experience")
            req_col, cand_col = st.columns(2)
            with req_col:
                st.markdown(f"**Required**: {job_data['requirements']['years_experience']} years")
            with cand_col:
                st.markdown(f"**Candidate has**: {resume_data['experience']['years']} years")

            # Model-written narrative.
            st.subheader("Expert Assessment")
            st.markdown(assessment)

            # Raw parsed structures for debugging / transparency.
            with st.expander("View Parsed Data"):
                left, right = st.columns(2)
                with left:
                    st.subheader("Resume Data")
                    st.json(resume_data)
                with right:
                    st.subheader("Job Requirements")
                    st.json(job_data)