Update app.py
app.py
CHANGED
@@ -1,747 +1,460 @@
-import os
-import io
 import streamlit as st
-import docx
-import docx2txt
-import tempfile
-import time
-import re
-import math
-import concurrent.futures
 import pandas as pd

-# Set page title and
 st.set_page_config(
-    page_title="Resume-
 )

-GOOGLE_DESCRIPTION = """Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong problem-solving skills, adaptability, and collaboration abilities. Technical roles require proficiency in programming languages such as Python, Java, C++, Go, or JavaScript, with expertise in data structures, algorithms, and system design. Additionally, skills in AI, cybersecurity, UX/UI design, and digital marketing are highly valued. Google fosters a culture of innovation, expecting candidates to demonstrate creativity, analytical thinking, and a passion for cutting-edge technology."""

-#####################################
-@st.cache_resource(show_spinner=True)
 def load_models():
-    """Load
-    models = {}
-    # Use bart-base for summarization
-    models['summarizer'] = pipeline(
-        "summarization",
-        model="facebook/bart-base",
-        max_length=100,
-        truncation=True
-    )

-    # Load model for evaluation
-    models['evaluator'] = pipeline(
-        "text2text-generation",
-        model="Qwen/Qwen2.5-0.5B-Instruct",
-        max_length=300
-    )

-    return models

-# Preload models immediately when app starts
-models = load_models()

-#####################################
-# Function: Extract Text from File
-#####################################
-@st.cache_data(show_spinner=False)
-def extract_text_from_file(file_obj):
-    """
-    Extract text from .docx and .doc files.
-    Returns the extracted text or an error message if extraction fails.
-    """
-    filename = file_obj.name
-    ext = os.path.splitext(filename)[1].lower()
-    text = ""

-    if ext == ".docx":
-        try:
-            document = docx.Document(file_obj)
-            text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
-        except Exception as e:
-            text = f"Error processing DOCX file: {e}"
-    elif ext == ".doc":
-        try:
-            # For .doc files, we need to save to a temp file
-            with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
-                temp_file.write(file_obj.getvalue())
-                temp_path = temp_file.name

-            # Use docx2txt which is generally faster
-            try:
-                text = docx2txt.process(temp_path)
-            except Exception:
-                text = "Could not process .doc file. Please convert to .docx format."

-            # Clean up temp file
-            os.unlink(temp_path)
-        except Exception as e:
-            text = f"Error processing DOC file: {e}"
-    elif ext == ".txt":
-        try:
-            text = file_obj.getvalue().decode("utf-8")
-        except Exception as e:
-            text = f"Error processing TXT file: {e}"
-    else:
-        text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."

-# Cache the extraction functions to avoid reprocessing
-@lru_cache(maxsize=32)
-def extract_name(text_start):
-    """Extract candidate name from the beginning of resume text"""
-    # Only use the first 500 characters to speed up processing
-    lines = text_start.split('\n')

-    # Check first few non-empty lines for potential names
-    potential_name_lines = [line.strip() for line in lines[:5] if line.strip()]

-    if potential_name_lines:
-        # First line is often the name if it's short and doesn't contain common headers
-        first_line = potential_name_lines[0]
-        if 5 <= len(first_line) <= 40 and not any(x in first_line.lower() for x in ["resume", "cv", "curriculum", "vitae", "profile"]):
-            return first_line

-    # Look for lines that might contain a name
-    for line in potential_name_lines[:3]:
-        if len(line.split()) <= 4 and not any(x in line.lower() for x in ["address", "phone", "email", "resume", "cv"]):
-            return line

-    return "Unknown (please extract from resume)"

-def extract_age(text):
-    """Extract candidate age from resume text"""
-    # Simplified: just check a few common patterns
-    age_patterns = [
-        r'age:?\s*(\d{1,2})',
-        r'(\d{1,2})\s*years\s*old',
-    ]
-    if matches:
-        return matches.group(1)
-    return

-        "finance": ["banking", "financial", "accounting", "finance", "analyst"],
-        "healthcare": ["medical", "health", "hospital", "clinical", "nurse", "doctor"],
-        "education": ["teaching", "teacher", "professor", "education", "university"],
-        "marketing": ["marketing", "advertising", "digital marketing", "social media"],
-        "engineering": ["engineer", "engineering"],
-        "data science": ["data science", "machine learning", "AI", "analytics"],
-        "information systems": ["information systems", "ERP", "systems management"]
-    }

-    if likely_industry[1] > 0:
-        return likely_industry[0].capitalize()

     }
     ]
     ]

-    # Skills extraction
-    found_skills = []
-    for category, skills in skill_categories.items():
-        category_skills = []
-        for skill in skills:
-            if skill.lower() in text_lower:
-                category_skills.append(skill)

-        if category_skills:
-            found_skills.append(f"{category}: {', '.join(category_skills)}")

-        line_lower = line.lower().strip()

-        # Start of work section
-        if not in_work_section:
-            if any(header in line_lower for header in work_headers):
-                in_work_section = True
-                continue
-        # End of work section
-        elif in_work_section:
-            if any(header in line_lower for header in next_section_headers):
-                break

-        if line.strip():
-            work_section.append(line.strip())

-    # Simplified work formatting
-    if not work_section:
-        work_experience = "Work experience not clearly identified"
-    else:
-        # Just take the first 5-7 lines of the work section as a summary
-        work_lines = []
-        company_count = 0
-        current_company = ""

-        for line in work_section:
-            # New company entry often has a date
-            if re.search(r'(19|20)\d{2}', line):
-                company_count += 1
-                if company_count <= 3:  # Limit to 3 most recent positions
-                    current_company = line
-                    work_lines.append(f"**{line}**")
-                else:
-                    break
-            elif company_count <= 3 and len(work_lines) < 10:  # Limit total lines
-                work_lines.append(line)

-        work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"

-    return

-#####################################
-# Function: Summarize Resume Text
-#####################################
-def summarize_resume_text(resume_text):
-    """
-    Generates a structured summary of the resume text
-    """
-    start_time = time.time()

-    # First, generate a quick summary using pre-loaded model
-    max_input_length = 1024  # Model limit

-    # Only summarize the first portion of text for speed
-    text_to_summarize = resume_text[:min(len(resume_text), max_input_length)]
-    base_summary = models['summarizer'](text_to_summarize)[0]['summary_text']

-    # Extract information in parallel where possible
-    with concurrent.futures.ThreadPoolExecutor() as executor:
-        # These can run in parallel
-        name_future = executor.submit(extract_name, resume_text[:500])  # Only use start of text
-        age_future = executor.submit(extract_age, resume_text)
-        industry_future = executor.submit(extract_industry, resume_text, base_summary)
-        skills_work_future = executor.submit(extract_skills_and_work, resume_text)

-        # Get results
-        name = name_future.result()
-        age = age_future.result()
-        industry = industry_future.result()
-        skills, work_experience = skills_work_future.result()

-    # Format the structured summary
-    formatted_summary = f"Name: {name}\n"
-    formatted_summary += f"Age: {age}\n"
-    formatted_summary += f"Expected Job Industry: {industry}\n\n"
-    formatted_summary += f"Previous Work Experience: {work_experience}\n\n"
-    formatted_summary += f"Skills: {skills}"

-    execution_time = time.time() - start_time

-    return formatted_summary, execution_time

-    """
-    Analyze how well the candidate fits Google's requirements with detailed category breakdowns.
-    """
-    start_time = time.time()

-    # Define Google's key skill categories with more detailed keywords
-    google_keywords = {
-        "technical_skills": ["python", "java", "c++", "javascript", "go", "sql", "algorithms", "data structures",
-                             "coding", "software development", "git", "programming", "backend", "frontend", "full-stack"],
-        "advanced_tech": ["machine learning", "ai", "artificial intelligence", "cloud", "data science", "big data",
-                          "tensorflow", "deep learning", "distributed systems", "kubernetes", "microservices"],
-        "problem_solving": ["problem solving", "analytical", "critical thinking", "troubleshooting", "debugging",
-                            "optimization", "scalability", "system design", "complexity", "efficiency"],
-        "innovation": ["innovation", "creative", "creativity", "design thinking", "research", "novel solutions",
-                       "patents", "publications", "unique approaches", "cutting-edge"],
-        "soft_skills": ["team", "leadership", "collaboration", "communication", "agile", "project management",
-                        "mentoring", "cross-functional", "presentation", "stakeholder management"]
-    }

-        "advanced_tech": {"weight": 0.25, "label": "Advanced Technology Knowledge"},
-        "problem_solving": {"weight": 0.20, "label": "Problem Solving Abilities"},
-        "innovation": {"weight": 0.10, "label": "Innovation Mindset"},
-        "soft_skills": {"weight": 0.10, "label": "Collaboration & Leadership"}
-    }

-        total_keywords = len(keywords)

-        # Calculate raw percentage for this category
-        raw_percentage = int((matches / total_keywords) * 100)

-        # Apply logarithmic scaling for more realistic scores
-        if matches == 0:
-            adjusted_score = 0.0
         else:
-            adjusted_score = min(0.95, (math.log(matches + 1) / math.log(min(total_keywords, 8) + 1)))

     }

-    # Calculate

-    # Get top skills across all categories (up to 5 total)
-    all_matching_skills = []
-    for category, matches in found_skills.items():
-        if matches:
-            all_matching_skills.extend(matches)

-    top_category = category_weights[categories_sorted[0][0]]["label"]
-    weak_category = category_weights[categories_sorted[-1][0]]["label"]

-    experience_highlights = ", ".join(experiences[:2]) if experiences else "work experience"

-    Strongest area: {top_category} ({categories_sorted[0][1]["adjusted_score"]}%).
-    Weakest area: {weak_category} ({categories_sorted[-1][1]["adjusted_score"]}%).
-    Overall match: {match_percentage}%.

-    Write an evaluative assessment that analyzes the candidate's fit for Google.
-    Start with "This candidate" and provide an expert evaluation of their Google fit.

-    This candidate"""

-    try:
-        # Generate the assessment using T5
-        assessment_results = models['evaluator'](
-            prompt,
-            max_length=300,
-            do_sample=True,
-            temperature=0.75,
-            num_return_sequences=3
-        )

-        for result in assessment_results:
-            # Get the raw text
-            raw_text = result['generated_text'].strip()

-            # Extract just the part that starts with "This candidate"
-            if "This candidate" in raw_text:
-                # Find the start of the actual assessment
-                start_idx = raw_text.find("This candidate")
-                text = raw_text[start_idx:]

-                # Check if it's actually an assessment (not just instructions)
-                if len(text) > 50 and not any(x in text.lower() for x in [
-                    "actionable advice",
-                    "include specific",
-                    "make an assessment",
-                    "evaluate their",
-                    "assess their",
-                    "provide specific areas"
-                ]):
-                    best_assessment = text
-                    break

-        # Use the best response or generate a fallback if none were ideal
-        if best_assessment:
-            assessment = best_assessment
-        else:
-            # Generate a completely manual assessment since T5 responses contain too many instructions
-            assessment = f"""This candidate demonstrates solid {top_category} with proficiency in {skills_text}.
-However, they would need to strengthen their {weak_category} to meet Google's high standards.
-To become more competitive, they should develop advanced problem-solving skills through algorithmic
-challenges and contribute to open-source projects. Overall, at {match_percentage}% match,
-they show potential but require targeted skill development before being ready for Google."""

-    except Exception as e:
-        # Fallback to a completely manual assessment
-        print(f"Error in T5 assessment generation: {e}")
-        assessment = f"""This candidate demonstrates solid {top_category} with proficiency in {skills_text}.
-However, they would need to strengthen their {weak_category} to meet Google's high standards.
-To become more competitive, they should develop advanced problem-solving skills through algorithmic
-challenges and contribute to open-source projects. Overall, at {match_percentage}% match,
-they show potential but require targeted skill development before being ready for Google."""

-    # Final cleanup - more aggressive to remove any remaining instructions
-    assessment = re.sub(r'include specific actionable advice.*?improvement\.', '', assessment, flags=re.DOTALL|re.IGNORECASE)
-    assessment = re.sub(r'make an assessment.*?resume\.', '', assessment, flags=re.DOTALL|re.IGNORECASE)
-    assessment = re.sub(r'evaluate their technical skills.*?google\.', '', assessment, flags=re.DOTALL|re.IGNORECASE)
-    assessment = re.sub(r'assess their strengths.*?contributions', '', assessment, flags=re.DOTALL|re.IGNORECASE)
-    assessment = re.sub(r'provide specific areas.*?needed', '', assessment, flags=re.DOTALL|re.IGNORECASE)
-    assessment = re.sub(r'give an overall.*?google', '', assessment, flags=re.DOTALL|re.IGNORECASE)

-    # Clean up any double spaces, newlines, etc.
-    assessment = re.sub(r'\s+', ' ', assessment)
-    assessment = assessment.strip()

-    # If cleaning removed too much text, use the fallback
-    if len(assessment) < 50 or not assessment.startswith("This candidate"):
-        assessment = f"""This candidate demonstrates solid {top_category} with proficiency in {skills_text}.
-However, they would need to strengthen their {weak_category} to meet Google's high standards.
-To become more competitive, they should develop advanced problem-solving skills through algorithmic
-challenges and contribute to open-source projects. Overall, at {match_percentage}% match,
-they show potential but require targeted skill development before being ready for Google."""

-def generate_expert_assessment(resume_summary, match_percentage, category_details, found_skills):
-    """
-    Generate a comprehensive expert assessment based on the resume analysis.
-    This is a specialized function to create high-quality, specific assessments.
     """

-    top_strengths = categories[:2]

-    # Identify main weaknesses (bottom 2 categories, but only if score is below 50%)
-    weaknesses = [cat for cat in categories if category_details[cat]["adjusted_score"] < 50]

-    # Extract relevant skills for top strengths (up to 3 skills per strength)
-    strength_skills = []
-    for category in top_strengths:
-        matches = found_skills[category][:3] if found_skills[category] else []
-        strength_skills.extend(matches)

-    # Extract experience snippets from resume
-    experience_match = re.search(r'Previous Work Experience:(.*?)(?=\n\n|$)', resume_summary, re.DOTALL)
-    experience_text = experience_match.group(1) if experience_match else ""

-    # Find relevant company names or roles that might be impressive
-    company_pattern = r'\b(Google|Microsoft|Amazon|Apple|Facebook|Meta|Twitter|LinkedIn|Uber|Airbnb|Netflix|Oracle|IBM|Intel|Adobe|Salesforce)\b'
-    companies = re.findall(company_pattern, experience_text, re.IGNORECASE)

-    # Determine the expertise level based on score
-    if match_percentage >= 75:
-        expertise_level = "strong"
-    elif match_percentage >= 60:
-        expertise_level = "solid"
-    elif match_percentage >= 45:
-        expertise_level = "moderate"
-    else:
-        expertise_level = "limited"

-    # Start building assessment
-    assessment = f"This candidate demonstrates {expertise_level} potential for Google, with particular strengths in "

-    # Add strengths with specific skills
-    if top_strengths:
-        strength_labels = []
-        for strength in top_strengths:
-            label = {"technical_skills": "technical programming",
-                     "advanced_tech": "advanced technology",
-                     "problem_solving": "problem-solving",
-                     "innovation": "innovation",
-                     "soft_skills": "collaboration and leadership"}[strength]
-            strength_labels.append(label)

-        if strength_skills:
-            assessment += f"Their experience with {', '.join(strength_skills[:4])} "

-            # Add relevance to Google
-            if any(skill in ['machine learning', 'ai', 'python', 'java', 'c++', 'cloud'] for skill in strength_skills):
-                assessment += "directly aligns with Google's technical requirements. "
-            else:
-                assessment += "is relevant to Google's technology stack. "
-    else:
-        assessment += "few areas that align closely with Google's requirements. "

-    # Add context from work experience if relevant companies found
-    if companies:
-        unique_companies = list(set([c.lower() for c in companies]))
-        if len(unique_companies) > 1:
-            assessment += f"Their experience at companies like {', '.join(unique_companies[:2])} provides valuable industry context. "
-        else:
-            assessment += f"Their experience at {unique_companies[0]} provides relevant industry context. "

-    # Add weaknesses and improvement suggestions
-    if weaknesses:
-        assessment += "However, to improve their candidacy, they should strengthen their "

-                 "advanced_tech": "knowledge of advanced technologies",
-                 "problem_solving": "problem-solving capabilities",
-                 "innovation": "innovation mindset",
-                 "soft_skills": "teamwork and collaboration abilities"}[weakness]
-        weakness_labels.append(label)

-    if match_percentage >= 70:
-        assessment += f"Overall, this candidate shows good alignment with Google's culture of innovation and technical excellence, with a {match_percentage}% match to the company's requirements."
-    elif match_percentage >= 50:
-        assessment += f"With these improvements, the candidate could become more competitive for Google positions, currently showing a {match_percentage}% match to the company's requirements."
     else:
-        assessment

     return assessment

-st.title("Google Resume Match Analyzer")
-st.markdown(
-    """
-    Upload your resume file in **.docx**, **.doc**, or **.txt** format to see how well you match with Google's hiring requirements. The app performs the following tasks:
-    1. Extracts text from your resume.
-    2. Uses AI to generate a structured candidate summary.
-    3. Analyzes how well your profile fits Google's requirements.
-    """
-)

-st.write(GOOGLE_DESCRIPTION)

-    # Step 1: Extract text
-    status_text.text("Step 1/3: Extracting text from resume...")
-    resume_text = extract_text_from_file(uploaded_file)
-    progress_bar.progress(25)

-    if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx, .doc, or .txt file.":
-        st.error(resume_text)
-    else:
-        # Step 2: Generate summary
-        status_text.text("Step 2/3: Analyzing resume and generating summary...")
-        summary, summarization_time = summarize_resume_text(resume_text)
-        progress_bar.progress(50)

-        # Display summary
-        st.subheader("Your Resume Summary")
-        st.markdown(summary)
-        st.info(f"Summary generated in {summarization_time:.2f} seconds")

-        # Step 3: Generate Google fit assessment
-        status_text.text("Step 3/3: Evaluating Google fit...")
-        assessment, match_percentage, category_details, assessment_time = analyze_google_fit(summary)
-        progress_bar.progress(100)

-        # Clear status messages
-        status_text.empty()

-        # Display Google fit results
-        st.subheader("Google Fit Assessment")

-        # Display match percentage with appropriate color and emoji - with more realistic thresholds
-        if match_percentage >= 85:
-            st.success(f"**Overall Google Match Score:** {match_percentage}% 🌟")
-        elif match_percentage >= 70:
-            st.success(f"**Overall Google Match Score:** {match_percentage}% ✅")
-        elif match_percentage >= 50:
-            st.warning(f"**Overall Google Match Score:** {match_percentage}% ⚠️")
-        else:
-            st.error(f"**Overall Google Match Score:** {match_percentage}% 🔍")

-        # NEW ADDITION: Add detailed score breakdown
-        st.markdown("### Score Breakdown")

-        # Create a neat table with category scores
-        breakdown_data = []
-        for category, details in category_details.items():
-            label = {"technical_skills": "Technical Programming Skills",
-                     "advanced_tech": "Advanced Technology Knowledge",
-                     "problem_solving": "Problem Solving Abilities",
-                     "innovation": "Innovation Mindset",
-                     "soft_skills": "Collaboration & Leadership"}[category]

-            # Create a visual indicator for the score
-            score = details["adjusted_score"]

-            # Add formatted breakdown row
-            breakdown_data.append({
-                "Category": label,
-                "Score": f"{score}%",
-                "Matching Skills": ", ".join(details["matching_keywords"][:3]) if details["matching_keywords"] else "None detected"
-            })

-        # Convert to DataFrame and display
-        breakdown_df = pd.DataFrame(breakdown_data)
-        # Remove the index column entirely
-        st.table(breakdown_df.set_index('Category').reset_index())  # This removes the numerical index

-        # Show a note about how scores are calculated
-        with st.expander("How are these scores calculated?"):
-            st.markdown("""
-            - **Technical Programming Skills** (35% of total): Evaluates coding languages, software development tools, and core programming concepts
-            - **Advanced Technology Knowledge** (25% of total): Assesses experience with cutting-edge technologies like AI, ML, cloud systems
-            - **Problem Solving Abilities** (20% of total): Measures analytical thinking, algorithm design, and optimization skills
-            - **Innovation Mindset** (10% of total): Looks for creativity, research orientation, and novel approaches
-            - **Collaboration & Leadership** (10% of total): Evaluates team skills, communication, and project management

-            Scores are calculated based on keyword matches in your resume, with diminishing returns applied (first few skills matter more than later ones).
-            """)

-        st.markdown(assessment)
 import streamlit as st
 import pandas as pd
+import re
+import json
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+import torch
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+import time

+# Set page title and configuration
 st.set_page_config(
+    page_title="Resume-Job Fit Analyzer",
+    page_icon="📊",
+    layout="wide",
+    initial_sidebar_state="expanded"
 )

+# Download NLTK resources if needed
+@st.cache_resource
+def download_nltk_resources():
+    try:
+        nltk.data.find('tokenizers/punkt')
+        nltk.data.find('corpora/stopwords')
+    except LookupError:
+        nltk.download('punkt')
+        nltk.download('stopwords')
+    return stopwords.words('english')

+stop_words = download_nltk_resources()

+# Load models
+@st.cache_resource
 def load_models():
+    """Load and cache the NLP models"""
+    models = {}

+    # Use BART for resume parsing
+    models['parser'] = pipeline(
+        "text2text-generation",
+        model="facebook/bart-base",  # This would be the fine-tuned model in production
+        device=0 if torch.cuda.is_available() else -1
+    )

+    # Use Qwen for evaluation
+    models['evaluator'] = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
+    models['evaluator_tokenizer'] = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

+    return models

+# Extract skills from text
+def extract_skills(text, skill_keywords):
+    """Extract skills from text based on a predefined list of skills"""
+    found_skills = []
+    text_lower = text.lower()

+    for skill in skill_keywords:
+        # Create a regular expression pattern for whole word matching
+        pattern = r'\b' + re.escape(skill.lower()) + r'\b'
+        if re.search(pattern, text_lower):
+            found_skills.append(skill)

+    return list(set(found_skills))

+# Parse resume
+def parse_resume(resume_text, models):
+    """Extract structured information from resume text"""
+    # In production, this would use the fine-tuned BART model
+    # For now, we'll implement a simple rule-based parser

+    # Clean the text
+    clean_text = re.sub(r'\s+', ' ', resume_text).strip()

+    # Extract common skill keywords (this would be a more extensive list in production)
+    tech_skills = [
+        "Python", "Java", "C++", "JavaScript", "TypeScript", "Go", "Rust", "SQL",
+        "React", "Angular", "Vue", "Node.js", "Django", "Flask", "Spring",
+        "TensorFlow", "PyTorch", "Scikit-learn", "Machine Learning", "Deep Learning", "NLP",
+        "AWS", "Azure", "GCP", "Docker", "Kubernetes", "CI/CD", "Jenkins", "GitHub Actions",
+        "REST API", "GraphQL", "Microservices", "Serverless"
+    ]

+    soft_skills = [
+        "Leadership", "Communication", "Teamwork", "Problem-solving", "Critical thinking",
+        "Time management", "Adaptability", "Creativity", "Collaboration", "Presentation"
+    ]

+    # Extract skills
+    found_tech_skills = extract_skills(clean_text, tech_skills)
+    found_soft_skills = extract_skills(clean_text, soft_skills)

+    # Extract experience using regex patterns (simplified)
+    experience_pattern = r'(?:Experience|EXPERIENCE|Work Experience|WORK EXPERIENCE).*?(?:Education|EDUCATION|Skills|SKILLS|$)'
+    experience_match = re.search(experience_pattern, clean_text, re.DOTALL)
+    experience_text = experience_match.group(0) if experience_match else ""

+    # Extract education using regex patterns (simplified)
+    education_pattern = r'(?:Education|EDUCATION).*?(?:Skills|SKILLS|Experience|EXPERIENCE|$)'
+    education_match = re.search(education_pattern, clean_text, re.DOTALL)
+    education_text = education_match.group(0) if education_match else ""

+    # Estimate years of experience (simplified)
+    years_exp = 0
+    year_patterns = [
+        r'(\d{4})\s*-\s*(?:present|current|now|2023|2024|2025)',
+        r'(\d{4})\s*-\s*(\d{4})'
+    ]

+    for pattern in year_patterns:
+        matches = re.findall(pattern, clean_text, re.IGNORECASE)
+        for match in matches:
+            if isinstance(match, tuple):
+                start_year = int(match[0])
+                end_year = int(match[1]) if match[1].isdigit() else 2025
+                years_exp += (end_year - start_year)
+            else:
+                start_year = int(match)
+                years_exp += (2025 - start_year)

+    # Cap reasonable years
+    years_exp = min(years_exp, 30)

+    # Create structured data
+    structured_data = {
+        "skills": {
+            "technical": found_tech_skills,
+            "soft": found_soft_skills
+        },
+        "experience": {
+            "years": years_exp,
+            "summary": experience_text[:300] + "..." if len(experience_text) > 300 else experience_text
+        },
+        "education": education_text[:300] + "..." if len(education_text) > 300 else education_text
     }

+    return structured_data

+# Parse job description
+def parse_job_description(job_text):
+    """Extract key requirements from job description"""
+    # Clean the text
+    clean_text = re.sub(r'\s+', ' ', job_text).strip()

+    # Extract common skill keywords (same as resume parser)
+    tech_skills = [
+        "Python", "Java", "C++", "JavaScript", "TypeScript", "Go", "Rust", "SQL",
+        "React", "Angular", "Vue", "Node.js", "Django", "Flask", "Spring",
+        "TensorFlow", "PyTorch", "Scikit-learn", "Machine Learning", "Deep Learning", "NLP",
+        "AWS", "Azure", "GCP", "Docker", "Kubernetes", "CI/CD", "Jenkins", "GitHub Actions",
+        "REST API", "GraphQL", "Microservices", "Serverless"
     ]

+    soft_skills = [
+        "Leadership", "Communication", "Teamwork", "Problem-solving", "Critical thinking",
+        "Time management", "Adaptability", "Creativity", "Collaboration", "Presentation"
     ]

+    # Extract skills
+    required_tech_skills = extract_skills(clean_text, tech_skills)
+    required_soft_skills = extract_skills(clean_text, soft_skills)

+    # Extract years of experience requirement (simplified)
+    exp_patterns = [
+        r'(\d+)\+?\s*(?:years|yrs|yr)(?:\s*of)?\s*(?:experience|exp)',
+        r'(?:experience|exp)(?:\s*of)?\s*(\d+)\+?\s*(?:years|yrs|yr)'
+    ]

+    required_years = 0
+    for pattern in exp_patterns:
+        matches = re.findall(pattern, clean_text, re.IGNORECASE)
+        if matches:
+            # Take the highest mentioned years
+            required_years = max([int(y) for y in matches if y.isdigit()] + [required_years])

+    # Extract job title
+    title_pattern = r'^(.*?)(?:\n|$)'
+    title_match = re.search(title_pattern, clean_text)
+    job_title = title_match.group(1).strip() if title_match else "Not specified"

+    # Create structured data
+    structured_data = {
+        "title": job_title,
+        "requirements": {
+            "technical_skills": required_tech_skills,
+            "soft_skills": required_soft_skills,
+            "years_experience": required_years
+        },
+        "full_text": job_text
+    }

+    return structured_data

+# Calculate match score
+def calculate_match_score(resume_data, job_data):
+    """Calculate how well the resume matches the job description"""
+    scores = {}

+    # Calculate skill match percentage
+    required_tech_skills = set(job_data["requirements"]["technical_skills"])
+    candidate_tech_skills = set(resume_data["skills"]["technical"])

+    required_soft_skills = set(job_data["requirements"]["soft_skills"])
+    candidate_soft_skills = set(resume_data["skills"]["soft"])

+    if required_tech_skills:
+        tech_match = len(candidate_tech_skills.intersection(required_tech_skills)) / len(required_tech_skills)
+        scores["technical_skills"] = {
+            "score": int(tech_match * 100),
+            "matched": list(candidate_tech_skills.intersection(required_tech_skills)),
+            "missing": list(required_tech_skills - candidate_tech_skills)
+        }
+    else:
+        scores["technical_skills"] = {"score": 0, "matched": [], "missing": []}

+    if required_soft_skills:
+        soft_match = len(candidate_soft_skills.intersection(required_soft_skills)) / len(required_soft_skills)
+        scores["soft_skills"] = {
+            "score": int(soft_match * 100),
+            "matched": list(candidate_soft_skills.intersection(required_soft_skills)),
+            "missing": list(required_soft_skills - candidate_soft_skills)
+        }
+    else:
+        scores["soft_skills"] = {"score": 0, "matched": [], "missing": []}

+    # Experience match
+    required_years = job_data["requirements"]["years_experience"]
+    candidate_years = resume_data["experience"]["years"]

+    if required_years > 0:
+        if candidate_years >= required_years:
+            exp_score = 100
         else:
+            exp_score = int((candidate_years / required_years) * 100)

+        scores["experience"] = {
+            "score": exp_score,
+            "candidate_years": candidate_years,
+            "required_years": required_years
+        }
+    else:
+        scores["experience"] = {
+            "score": 100 if candidate_years > 0 else 50,
+            "candidate_years": candidate_years,
+            "required_years": "Not specified"
         }

+    # Calculate overall score (weighted)
+    tech_weight = 0.6
+    soft_weight = 0.2
+    exp_weight = 0.2

+    overall_score = (
+        scores["technical_skills"]["score"] * tech_weight +
+        scores["soft_skills"]["score"] * soft_weight +
+        scores["experience"]["score"] * exp_weight
+    )

+    scores["overall"] = int(overall_score)

+    return scores

+# Generate expert assessment using Qwen
+def generate_assessment(resume_data, job_data, match_scores, models):
+    """Generate an expert assessment using Qwen model"""
+    # Prepare context
+    job_title = job_data["title"]
+    matched_skills = match_scores["technical_skills"]["matched"]
+    missing_skills = match_scores["technical_skills"]["missing"]
+    experience_match = match_scores["experience"]
+    overall_score = match_scores["overall"]

+    # Determine fit classification
+    fit_status = "FIT" if overall_score >= 70 else "NOT FIT"

+    # Create prompt for Qwen
+    prompt = f"""
+<|im_start|>system
+You are an expert resume evaluator. Analyze how well a candidate fits a job posting and provide professional feedback.
+<|im_end|>

+<|im_start|>user
+Evaluate this candidate for a {job_title} position.

+Overall match score: {overall_score}%
+Technical skills match: {match_scores["technical_skills"]["score"]}%
+Soft skills match: {match_scores["soft_skills"]["score"]}%
+Experience match: {experience_match["score"]}%

+Candidate has: {experience_match["candidate_years"]} years of experience
+Position requires: {experience_match["required_years"]} years of experience

+Matched technical skills: {", ".join(matched_skills) if matched_skills else "None"}
+Missing technical skills: {", ".join(missing_skills) if missing_skills else "None"}

+Create a professional assessment of this candidate. First state whether they are a FIT or NOT FIT for the position, then explain why with specific strengths and development areas.
+<|im_end|>

+<|im_start|>assistant
 """

+    try:
+        # Generate the assessment using Qwen
+        tokenizer = models['evaluator_tokenizer']
+        qwen_model = models['evaluator']

+        inputs = tokenizer(prompt, return_tensors="pt")
+        outputs = qwen_model.generate(
+            inputs.input_ids,
+            max_new_tokens=512,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.9
+        )

+        assessment = tokenizer.decode(outputs[0], skip_special_tokens=True)

+        # Extract the assistant's response
+        if "<|im_start|>assistant" in assessment:
+            assessment = assessment.split("<|im_start|>assistant")[-1]

+        # Clean up any remaining markers
+        assessment = re.sub(r'<\|im_(start|end)\|>', '', assessment)
+        assessment = assessment.strip()

+        # If no assessment was generated, create a fallback
+        if not assessment or len(assessment) < 50:
+            assessment = generate_fallback_assessment(resume_data, job_data, match_scores, fit_status)
+    except Exception as e:
+        st.error(f"Error generating assessment: {str(e)}")
+        assessment = generate_fallback_assessment(resume_data, job_data, match_scores, fit_status)

+    return assessment, fit_status

+# Generate fallback assessment
+def generate_fallback_assessment(resume_data, job_data, match_scores, fit_status):
+    """Generate a fallback assessment if the model fails"""
+    job_title = job_data["title"]
+    matched_skills = match_scores["technical_skills"]["matched"]
+    missing_skills = match_scores["technical_skills"]["missing"]
+    overall_score = match_scores["overall"]

+    if fit_status == "FIT":
+        assessment = f"""FIT: This candidate demonstrates a strong alignment with the {job_title} position, achieving an overall match score of {overall_score}%. Their proficiency in {', '.join(matched_skills) if matched_skills else 'relevant skills'} positions them well to contribute effectively from the start. The candidate's experience level is suitable for the role's requirements. To maximize their success, they could consider developing expertise in {', '.join(missing_skills) if missing_skills else 'additional specialized areas relevant to this role'}.
+"""
     else:
+        assessment = f"""NOT FIT: This candidate currently shows limited alignment with the {job_title} position, with an overall match score of {overall_score}%. While they demonstrate some relevant capabilities in {', '.join(matched_skills) if matched_skills else 'a few areas'}, they would need to develop expertise in critical areas such as {', '.join(missing_skills) if missing_skills else 'key technical requirements for this position'}. The candidate may become more competitive for this role by focusing on these skill gaps and gaining more relevant experience.
+"""

     return assessment

+# Create the main header and interface
+st.title("Resume-Job Fit Analyzer")
+st.markdown("### Evaluate how well a resume matches a job description")

+# Setup columns for input
+col1, col2 = st.columns(2)

+with col1:
+    # Resume input
+    st.subheader("Resume")
+    resume_text = st.text_area("Paste resume text here", height=300,
+                               placeholder="Paste the candidate's resume text here...")

+with col2:
+    # Job description input
+    st.subheader("Job Description")
+    job_description = st.text_area("Paste job description here", height=300,
+                                   placeholder="Paste the job description here...")

+# Analysis button
+analyze_button = st.button("Analyze Match", type="primary", use_container_width=True)

+# Main analysis logic
+if analyze_button:
+    if not resume_text or not job_description:
+        st.error("Please provide both a resume and a job description.")
+    else:
+        with st.spinner("Analyzing resume and job match..."):
+            # Record start time
+            start_time = time.time()

+            # Load models (uses caching so only loads once)
+            models = load_models()

+            # Parse resume and job description
+            resume_data = parse_resume(resume_text, models)
+            job_data = parse_job_description(job_description)

+            # Calculate match score
+            match_scores = calculate_match_score(resume_data, job_data)

+            # Generate assessment
+            assessment, fit_status = generate_assessment(resume_data, job_data, match_scores, models)

+            # Calculate execution time
+            execution_time = time.time() - start_time

+            # Display results
+            st.success(f"Analysis complete in {execution_time:.2f} seconds")

+            # Display fit status prominently
+            st.markdown(f"## Overall Result: {fit_status}")

+            # Display match score
+            st.subheader("Match Score")
+            score_col1, score_col2, score_col3 = st.columns(3)

+            with score_col1:
+                st.metric("Overall Match", f"{match_scores['overall']}%")

+            with score_col2:
+                st.metric("Technical Skills", f"{match_scores['technical_skills']['score']}%")

+            with score_col3:
+                st.metric("Experience Match", f"{match_scores['experience']['score']}%")

+            # Show skills breakdown
+            st.subheader("Skills Breakdown")
+            skill_col1, skill_col2 = st.columns(2)

+            with skill_col1:
+                st.markdown("##### Matched Skills")
+                if match_scores["technical_skills"]["matched"]:
+                    for skill in match_scores["technical_skills"]["matched"]:
+                        st.markdown(f"✅ {skill}")
+                else:
+                    st.markdown("No matched skills found")

+            with skill_col2:
+                st.markdown("##### Missing Skills")
+                if match_scores["technical_skills"]["missing"]:
+                    for skill in match_scores["technical_skills"]["missing"]:
+                        st.markdown(f"❌ {skill}")
+                else:
+                    st.markdown("No missing skills detected")

+            # Show experience comparison
+            st.subheader("Experience")
+            exp_col1, exp_col2 = st.columns(2)

+            with exp_col1:
+                st.markdown(f"**Required**: {job_data['requirements']['years_experience']} years")

+            with exp_col2:
+                st.markdown(f"**Candidate has**: {resume_data['experience']['years']} years")

+            # Display detailed assessment
+            st.subheader("Expert Assessment")
+            st.markdown(assessment)

+            # Show parsed data (expandable)
+            with st.expander("View Parsed Data"):
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.subheader("Resume Data")
+                    st.json(resume_data)
+                with col2:
+                    st.subheader("Job Requirements")
+                    st.json(job_data)
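Note: the scoring pipeline added in this commit (parse_resume, parse_job_description, calculate_match_score) is plain Python apart from the Streamlit UI, so it can be exercised on its own. A minimal sketch, assuming those functions have been imported or copied into a plain script (the sample resume and job strings below are made up for illustration, not taken from the app):

# Hypothetical smoke test of the new matching logic
resume = "Senior engineer, 2018 - present. Skills: Python, SQL, Docker, Leadership."
job = "Backend Engineer\nRequires 3+ years of experience with Python, Docker and Kubernetes."

resume_data = parse_resume(resume, models=None)   # models is unused by the rule-based parser shown above
job_data = parse_job_description(job)
scores = calculate_match_score(resume_data, job_data)

print(scores["overall"])                          # weighted 60/20/20 blend of the three sub-scores
print(scores["technical_skills"]["missing"])      # e.g. ["Kubernetes"] for this sample input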