Update app.py
app.py
CHANGED
@@ -1,520 +1,853 @@
[The left-hand (removed) column of this diff did not survive the page export; only fragments of the original 520-line app.py are recoverable. The deleted version imported streamlit, pandas, re, json, nltk (stopwords, word_tokenize), torch, transformers (pipeline, AutoModelForCausalLM, AutoTokenizer), time, os, docx2txt, io, and docx; downloaded the NLTK punkt and stopwords resources; loaded models in load_models(); extracted resume text with docx2txt, writing .doc uploads to a temp_file.doc and stripping non-printable characters; matched hard-coded technical and soft skill lists and experience regexes; scored technical skills, soft skills, and experience (soft skills and experience weighted 0.2 each); prompted a Qwen model through an <|im_start|>system chat template ("You are an expert resume evaluator...") and qwen_model.generate(max_new_tokens=512, do_sample=True); fell back to generate_fallback_assessment() when generation failed; and rendered a Streamlit page with a resume uploader (.doc, .docx, .txt), a job description field, and an analyze button.]
1 |
import os
|
|
|
2 |
import io
|
3 |
+
import streamlit as st
|
4 |
import docx
|
5 |
+
import docx2txt
|
6 |
+
import tempfile
|
7 |
+
import time
|
8 |
+
import re
|
9 |
+
import math
|
10 |
+
import concurrent.futures
|
11 |
+
import pandas as pd
|
12 |
+
from functools import lru_cache
|
13 |
+
from transformers import pipeline
|
14 |
|
15 |
+
# Set page title and hide sidebar
|
16 |
st.set_page_config(
|
17 |
page_title="Resume-Job Fit Analyzer",
|
18 |
+
initial_sidebar_state="collapsed"
|
|
|
19 |
)
|
20 |
|
21 |
+
# Hide sidebar completely with custom CSS
|
22 |
+
st.markdown("""
|
23 |
+
<style>
|
24 |
+
[data-testid="collapsedControl"] {display: none;}
|
25 |
+
section[data-testid="stSidebar"] {display: none;}
|
26 |
+
</style>
|
27 |
+
""", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
+
#####################################
|
30 |
+
# Preload Models
|
31 |
+
#####################################
|
32 |
+
@st.cache_resource(show_spinner=True)
|
33 |
def load_models():
|
34 |
+
"""Load models at startup"""
|
35 |
+
with st.spinner("Loading AI models... This may take a minute on first run."):
|
36 |
+
models = {}
|
37 |
+
# Use bart-base for summarization
|
38 |
+
models['summarizer'] = pipeline(
|
39 |
+
"summarization",
|
40 |
+
model="facebook/bart-base",
|
41 |
+
max_length=100,
|
42 |
+
truncation=True
|
43 |
+
)
|
44 |
+
|
45 |
+
# Load model for evaluation
|
46 |
+
models['evaluator'] = pipeline(
|
47 |
+
"text2text-generation",
|
48 |
+
model="Qwen/Qwen2.5-0.5B-Instruct",
|
49 |
+
max_length=300
|
50 |
+
)
|
51 |
+
|
52 |
+
return models
|
53 |
|
54 |
+
# Preload models immediately when app starts
|
55 |
+
models = load_models()
|
56 |
+
|
57 |
+
#####################################
|
58 |
+
# Function: Extract Text from File
|
59 |
+
#####################################
|
60 |
+
@st.cache_data(show_spinner=False)
|
61 |
+
def extract_text_from_file(file_obj):
|
62 |
+
"""
|
63 |
+
Extract text from .docx, .doc, and .txt files.
|
64 |
+
Returns the extracted text or an error message if extraction fails.
|
65 |
+
"""
|
66 |
+
filename = file_obj.name
|
67 |
+
ext = os.path.splitext(filename)[1].lower()
|
68 |
+
text = ""
|
69 |
+
|
70 |
+
if ext == ".docx":
|
71 |
try:
|
72 |
+
document = docx.Document(file_obj)
|
73 |
+
text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
|
74 |
except Exception as e:
|
75 |
+
text = f"Error processing DOCX file: {e}"
|
76 |
+
elif ext == ".doc":
|
|
|
|
|
|
|
77 |
try:
|
78 |
+
# For .doc files, we need to save to a temp file
|
79 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
|
80 |
+
temp_file.write(file_obj.getvalue())
|
81 |
+
temp_path = temp_file.name
|
82 |
+
|
83 |
+
# Use docx2txt which is generally faster
|
|
|
|
|
|
|
|
|
84 |
try:
|
85 |
+
text = docx2txt.process(temp_path)
|
86 |
+
except Exception:
|
87 |
+
text = "Could not process .doc file. Please convert to .docx format."
|
88 |
+
|
89 |
+
# Clean up temp file
|
90 |
+
os.unlink(temp_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
except Exception as e:
|
92 |
+
text = f"Error processing DOC file: {e}"
|
93 |
+
elif ext == ".txt":
|
94 |
+
try:
|
95 |
+
text = file_obj.getvalue().decode("utf-8")
|
96 |
+
except Exception as e:
|
97 |
+
text = f"Error processing TXT file: {e}"
|
98 |
else:
|
99 |
+
text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."
|
100 |
+
|
101 |
+
# Limit text size for faster processing
|
102 |
+
return text[:15000] if text else text
|
103 |
|
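For readers who want to exercise the .docx branch outside Streamlit, here is a minimal standalone sketch of the same python-docx extraction; "resume.docx" is a placeholder path, not a file shipped with this Space.

    # Hedged sketch: read paragraphs from a local .docx the same way extract_text_from_file does.
    import docx

    def docx_to_text(path):
        document = docx.Document(path)
        return "\n".join(p.text for p in document.paragraphs if p.text.strip())

    # docx_to_text("resume.docx")  # placeholder path; returns the joined paragraph text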
104 |
+
#####################################
|
105 |
+
# Functions for Information Extraction
|
106 |
+
#####################################
|
107 |
+
|
108 |
+
# Cache the extraction functions to avoid reprocessing
|
109 |
+
@lru_cache(maxsize=32)
|
110 |
+
def extract_name(text_start):
|
111 |
+
"""Extract candidate name from the beginning of resume text"""
|
112 |
+
# Only use the first 500 characters to speed up processing
|
113 |
+
lines = text_start.split('\n')
|
114 |
+
|
115 |
+
# Check first few non-empty lines for potential names
|
116 |
+
potential_name_lines = [line.strip() for line in lines[:5] if line.strip()]
|
117 |
+
|
118 |
+
if potential_name_lines:
|
119 |
+
# First line is often the name if it's short and doesn't contain common headers
|
120 |
+
first_line = potential_name_lines[0]
|
121 |
+
if 5 <= len(first_line) <= 40 and not any(x in first_line.lower() for x in ["resume", "cv", "curriculum", "vitae", "profile"]):
|
122 |
+
return first_line
|
123 |
+
|
124 |
+
# Look for lines that might contain a name
|
125 |
+
for line in potential_name_lines[:3]:
|
126 |
+
if len(line.split()) <= 4 and not any(x in line.lower() for x in ["address", "phone", "email", "resume", "cv"]):
|
127 |
+
return line
|
128 |
+
|
129 |
+
return "Unknown (please extract from resume)"
|
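A quick illustrative call of the helper above (the resume header is invented for the example; @lru_cache only requires the argument to be hashable, which a string slice is):

    # Hedged example: the first non-empty line is short and contains no header words, so it is returned.
    sample_header = "Jane Doe\njane@example.com\nData Analyst Resume"
    print(extract_name(sample_header))  # expected: "Jane Doe"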
130 |
+
|
131 |
+
def extract_age(text):
|
132 |
+
"""Extract candidate age from resume text"""
|
133 |
+
# Simplified: just check a few common patterns
|
134 |
+
age_patterns = [
|
135 |
+
r'age:?\s*(\d{1,2})',
|
136 |
+
r'(\d{1,2})\s*years\s*old',
|
137 |
+
]
|
138 |
+
|
139 |
text_lower = text.lower()
|
140 |
+
for pattern in age_patterns:
|
141 |
+
matches = re.search(pattern, text_lower)
|
142 |
+
if matches:
|
143 |
+
return matches.group(1)
|
144 |
+
|
145 |
+
return "Not specified"
|
146 |
+
|
147 |
+
def extract_industry(text, base_summary):
|
148 |
+
"""Extract expected job industry from resume"""
|
149 |
+
# Simplified industry keywords focused on the most common ones
|
150 |
+
industry_keywords = {
|
151 |
+
"technology": ["software", "programming", "developer", "IT", "tech", "computer"],
|
152 |
+
"finance": ["banking", "financial", "accounting", "finance", "analyst"],
|
153 |
+
"healthcare": ["medical", "health", "hospital", "clinical", "nurse", "doctor"],
|
154 |
+
"education": ["teaching", "teacher", "professor", "education", "university"],
|
155 |
+
"marketing": ["marketing", "advertising", "digital marketing", "social media"],
|
156 |
+
"engineering": ["engineer", "engineering"],
|
157 |
+
"data science": ["data science", "machine learning", "AI", "analytics"],
|
158 |
+
"information systems": ["information systems", "ERP", "systems management"]
|
159 |
+
}
|
160 |
+
|
161 |
+
# Count occurrences of industry keywords - using the summary to speed up
|
162 |
+
combined_text = base_summary.lower()
|
163 |
+
|
164 |
+
counts = {}
|
165 |
+
for industry, keywords in industry_keywords.items():
|
166 |
+
counts[industry] = sum(combined_text.count(keyword.lower()) for keyword in keywords)
|
167 |
+
|
168 |
+
# Get the industry with the highest count
|
169 |
+
if counts:
|
170 |
+
likely_industry = max(counts.items(), key=lambda x: x[1])
|
171 |
+
if likely_industry[1] > 0:
|
172 |
+
return likely_industry[0].capitalize()
|
173 |
+
|
174 |
+
# Check for educational background that might indicate industry
|
175 |
+
degrees = ["computer science", "business", "engineering", "medicine", "education", "finance", "marketing"]
|
176 |
|
177 |
+
for degree in degrees:
|
178 |
+
if degree in combined_text:
|
179 |
+
return f"{degree.capitalize()}-related field"
|
180 |
+
|
181 |
+
return "Not clearly specified"
|
182 |
+
|
183 |
+
def extract_skills_and_work(text):
|
184 |
+
"""Extract both skills and work experience at once to save processing time"""
|
185 |
+
# Common skill categories - reduced keyword list for speed
|
186 |
+
skill_categories = {
|
187 |
+
"Programming": ["Python", "Java", "JavaScript", "HTML", "CSS", "SQL", "C++", "C#", "Go"],
|
188 |
+
"Data Science": ["Machine Learning", "Data Analysis", "Statistics", "TensorFlow", "PyTorch", "AI", "Algorithms"],
|
189 |
+
"Database": ["SQL", "MySQL", "MongoDB", "Database", "NoSQL", "PostgreSQL"],
|
190 |
+
"Web Development": ["React", "Angular", "Node.js", "Frontend", "Backend", "Full-Stack"],
|
191 |
+
"Software Development": ["Agile", "Scrum", "Git", "DevOps", "Docker", "System Design"],
|
192 |
+
"Cloud": ["AWS", "Azure", "Google Cloud", "Cloud Computing"],
|
193 |
+
"Security": ["Cybersecurity", "Network Security", "Encryption", "Security"],
|
194 |
+
"Business": ["Project Management", "Business Analysis", "Leadership", "Teamwork"],
|
195 |
+
"Design": ["UX/UI", "User Experience", "Design Thinking", "Adobe"]
|
196 |
+
}
|
197 |
+
|
198 |
+
# Work experience extraction
|
199 |
+
work_headers = [
|
200 |
+
"work experience", "professional experience", "employment history",
|
201 |
+
"work history", "experience"
|
202 |
]
|
203 |
|
204 |
+
next_section_headers = [
|
205 |
+
"education", "skills", "certifications", "projects", "achievements"
|
|
|
206 |
]
|
207 |
|
208 |
+
# Process everything at once
|
209 |
+
lines = text.split('\n')
|
210 |
+
text_lower = text.lower()
|
211 |
|
212 |
+
# Skills extraction
|
213 |
+
found_skills = []
|
214 |
+
for category, skills in skill_categories.items():
|
215 |
+
category_skills = []
|
216 |
+
for skill in skills:
|
217 |
+
if skill.lower() in text_lower:
|
218 |
+
category_skills.append(skill)
|
219 |
+
|
220 |
+
if category_skills:
|
221 |
+
found_skills.append(f"{category}: {', '.join(category_skills)}")
|
222 |
|
223 |
+
# Work experience extraction - simplified approach
|
224 |
+
work_section = []
|
225 |
+
in_work_section = False
|
|
|
226 |
|
227 |
+
for idx, line in enumerate(lines):
|
228 |
+
line_lower = line.lower().strip()
|
229 |
+
|
230 |
+
# Start of work section
|
231 |
+
if not in_work_section:
|
232 |
+
if any(header in line_lower for header in work_headers):
|
233 |
+
in_work_section = True
|
234 |
+
continue
|
235 |
+
# End of work section
|
236 |
+
elif in_work_section:
|
237 |
+
if any(header in line_lower for header in next_section_headers):
|
238 |
+
break
|
239 |
+
|
240 |
+
if line.strip():
|
241 |
+
work_section.append(line.strip())
|
242 |
|
243 |
+
# Simplified work formatting
|
244 |
+
if not work_section:
|
245 |
+
work_experience = "Work experience not clearly identified"
|
246 |
+
else:
|
247 |
+
# Just take the first 5-7 lines of the work section as a summary
|
248 |
+
work_lines = []
|
249 |
+
company_count = 0
|
250 |
+
current_company = ""
|
251 |
+
|
252 |
+
for line in work_section:
|
253 |
+
# New company entry often has a date
|
254 |
+
if re.search(r'(19|20)\d{2}', line):
|
255 |
+
company_count += 1
|
256 |
+
if company_count <= 3: # Limit to 3 most recent positions
|
257 |
+
current_company = line
|
258 |
+
work_lines.append(f"**{line}**")
|
259 |
+
else:
|
260 |
+
break
|
261 |
+
elif company_count <= 3 and len(work_lines) < 10: # Limit total lines
|
262 |
+
work_lines.append(line)
|
263 |
+
|
264 |
+
work_experience = "\n• " + "\n• ".join(work_lines[:7]) if work_lines else "Work experience not clearly structured"
|
265 |
+
|
266 |
+
skills_formatted = "\n• " + "\n• ".join(found_skills) if found_skills else "No specific technical skills clearly identified"
|
267 |
+
|
268 |
+
return skills_formatted, work_experience
|
269 |
+
|
270 |
+
#####################################
|
271 |
+
# Function: Summarize Resume Text
|
272 |
+
#####################################
|
273 |
+
def summarize_resume_text(resume_text):
|
274 |
+
"""
|
275 |
+
Generates a structured summary of the resume text
|
276 |
+
"""
|
277 |
+
start_time = time.time()
|
278 |
+
|
279 |
+
# First, generate a quick summary using pre-loaded model
|
280 |
+
max_input_length = 1024 # Model limit
|
281 |
+
|
282 |
+
# Only summarize the first portion of text for speed
|
283 |
+
text_to_summarize = resume_text[:min(len(resume_text), max_input_length)]
|
284 |
+
base_summary = models['summarizer'](text_to_summarize)[0]['summary_text']
|
285 |
+
|
286 |
+
# Extract information in parallel where possible
|
287 |
+
with concurrent.futures.ThreadPoolExecutor() as executor:
|
288 |
+
# These can run in parallel
|
289 |
+
name_future = executor.submit(extract_name, resume_text[:500]) # Only use start of text
|
290 |
+
age_future = executor.submit(extract_age, resume_text)
|
291 |
+
industry_future = executor.submit(extract_industry, resume_text, base_summary)
|
292 |
+
skills_work_future = executor.submit(extract_skills_and_work, resume_text)
|
293 |
+
|
294 |
+
# Get results
|
295 |
+
name = name_future.result()
|
296 |
+
age = age_future.result()
|
297 |
+
industry = industry_future.result()
|
298 |
+
skills, work_experience = skills_work_future.result()
|
299 |
+
|
300 |
+
# Format the structured summary
|
301 |
+
formatted_summary = f"Name: {name}\n"
|
302 |
+
formatted_summary += f"Age: {age}\n"
|
303 |
+
formatted_summary += f"Expected Job Industry: {industry}\n\n"
|
304 |
+
formatted_summary += f"Previous Work Experience: {work_experience}\n\n"
|
305 |
+
formatted_summary += f"Skills: {skills}"
|
306 |
+
|
307 |
+
execution_time = time.time() - start_time
|
308 |
+
|
309 |
+
return formatted_summary, execution_time
|
310 |
+
|
311 |
+
#####################################
|
312 |
+
# Function: Extract Job Requirements
|
313 |
+
#####################################
|
314 |
+
def extract_job_requirements(job_description):
|
315 |
+
"""
|
316 |
+
Extract key requirements and skills from a job description
|
317 |
+
"""
|
318 |
+
# Common technical skill categories to look for
|
319 |
+
tech_skill_categories = {
|
320 |
+
"programming_languages": ["Python", "Java", "C++", "JavaScript", "TypeScript", "Go", "Rust", "SQL", "Ruby", "PHP", "Swift", "Kotlin"],
|
321 |
+
"web_technologies": ["React", "Angular", "Vue", "Node.js", "HTML", "CSS", "Django", "Flask", "Spring", "REST API", "GraphQL"],
|
322 |
+
"data_tech": ["Machine Learning", "TensorFlow", "PyTorch", "Data Science", "AI", "Big Data", "Deep Learning", "NLP", "Computer Vision"],
|
323 |
+
"cloud_devops": ["AWS", "Azure", "GCP", "Docker", "Kubernetes", "CI/CD", "Jenkins", "GitHub Actions", "Terraform", "Serverless"],
|
324 |
+
"database": ["SQL", "MySQL", "PostgreSQL", "MongoDB", "Redis", "Elasticsearch", "DynamoDB", "Cassandra"],
|
325 |
}
|
326 |
|
327 |
+
# Common soft skills to look for
|
328 |
+
soft_skills = ["Communication", "Leadership", "Teamwork", "Problem-solving", "Critical thinking", "Adaptability", "Creativity", "Time management"]
|
329 |
+
|
330 |
+
# Clean the text for processing
|
331 |
+
clean_job_text = job_description.lower()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
332 |
|
333 |
+
# Extract job title
|
334 |
+
title_patterns = [
|
335 |
+
r'^([^:.\n]+?)(position|role|job|opening|vacancy)',
|
336 |
+
r'^([^:.\n]+?)\n',
|
337 |
+
r'(hiring|looking for(?: a| an)?|recruiting)(?: a| an)? ([^:.\n]+?)(:-|[.:]|\n|$)'
|
338 |
]
|
339 |
|
340 |
+
job_title = "Not specified"
|
341 |
+
for pattern in title_patterns:
|
342 |
+
title_match = re.search(pattern, clean_job_text, re.IGNORECASE)
|
343 |
+
if title_match:
|
344 |
+
potential_title = title_match.group(1).strip() if len(title_match.groups()) >= 1 else title_match.group(2).strip()
|
345 |
+
if 3 <= len(potential_title) <= 50: # Reasonable title length
|
346 |
+
job_title = potential_title.capitalize()
|
347 |
+
break
|
348 |
|
349 |
+
# Extract years of experience
|
350 |
exp_patterns = [
|
351 |
+
r'(\d+)(?:\+)?\s*(?:years|yrs)(?:\s*of)?\s*(?:experience|exp)',
|
352 |
+
r'experience\s*(?:of)?\s*(\d+)(?:\+)?\s*(?:years|yrs)'
|
353 |
]
|
354 |
|
355 |
+
years_required = 0
|
356 |
for pattern in exp_patterns:
|
357 |
+
exp_match = re.search(pattern, clean_job_text, re.IGNORECASE)
|
358 |
+
if exp_match:
|
359 |
+
try:
|
360 |
+
years_required = int(exp_match.group(1))
|
361 |
+
break
|
362 |
+
except:
|
363 |
+
pass
|
364 |
+
|
365 |
+
# Extract technical skills
|
366 |
+
found_tech_skills = {}
|
367 |
+
all_tech_skills = []
|
368 |
+
|
369 |
+
for category, skills in tech_skill_categories.items():
|
370 |
+
category_skills = []
|
371 |
+
for skill in skills:
|
372 |
+
if re.search(r'\b' + re.escape(skill.lower()) + r'\b', clean_job_text):
|
373 |
+
category_skills.append(skill)
|
374 |
+
all_tech_skills.append(skill)
|
375 |
+
|
376 |
+
if category_skills:
|
377 |
+
found_tech_skills[category] = category_skills
|
378 |
+
|
379 |
+
# Extract soft skills
|
380 |
+
found_soft_skills = []
|
381 |
+
for skill in soft_skills:
|
382 |
+
if re.search(r'\b' + re.escape(skill.lower()) + r'\b', clean_job_text):
|
383 |
+
found_soft_skills.append(skill)
|
384 |
+
|
385 |
+
# Extract educational requirements
|
386 |
+
edu_patterns = [
|
387 |
+
r"bachelor'?s degree|bs|b\.s\.",
|
388 |
+
r"master'?s degree|ms|m\.s\.",
|
389 |
+
r"phd|ph\.d\.|doctorate",
|
390 |
+
r"mba|m\.b\.a\."
|
391 |
+
]
|
392 |
|
393 |
+
education_required = []
|
394 |
+
for pattern in edu_patterns:
|
395 |
+
if re.search(pattern, clean_job_text, re.IGNORECASE):
|
396 |
+
edu_match = re.search(pattern, clean_job_text, re.IGNORECASE).group(0)
|
397 |
+
education_required.append(edu_match.capitalize())
|
398 |
|
399 |
+
# Format the job requirements
|
400 |
+
job_requirements = {
|
401 |
"title": job_title,
|
402 |
+
"years_experience": years_required,
|
403 |
+
"technical_skills": all_tech_skills,
|
404 |
+
"soft_skills": found_soft_skills,
|
405 |
+
"education": education_required,
|
|
|
|
|
406 |
}
|
407 |
|
408 |
+
return job_requirements
|
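To make the returned structure concrete, here is a hedged illustration with an invented posting; the commented dict shows the shape and the values this code would plausibly produce, not output captured from a real run.

    # Hypothetical input for illustration only.
    example_posting = (
        "Hiring a Backend Engineer. 3+ years of experience with Python, SQL and AWS. "
        "Strong communication skills required."
    )
    # extract_job_requirements(example_posting) should yield roughly:
    # {
    #     "title": <detected title or "Not specified">,
    #     "years_experience": 3,
    #     "technical_skills": ["Python", "SQL", "AWS", "SQL"],  # SQL sits under two categories, so it can repeat
    #     "soft_skills": ["Communication"],
    #     "education": [],
    # }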
409 |
|
410 |
+
#####################################
|
411 |
+
# Function: Analyze Job Fit
|
412 |
+
#####################################
|
413 |
+
def analyze_job_fit(resume_summary, job_description):
|
414 |
+
"""
|
415 |
+
Analyze how well the candidate fits the job requirements with detailed category breakdowns.
|
416 |
+
"""
|
417 |
+
start_time = time.time()
|
418 |
|
419 |
+
# Extract job requirements
|
420 |
+
job_requirements = extract_job_requirements(job_description)
|
|
|
421 |
|
422 |
+
# Define skill categories to evaluate against
|
423 |
+
resume_lower = resume_summary.lower()
|
424 |
+
job_lower = job_description.lower()
|
425 |
|
426 |
+
# Define keyword categories based on the job description
|
427 |
+
# We'll dynamically build these based on the job requirements
|
428 |
+
skill_keywords = {
|
429 |
+
"technical_skills": job_requirements["technical_skills"],
|
430 |
+
"soft_skills": job_requirements["soft_skills"],
|
431 |
+
"education": job_requirements["education"],
|
432 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
433 |
|
434 |
+
# Add additional keywords from the job description for comprehensive analysis
|
435 |
+
additional_keywords = {
|
436 |
+
"problem_solving": ["problem solving", "analytical", "critical thinking", "troubleshooting", "debugging",
|
437 |
+
"optimization", "solution", "resolve", "analyze"],
|
438 |
+
"domain_knowledge": ["industry", "experience", "expertise", "knowledge", "familiar with", "understanding of"],
|
439 |
+
"collaboration": ["team", "collaborate", "cooperation", "cross-functional", "communication", "stakeholder"]
|
440 |
+
}
|
441 |
+
|
442 |
+
# Merge the keywords
|
443 |
+
skill_keywords.update(additional_keywords)
|
444 |
+
|
445 |
+
# Category weights with descriptive labels
|
446 |
+
category_weights = {
|
447 |
+
"technical_skills": {"weight": 0.40, "label": "Technical Skills"},
|
448 |
+
"soft_skills": {"weight": 0.15, "label": "Soft Skills"},
|
449 |
+
"education": {"weight": 0.10, "label": "Education"},
|
450 |
+
"problem_solving": {"weight": 0.15, "label": "Problem Solving"},
|
451 |
+
"domain_knowledge": {"weight": 0.10, "label": "Domain Knowledge"},
|
452 |
+
"collaboration": {"weight": 0.10, "label": "Collaboration"}
|
453 |
+
}
|
454 |
|
455 |
+
# Calculate category scores and store detailed information
|
456 |
+
category_scores = {}
|
457 |
+
category_details = {}
|
458 |
+
found_skills = {}
|
459 |
+
|
460 |
+
for category, keywords in skill_keywords.items():
|
461 |
+
if not keywords: # Skip empty categories
|
462 |
+
category_scores[category] = 0.0
|
463 |
+
category_details[category] = {
|
464 |
+
"raw_percentage": 0,
|
465 |
+
"adjusted_score": 0,
|
466 |
+
"matching_keywords": [],
|
467 |
+
"total_keywords": 0,
|
468 |
+
"matches": 0
|
469 |
+
}
|
470 |
+
found_skills[category] = []
|
471 |
+
continue
|
472 |
+
|
473 |
+
# Find the specific matching keywords for feedback
|
474 |
+
category_matches = []
|
475 |
+
for keyword in keywords:
|
476 |
+
if keyword.lower() in resume_lower:
|
477 |
+
category_matches.append(keyword)
|
478 |
+
|
479 |
+
found_skills[category] = category_matches
|
480 |
+
|
481 |
+
# Count matches but cap at a reasonable level
|
482 |
+
matches = len(category_matches)
|
483 |
+
total_keywords = len(keywords)
|
484 |
+
|
485 |
+
# Calculate raw percentage for this category
|
486 |
+
raw_percentage = int((matches / max(1, total_keywords)) * 100)
|
487 |
+
|
488 |
+
# Apply logarithmic scaling for more realistic scores
|
489 |
+
if matches == 0:
|
490 |
+
adjusted_score = 0.0
|
491 |
else:
|
492 |
+
# Logarithmic scaling to prevent perfect scores
|
493 |
+
adjusted_score = min(0.95, (math.log(matches + 1) / math.log(min(total_keywords, 8) + 1)))
|
494 |
|
495 |
+
# Store both raw and adjusted scores for feedback
|
496 |
+
category_scores[category] = adjusted_score
|
497 |
+
category_details[category] = {
|
498 |
+
"raw_percentage": raw_percentage,
|
499 |
+
"adjusted_score": int(adjusted_score * 100),
|
500 |
+
"matching_keywords": category_matches,
|
501 |
+
"total_keywords": total_keywords,
|
502 |
+
"matches": matches
|
|
|
|
|
503 |
}
|
504 |
|
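As a worked example of the diminishing-returns scaling above (numbers invented for illustration):

    import math

    matches, total_keywords = 3, 8
    raw_percentage = int((matches / max(1, total_keywords)) * 100)                      # 37
    adjusted = min(0.95, math.log(matches + 1) / math.log(min(total_keywords, 8) + 1))  # ~0.63
    # Three of eight keywords therefore report as roughly 63%, not 37%,
    # because early matches count for more than later ones.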
505 |
+
# Check for years of experience match
|
506 |
+
years_required = job_requirements["years_experience"]
|
|
|
|
|
507 |
|
508 |
+
# Extract years of experience from resume
|
509 |
+
experience_years = 0
|
510 |
+
year_patterns = [
|
511 |
+
r'(\d+)\s*(?:\+)?\s*years?\s*(?:of)?\s*experience',
|
512 |
+
r'experience\s*(?:of)?\s*(\d+)\s*(?:\+)?\s*years?'
|
513 |
+
]
|
514 |
|
515 |
+
for pattern in year_patterns:
|
516 |
+
exp_match = re.search(pattern, resume_lower)
|
517 |
+
if exp_match:
|
518 |
+
try:
|
519 |
+
experience_years = int(exp_match.group(1))
|
520 |
+
break
|
521 |
+
except:
|
522 |
+
pass
|
523 |
+
|
524 |
+
# If we couldn't find explicit years, try to count based on work history
|
525 |
+
if experience_years == 0:
|
526 |
+
# Try to extract from work experience section
|
527 |
+
work_exp_match = re.search(r'work experience:(.*?)(?=\n\n|$)', resume_summary, re.IGNORECASE | re.DOTALL)
|
528 |
+
if work_exp_match:
|
529 |
+
work_text = work_exp_match.group(1).lower()
|
530 |
+
years = re.findall(r'(\d{4})\s*-\s*(\d{4}|present|current)', work_text)
|
531 |
+
|
532 |
+
total_years = 0
|
533 |
+
for year_range in years:
|
534 |
+
start_year = int(year_range[0])
|
535 |
+
if year_range[1].isdigit():
|
536 |
+
end_year = int(year_range[1])
|
537 |
+
else:
|
538 |
+
end_year = 2025 # Assume "present" is current year
|
539 |
+
|
540 |
+
total_years += (end_year - start_year)
|
541 |
+
|
542 |
+
experience_years = total_years
|
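A small worked example of this date-range fallback (the company names and years are invented; 2025 is the hard-coded "present" cutoff used above):

    import re

    work_text = "acme corp 2019 - 2022\nglobex 2022 - present"
    ranges = re.findall(r'(\d{4})\s*-\s*(\d{4}|present|current)', work_text)
    total = sum((int(end) if end.isdigit() else 2025) - int(start) for start, end in ranges)
    # ranges == [('2019', '2022'), ('2022', 'present')] and total == 6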
543 |
|
544 |
+
# Calculate experience match score
|
545 |
+
if years_required > 0:
|
546 |
+
if experience_years >= years_required:
|
547 |
+
exp_score = 1.0
|
548 |
+
else:
|
549 |
+
exp_score = experience_years / years_required
|
550 |
+
else:
|
551 |
+
exp_score = 1.0 # If no specific years required, assume full match
|
552 |
+
|
553 |
+
category_scores["experience"] = exp_score
|
554 |
+
category_details["experience"] = {
|
555 |
+
"raw_percentage": int(exp_score * 100),
|
556 |
+
"adjusted_score": int(exp_score * 100),
|
557 |
+
"candidate_years": experience_years,
|
558 |
+
"required_years": years_required
|
559 |
+
}
|
560 |
|
561 |
+
# Calculate weighted score
|
562 |
+
weighted_score = 0
|
563 |
+
for category, score in category_scores.items():
|
564 |
+
if category in category_weights:
|
565 |
+
weighted_score += score * category_weights[category]["weight"]
|
566 |
|
567 |
+
# Add experience separately (not in the original weights)
|
568 |
+
weighted_score = (weighted_score * 0.8) + (category_scores["experience"] * 0.2)
|
|
|
|
|
|
|
569 |
|
570 |
+
# Apply final curve to keep scores in a realistic range
|
571 |
+
match_percentage = min(95, max(35, int(weighted_score * 100)))
|
572 |
|
573 |
+
# Determine fit/not fit status
|
574 |
+
fit_status = "FIT" if match_percentage >= 70 else "NOT FIT"
|
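To make the 80/20 blend and the 35-95 curve concrete, a hedged worked example with invented numbers:

    category_part = 0.55    # hypothetical weighted sum over the six keyword categories
    experience_part = 1.0   # candidate meets or exceeds the required years
    blended = (category_part * 0.8) + (experience_part * 0.2)  # 0.64
    match_pct = min(95, max(35, int(blended * 100)))            # 64, already inside the 35-95 band
    status = "FIT" if match_pct >= 70 else "NOT FIT"            # "NOT FIT" at 64%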
|
|
|
|
575 |
|
576 |
+
# Get more specific information for a better prompt
|
577 |
+
# Get top skills across all categories (up to 5 total)
|
578 |
+
all_matching_skills = []
|
579 |
+
for category, matches in found_skills.items():
|
580 |
+
if matches:
|
581 |
+
all_matching_skills.extend(matches)
|
582 |
|
583 |
+
top_skills = list(set(all_matching_skills))[:5] # Remove duplicates and take top 5
|
584 |
+
skills_text = ", ".join(top_skills) if top_skills else "limited relevant skills"
|
585 |
|
586 |
+
# Get strongest and weakest categories for more specific feedback
|
587 |
+
categories_sorted = sorted(
|
588 |
+
[(cat, category_details[cat]["adjusted_score"]) for cat in category_weights.keys() if cat in category_details],
|
589 |
+
key=lambda x: x[1],
|
590 |
+
reverse=True
|
591 |
+
)
|
592 |
|
593 |
+
top_category = category_weights[categories_sorted[0][0]]["label"] if categories_sorted else "Technical Skills"
|
594 |
+
weak_category = category_weights[categories_sorted[-1][0]]["label"] if categories_sorted else "Domain Knowledge"
|
595 |
|
596 |
+
# Create a prompt for the evaluation model
|
597 |
+
prompt = f"""
|
598 |
+
Generate a professional expert assessment for a job candidate applying for the position: {job_requirements['title']}.
|
599 |
+
Skills detected in candidate: {skills_text}.
|
600 |
+
Strongest area: {top_category} ({categories_sorted[0][1]}%).
|
601 |
+
Weakest area: {weak_category} ({categories_sorted[-1][1]}%).
|
602 |
+
Overall match: {match_percentage}%.
|
603 |
+
Fit status: {fit_status}
|
604 |
+
|
605 |
+
Write an evaluative assessment that analyzes the candidate's fit for this position.
|
606 |
+
Start with "{fit_status}: This candidate" and provide a professional evaluation of their fit.
|
607 |
+
|
608 |
+
{fit_status}: This candidate"""
|
609 |
+
|
610 |
try:
|
611 |
+
# Generate the assessment using the evaluation model
|
612 |
+
assessment_results = models['evaluator'](
|
613 |
+
prompt,
|
614 |
+
max_length=300,
|
|
|
|
|
|
|
|
|
615 |
do_sample=True,
|
616 |
+
temperature=0.75,
|
617 |
+
num_return_sequences=3
|
618 |
)
|
619 |
+
|
620 |
+
# Find the best response with thorough cleaning
|
621 |
+
best_assessment = None
|
622 |
+
for result in assessment_results:
|
623 |
+
# Get the raw text
|
624 |
+
raw_text = result['generated_text'].strip()
|
625 |
|
626 |
+
# Extract just the part that starts with the fit status
|
627 |
+
if f"{fit_status}: This candidate" in raw_text:
|
628 |
+
# Find the start of the actual assessment
|
629 |
+
start_idx = raw_text.find(f"{fit_status}: This candidate")
|
630 |
+
text = raw_text[start_idx:]
|
631 |
+
|
632 |
+
# Check if it's actually an assessment (not just instructions)
|
633 |
+
if len(text) > 50 and not any(x in text.lower() for x in [
|
634 |
+
"actionable advice",
|
635 |
+
"include specific",
|
636 |
+
"make an assessment",
|
637 |
+
"evaluate their",
|
638 |
+
"assess their",
|
639 |
+
"provide specific areas"
|
640 |
+
]):
|
641 |
+
best_assessment = text
|
642 |
+
break
|
643 |
+
|
644 |
+
# Use the best response or generate a fallback if none were ideal
|
645 |
+
if best_assessment:
|
646 |
+
assessment = best_assessment
|
647 |
+
else:
|
648 |
+
# Generate a completely manual assessment
|
649 |
+
assessment = generate_fallback_assessment(
|
650 |
+
resume_summary,
|
651 |
+
job_requirements,
|
652 |
+
match_percentage,
|
653 |
+
top_skills,
|
654 |
+
top_category,
|
655 |
+
weak_category,
|
656 |
+
fit_status
|
657 |
+
)
|
658 |
|
|
|
|
|
|
|
659 |
except Exception as e:
|
660 |
+
# Fallback to a manual assessment
|
661 |
+
assessment = generate_fallback_assessment(
|
662 |
+
resume_summary,
|
663 |
+
job_requirements,
|
664 |
+
match_percentage,
|
665 |
+
top_skills,
|
666 |
+
top_category,
|
667 |
+
weak_category,
|
668 |
+
fit_status
|
669 |
+
)
|
670 |
+
|
671 |
+
# Final cleanup
|
672 |
+
assessment = re.sub(r'include specific actionable advice.*?improvement\.', '', assessment, flags=re.DOTALL|re.IGNORECASE)
|
673 |
+
assessment = re.sub(r'make an assessment.*?resume\.', '', assessment, flags=re.DOTALL|re.IGNORECASE)
|
674 |
+
assessment = re.sub(r'evaluate their technical skills.*?position\.', '', assessment, flags=re.DOTALL|re.IGNORECASE)
|
675 |
+
assessment = re.sub(r'assess their strengths.*?contributions', '', assessment, flags=re.DOTALL|re.IGNORECASE)
|
676 |
+
assessment = re.sub(r'provide specific areas.*?needed', '', assessment, flags=re.DOTALL|re.IGNORECASE)
|
677 |
+
assessment = re.sub(r'give an overall.*?position', '', assessment, flags=re.DOTALL|re.IGNORECASE)
|
678 |
+
|
679 |
+
# Clean up any double spaces, newlines, etc.
|
680 |
+
assessment = re.sub(r'\s+', ' ', assessment)
|
681 |
+
assessment = assessment.strip()
|
682 |
+
|
683 |
+
# If cleaning removed too much text, use the fallback
|
684 |
+
if len(assessment) < 50 or not assessment.startswith(f"{fit_status}: This candidate"):
|
685 |
+
assessment = generate_fallback_assessment(
|
686 |
+
resume_summary,
|
687 |
+
job_requirements,
|
688 |
+
match_percentage,
|
689 |
+
top_skills,
|
690 |
+
top_category,
|
691 |
+
weak_category,
|
692 |
+
fit_status
|
693 |
+
)
|
694 |
+
|
695 |
+
# Make sure percentages are consistent
|
696 |
+
assessment = re.sub(r'\b\d{1,2}%\b', f"{match_percentage}%", assessment)
|
697 |
+
|
698 |
+
execution_time = time.time() - start_time
|
699 |
+
|
700 |
+
return assessment, match_percentage, category_details, job_requirements, execution_time
|
701 |
|
702 |
# Generate fallback assessment
|
703 |
+
def generate_fallback_assessment(resume_summary, job_requirements, match_percentage, top_skills, top_category, weak_category, fit_status):
|
704 |
"""Generate a fallback assessment if the model fails"""
|
705 |
+
job_title = job_requirements["title"]
|
706 |
+
skills_text = ", ".join(top_skills) if top_skills else "relevant skills"
|
|
|
|
|
707 |
|
708 |
if fit_status == "FIT":
|
709 |
+
assessment = f"""{fit_status}: This candidate demonstrates strong alignment with the {job_title} position, achieving an overall match score of {match_percentage}%. Their proficiency in {skills_text} positions them well to contribute effectively, with particular strength in {top_category}. The candidate's experience level is suitable for the role's requirements. To maximize their success, they could consider developing expertise in {weak_category} to round out their skill set for this position.
|
710 |
"""
|
711 |
else:
|
712 |
+
assessment = f"""{fit_status}: This candidate currently shows limited alignment with the {job_title} position, with an overall match score of {match_percentage}%. While they demonstrate some capabilities in {top_category} and have experience with {skills_text}, they would need to develop expertise in {weak_category} to be more competitive for this role. The candidate may become a stronger fit by focusing on these skill gaps and gaining more relevant experience in the key requirements for this position.
|
713 |
"""
|
714 |
|
715 |
return assessment
|
716 |
|
717 |
+
#####################################
|
718 |
+
# Main Streamlit Interface
|
719 |
+
#####################################
|
720 |
st.title("Resume-Job Fit Analyzer")
|
721 |
+
st.markdown(
|
722 |
+
"""
|
723 |
+
Upload your resume file in **.docx**, **.doc**, or **.txt** format and enter a job description to see how well you match with the job requirements. The app performs the following tasks:
|
724 |
+
1. Extracts text from your resume.
|
725 |
+
2. Uses AI to generate a structured candidate summary.
|
726 |
+
3. Analyzes how well your profile fits the specific job requirements.
|
727 |
+
"""
|
728 |
+
)
|
729 |
|
730 |
# Resume upload
|
731 |
+
uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
|
|
|
732 |
|
733 |
# Job description input
|
734 |
+
job_description = st.text_area("Enter Job Description", height=200, placeholder="Paste the job description here...")
|
735 |
+
|
736 |
+
# Process button with optimized flow
|
737 |
+
if uploaded_file is not None and job_description and st.button("Analyze Job Fit"):
|
738 |
+
# Create a placeholder for the progress bar
|
739 |
+
progress_bar = st.progress(0)
|
740 |
+
status_text = st.empty()
|
741 |
+
|
742 |
+
# Step 1: Extract text
|
743 |
+
status_text.text("Step 1/3: Extracting text from resume...")
|
744 |
+
resume_text = extract_text_from_file(uploaded_file)
|
745 |
+
progress_bar.progress(25)
|
746 |
+
|
747 |
+
if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx, .doc, or .txt file.":
|
748 |
+
st.error(resume_text)
|
|
|
|
|
|
|
|
|
749 |
else:
|
750 |
+
# Step 2: Generate summary
|
751 |
+
status_text.text("Step 2/3: Analyzing resume and generating summary...")
|
752 |
+
summary, summarization_time = summarize_resume_text(resume_text)
|
753 |
+
progress_bar.progress(50)
|
754 |
+
|
755 |
+
# Display summary
|
756 |
+
st.subheader("Your Resume Summary")
|
757 |
+
st.markdown(summary)
|
758 |
+
st.info(f"Summary generated in {summarization_time:.2f} seconds")
|
759 |
+
|
760 |
+
# Step 3: Generate job fit assessment
|
761 |
+
status_text.text("Step 3/3: Evaluating job fit...")
|
762 |
+
assessment, match_percentage, category_details, job_requirements, assessment_time = analyze_job_fit(summary, job_description)
|
763 |
+
progress_bar.progress(100)
|
764 |
+
|
765 |
+
# Clear status messages
|
766 |
+
status_text.empty()
|
767 |
+
|
768 |
+
# Display job fit results
|
769 |
+
st.subheader("Job Fit Assessment")
|
770 |
+
|
771 |
+
# Display match percentage with appropriate color and emoji
|
772 |
+
if match_percentage >= 85:
|
773 |
+
st.success(f"**Overall Job Match Score:** {match_percentage}% π")
|
774 |
+
elif match_percentage >= 70:
|
775 |
+
st.success(f"**Overall Job Match Score:** {match_percentage}% β
")
|
776 |
+
elif match_percentage >= 50:
|
777 |
+
st.warning(f"**Overall Job Match Score:** {match_percentage}% β οΈ")
|
778 |
+
else:
|
779 |
+
st.error(f"**Overall Job Match Score:** {match_percentage}% π")
|
780 |
+
|
781 |
+
# Add detailed score breakdown
|
782 |
+
st.markdown("### Score Breakdown")
|
783 |
+
|
784 |
+
# Create a neat table with category scores
|
785 |
+
breakdown_data = []
|
786 |
+
for category, details in category_details.items():
|
787 |
+
if category == "experience":
|
788 |
+
label = "Experience"
|
789 |
+
matching_info = f"{details['candidate_years']} years (Required: {details['required_years']} years)"
|
790 |
+
else:
|
791 |
+
# Get the nice label for the category
|
792 |
+
label = {"technical_skills": "Technical Skills",
|
793 |
+
"soft_skills": "Soft Skills",
|
794 |
+
"education": "Education",
|
795 |
+
"problem_solving": "Problem Solving",
|
796 |
+
"domain_knowledge": "Domain Knowledge",
|
797 |
+
"collaboration": "Collaboration"}[category]
|
798 |
+
|
799 |
+
matching_info = ", ".join(details["matching_keywords"][:3]) if details.get("matching_keywords") else "None detected"
|
800 |
+
|
801 |
+
# Add formatted breakdown row
|
802 |
+
breakdown_data.append({
|
803 |
+
"Category": label,
|
804 |
+
"Score": f"{details['adjusted_score']}%",
|
805 |
+
"Matching Items": matching_info
|
806 |
+
})
|
807 |
+
|
808 |
+
# Convert to DataFrame and display
|
809 |
+
breakdown_df = pd.DataFrame(breakdown_data)
|
810 |
+
# Remove the index column entirely
|
811 |
+
st.table(breakdown_df.set_index('Category'))  # Use Category as the index so no numerical index is shown
|
812 |
+
|
813 |
+
# Show a note about how scores are calculated
|
814 |
+
with st.expander("How are these scores calculated?"):
|
815 |
+
st.markdown("""
|
816 |
+
- **Technical Skills** (40% of total): Evaluates programming languages, software tools, and technical requirements
|
817 |
+
- **Soft Skills** (15% of total): Assesses communication, teamwork, and interpersonal abilities
|
818 |
+
- **Education** (10% of total): Compares educational requirements with candidate's background
|
819 |
+
- **Problem Solving** (15% of total): Measures analytical thinking and approach to challenges
|
820 |
+
- **Domain Knowledge** (10% of total): Evaluates industry-specific experience and knowledge
|
821 |
+
- **Collaboration** (10% of total): Assesses team skills and cross-functional collaboration
|
822 |
+
- **Experience** (20% overall modifier): Years of relevant experience compared to job requirements
|
823 |
+
|
824 |
+
Scores are calculated based on keyword matches in your resume, with diminishing returns applied (first few skills matter more than later ones).
|
825 |
+
""")
|
826 |
+
|
827 |
+
# Display assessment
|
828 |
+
st.markdown("### Expert Assessment")
|
829 |
+
st.markdown(assessment)
|
830 |
+
|
831 |
+
st.info(f"Assessment completed in {assessment_time:.2f} seconds")
|
832 |
+
|
833 |
+
# Add potential next steps based on the match percentage
|
834 |
+
st.subheader("Recommended Next Steps")
|
835 |
+
|
836 |
+
if match_percentage >= 80:
|
837 |
+
st.markdown("""
|
838 |
+
- Consider applying for this position as you appear to be a strong match
|
839 |
+
- Prepare for technical interviews by focusing on your strongest skills
|
840 |
+
- Review the job description again to prepare for specific interview questions
|
841 |
+
""")
|
842 |
+
elif match_percentage >= 60:
|
843 |
+
st.markdown("""
|
844 |
+
- Focus on strengthening your weaker areas before applying
|
845 |
+
- Highlight your strongest skills and experience in your cover letter
|
846 |
+
- Consider gaining additional experience or certifications in key required areas
|
847 |
+
""")
|
848 |
+
else:
|
849 |
+
st.markdown("""
|
850 |
+
- This position may not be the best fit for your current skills and experience
|
851 |
+
- Consider roles that better align with your strengths
|
852 |
+
- If you're set on this type of position, focus on developing skills in the areas mentioned in the job description
|
853 |
+
""")
|